Diffstat (limited to 'third_party/rust/glslopt/glsl-optimizer/src/util')
59 files changed, 17750 insertions, 0 deletions
diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/bitscan.h b/third_party/rust/glslopt/glsl-optimizer/src/util/bitscan.h new file mode 100644 index 0000000000..895a1e7a37 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/bitscan.h @@ -0,0 +1,326 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef BITSCAN_H +#define BITSCAN_H + +#include <assert.h> +#include <stdint.h> +#include <stdbool.h> +#include <string.h> + +#if defined(_MSC_VER) +#include <intrin.h> +#endif + +#if defined(__POPCNT__) +#include <popcntintrin.h> +#endif + +#include "c99_compat.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Find first bit set in word. Least significant bit is 1. + * Return 0 if no bits set. + */ +#ifdef HAVE___BUILTIN_FFS +#define ffs __builtin_ffs +#elif defined(_MSC_VER) && (_M_IX86 || _M_ARM || _M_AMD64 || _M_IA64) +static inline +int ffs(int i) +{ + unsigned long index; + if (_BitScanForward(&index, i)) + return index + 1; + else + return 0; +} +#else +extern +int ffs(int i); +#endif + +#ifdef HAVE___BUILTIN_FFSLL +#define ffsll __builtin_ffsll +#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM64 || _M_IA64) +static inline int +ffsll(long long int i) +{ + unsigned long index; + if (_BitScanForward64(&index, i)) + return index + 1; + else + return 0; +} +#else +extern int +ffsll(long long int val); +#endif + + +/* Destructively loop over all of the bits in a mask as in: + * + * while (mymask) { + * int i = u_bit_scan(&mymask); + * ... process element i + * } + * + */ +static inline int +u_bit_scan(unsigned *mask) +{ + const int i = ffs(*mask) - 1; + *mask ^= (1u << i); + return i; +} + +static inline int +u_bit_scan64(uint64_t *mask) +{ + const int i = ffsll(*mask) - 1; + *mask ^= (((uint64_t)1) << i); + return i; +} + +/* Determine if an unsigned value is a power of two. + * + * \note + * Zero is treated as a power of two. + */ +static inline bool +util_is_power_of_two_or_zero(unsigned v) +{ + return (v & (v - 1)) == 0; +} + +/* Determine if an uint64_t value is a power of two. + * + * \note + * Zero is treated as a power of two. 
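+ *
+ * For example, util_is_power_of_two_or_zero64(0), util_is_power_of_two_or_zero64(1)
+ * and util_is_power_of_two_or_zero64(4096) all return true, while
+ * util_is_power_of_two_or_zero64(48) returns false.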
+ */ +static inline bool +util_is_power_of_two_or_zero64(uint64_t v) +{ + return (v & (v - 1)) == 0; +} + +/* Determine if an unsigned value is a power of two. + * + * \note + * Zero is \b not treated as a power of two. + */ +static inline bool +util_is_power_of_two_nonzero(unsigned v) +{ + /* __POPCNT__ is different from HAVE___BUILTIN_POPCOUNT. The latter + * indicates the existence of the __builtin_popcount function. The former + * indicates that _mm_popcnt_u32 exists and is a native instruction. + * + * The other alternative is to use SSE 4.2 compile-time flags. This has + * two drawbacks. First, there is currently no build infrastructure for + * SSE 4.2 (only 4.1), so that would have to be added. Second, some AMD + * CPUs support POPCNT but not SSE 4.2 (e.g., Barcelona). + */ +#ifdef __POPCNT__ + return _mm_popcnt_u32(v) == 1; +#else + return v != 0 && (v & (v - 1)) == 0; +#endif +} + +/* For looping over a bitmask when you want to loop over consecutive bits + * manually, for example: + * + * while (mask) { + * int start, count, i; + * + * u_bit_scan_consecutive_range(&mask, &start, &count); + * + * for (i = 0; i < count; i++) + * ... process element (start+i) + * } + */ +static inline void +u_bit_scan_consecutive_range(unsigned *mask, int *start, int *count) +{ + if (*mask == 0xffffffff) { + *start = 0; + *count = 32; + *mask = 0; + return; + } + *start = ffs(*mask) - 1; + *count = ffs(~(*mask >> *start)) - 1; + *mask &= ~(((1u << *count) - 1) << *start); +} + +static inline void +u_bit_scan_consecutive_range64(uint64_t *mask, int *start, int *count) +{ + if (*mask == ~0ull) { + *start = 0; + *count = 64; + *mask = 0; + return; + } + *start = ffsll(*mask) - 1; + *count = ffsll(~(*mask >> *start)) - 1; + *mask &= ~(((((uint64_t)1) << *count) - 1) << *start); +} + + +/** + * Find last bit set in a word. The least significant bit is 1. + * Return 0 if no bits are set. + * Essentially ffs() in the reverse direction. + */ +static inline unsigned +util_last_bit(unsigned u) +{ +#if defined(HAVE___BUILTIN_CLZ) + return u == 0 ? 0 : 32 - __builtin_clz(u); +#elif defined(_MSC_VER) && (_M_IX86 || _M_ARM || _M_AMD64 || _M_IA64) + unsigned long index; + if (_BitScanReverse(&index, u)) + return index + 1; + else + return 0; +#else + unsigned r = 0; + while (u) { + r++; + u >>= 1; + } + return r; +#endif +} + +/** + * Find last bit set in a word. The least significant bit is 1. + * Return 0 if no bits are set. + * Essentially ffsll() in the reverse direction. + */ +static inline unsigned +util_last_bit64(uint64_t u) +{ +#if defined(HAVE___BUILTIN_CLZLL) + return u == 0 ? 0 : 64 - __builtin_clzll(u); +#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM64 || _M_IA64) + unsigned long index; + if (_BitScanReverse64(&index, u)) + return index + 1; + else + return 0; +#else + unsigned r = 0; + while (u) { + r++; + u >>= 1; + } + return r; +#endif +} + +/** + * Find last bit in a word that does not match the sign bit. The least + * significant bit is 1. + * Return 0 if no bits are set. + */ +static inline unsigned +util_last_bit_signed(int i) +{ + if (i >= 0) + return util_last_bit(i); + else + return util_last_bit(~(unsigned)i); +} + +/* Returns a bitfield in which the first count bits starting at start are + * set. 
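+ *
+ * For example, u_bit_consecutive(4, 3) returns 0x00000070 (bits 4..6 set),
+ * and util_last_bit(0x00000070) returns 7.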
+ */ +static inline unsigned +u_bit_consecutive(unsigned start, unsigned count) +{ + assert(start + count <= 32); + if (count == 32) + return ~0; + return ((1u << count) - 1) << start; +} + +static inline uint64_t +u_bit_consecutive64(unsigned start, unsigned count) +{ + assert(start + count <= 64); + if (count == 64) + return ~(uint64_t)0; + return (((uint64_t)1 << count) - 1) << start; +} + +/** + * Return number of bits set in n. + */ +static inline unsigned +util_bitcount(unsigned n) +{ +#if defined(HAVE___BUILTIN_POPCOUNT) + return __builtin_popcount(n); +#else + /* K&R classic bitcount. + * + * For each iteration, clear the LSB from the bitfield. + * Requires only one iteration per set bit, instead of + * one iteration per bit less than highest set bit. + */ + unsigned bits; + for (bits = 0; n; bits++) { + n &= n - 1; + } + return bits; +#endif +} + +static inline unsigned +util_bitcount64(uint64_t n) +{ +#ifdef HAVE___BUILTIN_POPCOUNTLL + return __builtin_popcountll(n); +#else + return util_bitcount(n) + util_bitcount(n >> 32); +#endif +} + +#ifdef __cplusplus +} +#endif + +#endif /* BITSCAN_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/bitset.h b/third_party/rust/glslopt/glsl-optimizer/src/util/bitset.h new file mode 100644 index 0000000000..0fdfe205f3 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/bitset.h @@ -0,0 +1,261 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2006 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file bitset.h + * \brief Bitset of arbitrary size definitions. 
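+ *
+ * Typical usage, as a minimal sketch:
+ *
+ *    BITSET_DECLARE(live, 128);
+ *    BITSET_ZERO(live);
+ *    BITSET_SET(live, 5);
+ *    if (BITSET_TEST(live, 5)) {
+ *       ... bit 5 is set ...
+ *    }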
+ * \author Michal Krol + */ + +#ifndef BITSET_H +#define BITSET_H + +#include "util/bitscan.h" +#include "util/macros.h" + +/**************************************************************************** + * generic bitset implementation + */ + +#define BITSET_WORD unsigned int +#define BITSET_WORDBITS (sizeof (BITSET_WORD) * 8) + +/* bitset declarations + */ +#define BITSET_WORDS(bits) (((bits) + BITSET_WORDBITS - 1) / BITSET_WORDBITS) +#define BITSET_DECLARE(name, bits) BITSET_WORD name[BITSET_WORDS(bits)] + +/* bitset operations + */ +#define BITSET_COPY(x, y) memcpy( (x), (y), sizeof (x) ) +#define BITSET_EQUAL(x, y) (memcmp( (x), (y), sizeof (x) ) == 0) +#define BITSET_ZERO(x) memset( (x), 0, sizeof (x) ) +#define BITSET_ONES(x) memset( (x), 0xff, sizeof (x) ) + +#define BITSET_BITWORD(b) ((b) / BITSET_WORDBITS) +#define BITSET_BIT(b) (1u << ((b) % BITSET_WORDBITS)) + +/* single bit operations + */ +#define BITSET_TEST(x, b) (((x)[BITSET_BITWORD(b)] & BITSET_BIT(b)) != 0) +#define BITSET_SET(x, b) ((x)[BITSET_BITWORD(b)] |= BITSET_BIT(b)) +#define BITSET_CLEAR(x, b) ((x)[BITSET_BITWORD(b)] &= ~BITSET_BIT(b)) + +#define BITSET_MASK(b) (((b) % BITSET_WORDBITS == 0) ? ~0 : BITSET_BIT(b) - 1) +#define BITSET_RANGE(b, e) ((BITSET_MASK((e) + 1)) & ~(BITSET_BIT(b) - 1)) + +/* bit range operations + */ +#define BITSET_TEST_RANGE(x, b, e) \ + (BITSET_BITWORD(b) == BITSET_BITWORD(e) ? \ + (((x)[BITSET_BITWORD(b)] & BITSET_RANGE(b, e)) != 0) : \ + (assert (!"BITSET_TEST_RANGE: bit range crosses word boundary"), 0)) +#define BITSET_SET_RANGE(x, b, e) \ + (BITSET_BITWORD(b) == BITSET_BITWORD(e) ? \ + ((x)[BITSET_BITWORD(b)] |= BITSET_RANGE(b, e)) : \ + (assert (!"BITSET_SET_RANGE: bit range crosses word boundary"), 0)) +#define BITSET_CLEAR_RANGE(x, b, e) \ + (BITSET_BITWORD(b) == BITSET_BITWORD(e) ? \ + ((x)[BITSET_BITWORD(b)] &= ~BITSET_RANGE(b, e)) : \ + (assert (!"BITSET_CLEAR_RANGE: bit range crosses word boundary"), 0)) + +/* Get first bit set in a bitset. + */ +static inline int +__bitset_ffs(const BITSET_WORD *x, int n) +{ + int i; + + for (i = 0; i < n; i++) { + if (x[i]) + return ffs(x[i]) + BITSET_WORDBITS * i; + } + + return 0; +} + +#define BITSET_FFS(x) __bitset_ffs(x, ARRAY_SIZE(x)) + +static inline unsigned +__bitset_next_set(unsigned i, BITSET_WORD *tmp, + const BITSET_WORD *set, unsigned size) +{ + unsigned bit, word; + + /* NOTE: The initial conditions for this function are very specific. At + * the start of the loop, the tmp variable must be set to *set and the + * initial i value set to 0. This way, if there is a bit set in the first + * word, we ignore the i-value and just grab that bit (so 0 is ok, even + * though 0 may be returned). If the first word is 0, then the value of + * `word` will be 0 and we will go on to look at the second word. 
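+ *
+ * Callers normally reach this helper through the BITSET_FOREACH_SET()
+ * macro below, e.g. for a set declared with BITSET_DECLARE(live, 128)
+ * (sketch):
+ *
+ *    unsigned i;
+ *    BITSET_FOREACH_SET(i, live, 128) {
+ *       ... process bit i ...
+ *    }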
+ */ + word = BITSET_BITWORD(i); + while (*tmp == 0) { + word++; + + if (word >= BITSET_WORDS(size)) + return size; + + *tmp = set[word]; + } + + /* Find the next set bit in the non-zero word */ + bit = ffs(*tmp) - 1; + + /* Unset the bit */ + *tmp &= ~(1ull << bit); + + return word * BITSET_WORDBITS + bit; +} + +/** + * Iterates over each set bit in a set + * + * @param __i iteration variable, bit number + * @param __set the bitset to iterate (will not be modified) + * @param __size number of bits in the set to consider + */ +#define BITSET_FOREACH_SET(__i, __set, __size) \ + for (BITSET_WORD __tmp = *(__set), *__foo = &__tmp; __foo != NULL; __foo = NULL) \ + for (__i = 0; \ + (__i = __bitset_next_set(__i, &__tmp, __set, __size)) < __size;) + +#ifdef __cplusplus + +/** + * Simple C++ wrapper of a bitset type of static size, with value semantics + * and basic bitwise arithmetic operators. The operators defined below are + * expected to have the same semantics as the same operator applied to other + * fundamental integer types. T is the name of the struct to instantiate + * it as, and N is the number of bits in the bitset. + */ +#define DECLARE_BITSET_T(T, N) struct T { \ + EXPLICIT_CONVERSION \ + operator bool() const \ + { \ + for (unsigned i = 0; i < BITSET_WORDS(N); i++) \ + if (words[i]) \ + return true; \ + return false; \ + } \ + \ + T & \ + operator=(int x) \ + { \ + const T c = {{ (BITSET_WORD)x }}; \ + return *this = c; \ + } \ + \ + friend bool \ + operator==(const T &b, const T &c) \ + { \ + return BITSET_EQUAL(b.words, c.words); \ + } \ + \ + friend bool \ + operator!=(const T &b, const T &c) \ + { \ + return !(b == c); \ + } \ + \ + friend bool \ + operator==(const T &b, int x) \ + { \ + const T c = {{ (BITSET_WORD)x }}; \ + return b == c; \ + } \ + \ + friend bool \ + operator!=(const T &b, int x) \ + { \ + return !(b == x); \ + } \ + \ + friend T \ + operator~(const T &b) \ + { \ + T c; \ + for (unsigned i = 0; i < BITSET_WORDS(N); i++) \ + c.words[i] = ~b.words[i]; \ + return c; \ + } \ + \ + T & \ + operator|=(const T &b) \ + { \ + for (unsigned i = 0; i < BITSET_WORDS(N); i++) \ + words[i] |= b.words[i]; \ + return *this; \ + } \ + \ + friend T \ + operator|(const T &b, const T &c) \ + { \ + T d = b; \ + d |= c; \ + return d; \ + } \ + \ + T & \ + operator&=(const T &b) \ + { \ + for (unsigned i = 0; i < BITSET_WORDS(N); i++) \ + words[i] &= b.words[i]; \ + return *this; \ + } \ + \ + friend T \ + operator&(const T &b, const T &c) \ + { \ + T d = b; \ + d &= c; \ + return d; \ + } \ + \ + bool \ + test(unsigned i) const \ + { \ + return BITSET_TEST(words, i); \ + } \ + \ + T & \ + set(unsigned i) \ + { \ + BITSET_SET(words, i); \ + return *this; \ + } \ + \ + T & \ + clear(unsigned i) \ + { \ + BITSET_CLEAR(words, i); \ + return *this; \ + } \ + \ + BITSET_WORD words[BITSET_WORDS(N)]; \ + } + +#endif + +#endif diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/blob.c b/third_party/rust/glslopt/glsl-optimizer/src/util/blob.c new file mode 100644 index 0000000000..db192146ac --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/blob.c @@ -0,0 +1,368 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to 
permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <string.h> + +#include "blob.h" +#include "u_math.h" + +#ifdef HAVE_VALGRIND +#include <valgrind.h> +#include <memcheck.h> +#define VG(x) x +#else +#define VG(x) +#endif + +#define BLOB_INITIAL_SIZE 4096 + +/* Ensure that \blob will be able to fit an additional object of size + * \additional. The growing (if any) will occur by doubling the existing + * allocation. + */ +static bool +grow_to_fit(struct blob *blob, size_t additional) +{ + size_t to_allocate; + uint8_t *new_data; + + if (blob->out_of_memory) + return false; + + if (blob->size + additional <= blob->allocated) + return true; + + if (blob->fixed_allocation) { + blob->out_of_memory = true; + return false; + } + + if (blob->allocated == 0) + to_allocate = BLOB_INITIAL_SIZE; + else + to_allocate = blob->allocated * 2; + + to_allocate = MAX2(to_allocate, blob->allocated + additional); + + new_data = realloc(blob->data, to_allocate); + if (new_data == NULL) { + blob->out_of_memory = true; + return false; + } + + blob->data = new_data; + blob->allocated = to_allocate; + + return true; +} + +/* Align the blob->size so that reading or writing a value at (blob->data + + * blob->size) will result in an access aligned to a granularity of \alignment + * bytes. + * + * \return True unless allocation fails + */ +static bool +align_blob(struct blob *blob, size_t alignment) +{ + const size_t new_size = align64(blob->size, alignment); + + if (blob->size < new_size) { + if (!grow_to_fit(blob, new_size - blob->size)) + return false; + + if (blob->data) + memset(blob->data + blob->size, 0, new_size - blob->size); + blob->size = new_size; + } + + return true; +} + +static void +align_blob_reader(struct blob_reader *blob, size_t alignment) +{ + blob->current = blob->data + align64(blob->current - blob->data, alignment); +} + +void +blob_init(struct blob *blob) +{ + blob->data = NULL; + blob->allocated = 0; + blob->size = 0; + blob->fixed_allocation = false; + blob->out_of_memory = false; +} + +void +blob_init_fixed(struct blob *blob, void *data, size_t size) +{ + blob->data = data; + blob->allocated = size; + blob->size = 0; + blob->fixed_allocation = true; + blob->out_of_memory = false; +} + +void +blob_finish_get_buffer(struct blob *blob, void **buffer, size_t *size) +{ + *buffer = blob->data; + *size = blob->size; + blob->data = NULL; + + /* Trim the buffer. */ + *buffer = realloc(*buffer, *size); +} + +bool +blob_overwrite_bytes(struct blob *blob, + size_t offset, + const void *bytes, + size_t to_write) +{ + /* Detect an attempt to overwrite data out of bounds. 
*/ + if (offset + to_write < offset || blob->size < offset + to_write) + return false; + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(bytes, to_write)); + + if (blob->data) + memcpy(blob->data + offset, bytes, to_write); + + return true; +} + +bool +blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write) +{ + if (! grow_to_fit(blob, to_write)) + return false; + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(bytes, to_write)); + + if (blob->data && to_write > 0) + memcpy(blob->data + blob->size, bytes, to_write); + blob->size += to_write; + + return true; +} + +intptr_t +blob_reserve_bytes(struct blob *blob, size_t to_write) +{ + intptr_t ret; + + if (! grow_to_fit (blob, to_write)) + return -1; + + ret = blob->size; + blob->size += to_write; + + return ret; +} + +intptr_t +blob_reserve_uint32(struct blob *blob) +{ + align_blob(blob, sizeof(uint32_t)); + return blob_reserve_bytes(blob, sizeof(uint32_t)); +} + +intptr_t +blob_reserve_intptr(struct blob *blob) +{ + align_blob(blob, sizeof(intptr_t)); + return blob_reserve_bytes(blob, sizeof(intptr_t)); +} + +#define BLOB_WRITE_TYPE(name, type) \ +bool \ +name(struct blob *blob, type value) \ +{ \ + align_blob(blob, sizeof(value)); \ + return blob_write_bytes(blob, &value, sizeof(value)); \ +} + +BLOB_WRITE_TYPE(blob_write_uint8, uint8_t) +BLOB_WRITE_TYPE(blob_write_uint16, uint16_t) +BLOB_WRITE_TYPE(blob_write_uint32, uint32_t) +BLOB_WRITE_TYPE(blob_write_uint64, uint64_t) +BLOB_WRITE_TYPE(blob_write_intptr, intptr_t) + +#define ASSERT_ALIGNED(_offset, _align) \ + assert(align64((_offset), (_align)) == (_offset)) + +bool +blob_overwrite_uint8 (struct blob *blob, + size_t offset, + uint8_t value) +{ + ASSERT_ALIGNED(offset, sizeof(value)); + return blob_overwrite_bytes(blob, offset, &value, sizeof(value)); +} + +bool +blob_overwrite_uint32 (struct blob *blob, + size_t offset, + uint32_t value) +{ + ASSERT_ALIGNED(offset, sizeof(value)); + return blob_overwrite_bytes(blob, offset, &value, sizeof(value)); +} + +bool +blob_overwrite_intptr (struct blob *blob, + size_t offset, + intptr_t value) +{ + ASSERT_ALIGNED(offset, sizeof(value)); + return blob_overwrite_bytes(blob, offset, &value, sizeof(value)); +} + +bool +blob_write_string(struct blob *blob, const char *str) +{ + return blob_write_bytes(blob, str, strlen(str) + 1); +} + +void +blob_reader_init(struct blob_reader *blob, const void *data, size_t size) +{ + blob->data = data; + blob->end = blob->data + size; + blob->current = data; + blob->overrun = false; +} + +/* Check that an object of size \size can be read from this blob. + * + * If not, set blob->overrun to indicate that we attempted to read too far. + */ +static bool +ensure_can_read(struct blob_reader *blob, size_t size) +{ + if (blob->overrun) + return false; + + if (blob->current <= blob->end && blob->end - blob->current >= size) + return true; + + blob->overrun = true; + + return false; +} + +const void * +blob_read_bytes(struct blob_reader *blob, size_t size) +{ + const void *ret; + + if (! ensure_can_read (blob, size)) + return NULL; + + ret = blob->current; + + blob->current += size; + + return ret; +} + +void +blob_copy_bytes(struct blob_reader *blob, void *dest, size_t size) +{ + const void *bytes; + + bytes = blob_read_bytes(blob, size); + if (bytes == NULL || size == 0) + return; + + memcpy(dest, bytes, size); +} + +void +blob_skip_bytes(struct blob_reader *blob, size_t size) +{ + if (ensure_can_read (blob, size)) + blob->current += size; +} + +/* These next three read functions have identical form. 
If we add any beyond + * these first three we should probably switch to generating these with a + * preprocessor macro. +*/ + +#define BLOB_READ_TYPE(name, type) \ +type \ +name(struct blob_reader *blob) \ +{ \ + type ret; \ + int size = sizeof(ret); \ + align_blob_reader(blob, size); \ + if (! ensure_can_read(blob, size)) \ + return 0; \ + ret = *((type*) blob->current); \ + blob->current += size; \ + return ret; \ +} + +BLOB_READ_TYPE(blob_read_uint8, uint8_t) +BLOB_READ_TYPE(blob_read_uint16, uint16_t) +BLOB_READ_TYPE(blob_read_uint32, uint32_t) +BLOB_READ_TYPE(blob_read_uint64, uint64_t) +BLOB_READ_TYPE(blob_read_intptr, intptr_t) + +char * +blob_read_string(struct blob_reader *blob) +{ + int size; + char *ret; + uint8_t *nul; + + /* If we're already at the end, then this is an overrun. */ + if (blob->current >= blob->end) { + blob->overrun = true; + return NULL; + } + + /* Similarly, if there is no zero byte in the data remaining in this blob, + * we also consider that an overrun. + */ + nul = memchr(blob->current, 0, blob->end - blob->current); + + if (nul == NULL) { + blob->overrun = true; + return NULL; + } + + size = nul - blob->current + 1; + + assert(ensure_can_read(blob, size)); + + ret = (char *) blob->current; + + blob->current += size; + + return ret; +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/blob.h b/third_party/rust/glslopt/glsl-optimizer/src/util/blob.h new file mode 100644 index 0000000000..e1e156eb43 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/blob.h @@ -0,0 +1,418 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BLOB_H +#define BLOB_H + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* The blob functions implement a simple, low-level API for serializing and + * deserializing. + * + * All objects written to a blob will be serialized directly, (without any + * additional meta-data to describe the data written). Therefore, it is the + * caller's responsibility to ensure that any data can be read later, (either + * by knowing exactly what data is expected, or by writing to the blob + * sufficient meta-data to describe what has been written). + * + * A blob is efficient in that it dynamically grows by doubling in size, so + * allocation costs are logarithmic. + */ + +struct blob { + /* The data actually written to the blob. 
*/ + uint8_t *data; + + /** Number of bytes that have been allocated for \c data. */ + size_t allocated; + + /** The number of bytes that have actual data written to them. */ + size_t size; + + /** True if \c data a fixed allocation that we cannot resize + * + * \see blob_init_fixed + */ + bool fixed_allocation; + + /** + * True if we've ever failed to realloc or if we go pas the end of a fixed + * allocation blob. + */ + bool out_of_memory; +}; + +/* When done reading, the caller can ensure that everything was consumed by + * checking the following: + * + * 1. blob->current should be equal to blob->end, (if not, too little was + * read). + * + * 2. blob->overrun should be false, (otherwise, too much was read). + */ +struct blob_reader { + const uint8_t *data; + const uint8_t *end; + const uint8_t *current; + bool overrun; +}; + +/** + * Init a new, empty blob. + */ +void +blob_init(struct blob *blob); + +/** + * Init a new, fixed-size blob. + * + * A fixed-size blob has a fixed block of data that will not be freed on + * blob_finish and will never be grown. If we hit the end, we simply start + * returning false from the write functions. + * + * If a fixed-size blob has a NULL data pointer then the data is written but + * it otherwise operates normally. This can be used to determine the size + * that will be required to write a given data structure. + */ +void +blob_init_fixed(struct blob *blob, void *data, size_t size); + +/** + * Finish a blob and free its memory. + * + * If \blob was initialized with blob_init_fixed, the data pointer is + * considered to be owned by the user and will not be freed. + */ +static inline void +blob_finish(struct blob *blob) +{ + if (!blob->fixed_allocation) + free(blob->data); +} + +void +blob_finish_get_buffer(struct blob *blob, void **buffer, size_t *size); + +/** + * Add some unstructured, fixed-size data to a blob. + * + * \return True unless allocation failed. + */ +bool +blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write); + +/** + * Reserve space in \blob for a number of bytes. + * + * Space will be allocated within the blob for these byes, but the bytes will + * be left uninitialized. The caller is expected to use \sa + * blob_overwrite_bytes to write to these bytes. + * + * \return An offset to space allocated within \blob to which \to_write bytes + * can be written, (or -1 in case of any allocation error). + */ +intptr_t +blob_reserve_bytes(struct blob *blob, size_t to_write); + +/** + * Similar to \sa blob_reserve_bytes, but only reserves an uint32_t worth of + * space. Note that this must be used if later reading with \sa + * blob_read_uint32, since it aligns the offset correctly. + */ +intptr_t +blob_reserve_uint32(struct blob *blob); + +/** + * Similar to \sa blob_reserve_bytes, but only reserves an intptr_t worth of + * space. Note that this must be used if later reading with \sa + * blob_read_intptr, since it aligns the offset correctly. + */ +intptr_t +blob_reserve_intptr(struct blob *blob); + +/** + * Overwrite some data previously written to the blob. + * + * Writes data to an existing portion of the blob at an offset of \offset. + * This data range must have previously been written to the blob by one of the + * blob_write_* calls. + * + * For example usage, see blob_overwrite_uint32 + * + * \return True unless the requested offset or offset+to_write lie outside + * the current blob's size. 
+ */ +bool +blob_overwrite_bytes(struct blob *blob, + size_t offset, + const void *bytes, + size_t to_write); + +/** + * Add a uint8_t to a blob. + * + * \return True unless allocation failed. + */ +bool +blob_write_uint8(struct blob *blob, uint8_t value); + +/** + * Overwrite a uint8_t previously written to the blob. + * + * Writes a uint8_t value to an existing portion of the blob at an offset of + * \offset. This data range must have previously been written to the blob by + * one of the blob_write_* calls. + * + * \return True unless the requested position or position+to_write lie outside + * the current blob's size. + */ +bool +blob_overwrite_uint8(struct blob *blob, + size_t offset, + uint8_t value); + +/** + * Add a uint16_t to a blob. + * + * \note This function will only write to a uint16_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_uint16(struct blob *blob, uint16_t value); + +/** + * Add a uint32_t to a blob. + * + * \note This function will only write to a uint32_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_uint32(struct blob *blob, uint32_t value); + +/** + * Overwrite a uint32_t previously written to the blob. + * + * Writes a uint32_t value to an existing portion of the blob at an offset of + * \offset. This data range must have previously been written to the blob by + * one of the blob_write_* calls. + * + * + * The expected usage is something like the following pattern: + * + * size_t offset; + * + * offset = blob_reserve_uint32(blob); + * ... various blob write calls, writing N items ... + * blob_overwrite_uint32 (blob, offset, N); + * + * \return True unless the requested position or position+to_write lie outside + * the current blob's size. + */ +bool +blob_overwrite_uint32(struct blob *blob, + size_t offset, + uint32_t value); + +/** + * Add a uint64_t to a blob. + * + * \note This function will only write to a uint64_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_uint64(struct blob *blob, uint64_t value); + +/** + * Add an intptr_t to a blob. + * + * \note This function will only write to an intptr_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_intptr(struct blob *blob, intptr_t value); + +/** + * Overwrite an intptr_t previously written to the blob. + * + * Writes a intptr_t value to an existing portion of the blob at an offset of + * \offset. This data range must have previously been written to the blob by + * one of the blob_write_* calls. + * + * For example usage, see blob_overwrite_uint32 + * + * \return True unless the requested position or position+to_write lie outside + * the current blob's size. 
+ */ +bool +blob_overwrite_intptr(struct blob *blob, + size_t offset, + intptr_t value); + +/** + * Add a NULL-terminated string to a blob, (including the NULL terminator). + * + * \return True unless allocation failed. + */ +bool +blob_write_string(struct blob *blob, const char *str); + +/** + * Start reading a blob, (initializing the contents of \blob for reading). + * + * After this call, the caller can use the various blob_read_* functions to + * read elements from the data array. + * + * For all of the blob_read_* functions, if there is insufficient data + * remaining, the functions will do nothing, (perhaps returning default values + * such as 0). The caller can detect this by noting that the blob_reader's + * current value is unchanged before and after the call. + */ +void +blob_reader_init(struct blob_reader *blob, const void *data, size_t size); + +/** + * Read some unstructured, fixed-size data from the current location, (and + * update the current location to just past this data). + * + * \note The memory returned belongs to the data underlying the blob reader. The + * caller must copy the data in order to use it after the lifetime of the data + * underlying the blob reader. + * + * \return The bytes read (see note above about memory lifetime). + */ +const void * +blob_read_bytes(struct blob_reader *blob, size_t size); + +/** + * Read some unstructured, fixed-size data from the current location, copying + * it to \dest (and update the current location to just past this data) + */ +void +blob_copy_bytes(struct blob_reader *blob, void *dest, size_t size); + +/** + * Skip \size bytes within the blob. + */ +void +blob_skip_bytes(struct blob_reader *blob, size_t size); + +/** + * Read a uint8_t from the current location, (and update the current location + * to just past this uint8_t). + * + * \return The uint8_t read + */ +uint8_t +blob_read_uint8(struct blob_reader *blob); + +/** + * Read a uint16_t from the current location, (and update the current location + * to just past this uint16_t). + * + * \note This function will only read from a uint16_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. + * + * \return The uint16_t read + */ +uint16_t +blob_read_uint16(struct blob_reader *blob); + +/** + * Read a uint32_t from the current location, (and update the current location + * to just past this uint32_t). + * + * \note This function will only read from a uint32_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. + * + * \return The uint32_t read + */ +uint32_t +blob_read_uint32(struct blob_reader *blob); + +/** + * Read a uint64_t from the current location, (and update the current location + * to just past this uint64_t). + * + * \note This function will only read from a uint64_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. + * + * \return The uint64_t read + */ +uint64_t +blob_read_uint64(struct blob_reader *blob); + +/** + * Read an intptr_t value from the current location, (and update the + * current location to just past this intptr_t). + * + * \note This function will only read from an intptr_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. + * + * \return The intptr_t read + */ +intptr_t +blob_read_intptr(struct blob_reader *blob); + +/** + * Read a NULL-terminated string from the current location, (and update the + * current location to just past this string). 
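+ *
+ * For example, a string written with blob_write_string() can be read back
+ * as follows (a minimal sketch; the returned pointer stays valid only while
+ * the writer's buffer is alive):
+ *
+ *    struct blob w;
+ *    struct blob_reader r;
+ *
+ *    blob_init(&w);
+ *    blob_write_string(&w, "main");
+ *    blob_reader_init(&r, w.data, w.size);
+ *    const char *name = blob_read_string(&r);
+ *    ... use name, then blob_finish(&w) ...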
+ * + * \note The memory returned belongs to the data underlying the blob reader. The + * caller must copy the string in order to use the string after the lifetime + * of the data underlying the blob reader. + * + * \return The string read (see note above about memory lifetime). However, if + * there is no NULL byte remaining within the blob, this function returns + * NULL. + */ +char * +blob_read_string(struct blob_reader *blob); + +#ifdef __cplusplus +} +#endif + +#endif /* BLOB_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/compiler.h b/third_party/rust/glslopt/glsl-optimizer/src/util/compiler.h new file mode 100644 index 0000000000..43a06b4313 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/compiler.h @@ -0,0 +1,76 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + + +/** + * \file compiler.h + * Compiler-related stuff. + */ + + +#ifndef COMPILER_H +#define COMPILER_H + + +#include <assert.h> + +#include "util/macros.h" + +#include "c99_compat.h" /* inline, __func__, etc. */ + + +/** + * Either define MESA_BIG_ENDIAN or MESA_LITTLE_ENDIAN, and CPU_TO_LE32. + * Do not use these unless absolutely necessary! + * Try to use a runtime test instead. + * For now, only used by some DRI hardware drivers for color/texel packing. 
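+ *
+ * For example, a driver that must store a 32-bit word in little-endian
+ * order regardless of host byte order might write (illustrative only):
+ *
+ *    buf[i] = CPU_TO_LE32(pixel);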
+ */ +#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN +#if defined(__linux__) +#include <byteswap.h> +#define CPU_TO_LE32( x ) bswap_32( x ) +#elif defined(__APPLE__) +#include <CoreFoundation/CFByteOrder.h> +#define CPU_TO_LE32( x ) CFSwapInt32HostToLittle( x ) +#elif defined(__OpenBSD__) +#include <sys/types.h> +#define CPU_TO_LE32( x ) htole32( x ) +#else /*__linux__ */ +#include <sys/endian.h> +#define CPU_TO_LE32( x ) bswap32( x ) +#endif /*__linux__*/ +#define MESA_BIG_ENDIAN 1 +#else +#define CPU_TO_LE32( x ) ( x ) +#define MESA_LITTLE_ENDIAN 1 +#endif +#define LE32_TO_CPU( x ) CPU_TO_LE32( x ) + + + +#define IEEE_ONE 0x3f800000 + + +#endif /* COMPILER_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/crc32.c b/third_party/rust/glslopt/glsl-optimizer/src/util/crc32.c new file mode 100644 index 0000000000..425046ab5f --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/crc32.c @@ -0,0 +1,134 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * CRC32 implementation. 
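+ *
+ * For example, a caller can hash an in-memory buffer such as a shader
+ * source string (illustrative only):
+ *
+ *    uint32_t hash = util_hash_crc32(source, strlen(source));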
+ * + * @author Jose Fonseca + */ + + +#ifdef HAVE_ZLIB +#include <zlib.h> +#endif +#include "crc32.h" + + +static const uint32_t +util_crc32_table[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; + + +/** + * @sa http://www.w3.org/TR/PNG/#D-CRCAppendix + */ +uint32_t +util_hash_crc32(const void *data, size_t size) +{ + const uint8_t *p = data; + uint32_t crc = 0xffffffff; + +#ifdef 
HAVE_ZLIB + /* Prefer zlib's implementation for better performance. + * zlib's uInt is always "unsigned int" while size_t can be 64bit. + * Since 1.2.9 there's crc32_z that takes size_t, but use the more + * available function to avoid build system complications. + */ + if ((uInt)size == size) + return ~crc32(0, data, size); +#endif + + while (size--) + crc = util_crc32_table[(crc ^ *p++) & 0xff] ^ (crc >> 8); + + return crc; +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/crc32.h b/third_party/rust/glslopt/glsl-optimizer/src/util/crc32.h new file mode 100644 index 0000000000..b6a21f4170 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/crc32.h @@ -0,0 +1,55 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * CRC32 function. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#ifndef CRC32_H_ +#define CRC32_H_ + +#include <stdlib.h> +#include <stdint.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +uint32_t +util_hash_crc32(const void *data, size_t size); + + +#ifdef __cplusplus +} +#endif + +#endif /* CRC32_H_ */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/debug.c b/third_party/rust/glslopt/glsl-optimizer/src/util/debug.c new file mode 100644 index 0000000000..89ae613107 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/debug.c @@ -0,0 +1,114 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <errno.h> +#include <string.h> +#include "debug.h" +#include "u_string.h" + +uint64_t +parse_debug_string(const char *debug, + const struct debug_control *control) +{ + uint64_t flag = 0; + + if (debug != NULL) { + for (; control->string != NULL; control++) { + if (!strcmp(debug, "all")) { + flag |= control->flag; + + } else { + const char *s = debug; + unsigned n; + + for (; n = strcspn(s, ", "), *s; s += MAX2(1, n)) { + if (strlen(control->string) == n && + !strncmp(control->string, s, n)) + flag |= control->flag; + } + } + } + } + + return flag; +} + +bool +comma_separated_list_contains(const char *list, const char *s) +{ + assert(list); + const size_t len = strlen(s); + + for (unsigned n; n = strcspn(list, ","), *list; list += MAX2(1, n)) { + if (n == len && !strncmp(list, s, n)) + return true; + } + + return false; +} + +/** + * Reads an environment variable and interprets its value as a boolean. + * + * Recognizes 0/false/no and 1/true/yes. Other values result in the default value. + */ +bool +env_var_as_boolean(const char *var_name, bool default_value) +{ + const char *str = getenv(var_name); + if (str == NULL) + return default_value; + + if (strcmp(str, "1") == 0 || + strcasecmp(str, "true") == 0 || + strcasecmp(str, "y") == 0 || + strcasecmp(str, "yes") == 0) { + return true; + } else if (strcmp(str, "0") == 0 || + strcasecmp(str, "false") == 0 || + strcasecmp(str, "n") == 0 || + strcasecmp(str, "no") == 0) { + return false; + } else { + return default_value; + } +} + +/** + * Reads an environment variable and interprets its value as a unsigned. + */ +unsigned +env_var_as_unsigned(const char *var_name, unsigned default_value) +{ + char *str = getenv(var_name); + if (str) { + char *end; + unsigned long result; + + errno = 0; + result = strtoul(str, &end, 0); + if (errno == 0 && end != str && *end == '\0') + return result; + } + return default_value; +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/debug.h b/third_party/rust/glslopt/glsl-optimizer/src/util/debug.h new file mode 100644 index 0000000000..bbcc197554 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/debug.h @@ -0,0 +1,53 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _UTIL_DEBUG_H +#define _UTIL_DEBUG_H + +#include <stdint.h> +#include <stdbool.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct debug_control { + const char * string; + uint64_t flag; +}; + +uint64_t +parse_debug_string(const char *debug, + const struct debug_control *control); +bool +comma_separated_list_contains(const char *list, const char *s); +bool +env_var_as_boolean(const char *var_name, bool default_value); +unsigned +env_var_as_unsigned(const char *var_name, unsigned default_value); + +#ifdef __cplusplus +} /* extern C */ +#endif + +#endif /* _UTIL_DEBUG_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/detect_os.h b/third_party/rust/glslopt/glsl-optimizer/src/util/detect_os.h new file mode 100644 index 0000000000..6506948e03 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/detect_os.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright 2008 VMware, Inc. */ + +/** + * Auto-detect the operating system family. + * + * See also: + * - http://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html + * - echo | gcc -dM -E - | sort + * - http://msdn.microsoft.com/en-us/library/b0084kay.aspx + * + * @author José Fonseca <jfonseca@vmware.com> + */ + +#ifndef DETECT_OS_H +#define DETECT_OS_H + +#if defined(__linux__) +#define DETECT_OS_LINUX 1 +#define DETECT_OS_UNIX 1 +#endif + +/* + * Android defines __linux__, so DETECT_OS_LINUX and DETECT_OS_UNIX will + * also be defined. 
+ */ +#if defined(ANDROID) +#define DETECT_OS_ANDROID 1 +#endif + +#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +#define DETECT_OS_FREEBSD 1 +#define DETECT_OS_BSD 1 +#define DETECT_OS_UNIX 1 +#endif + +#if defined(__OpenBSD__) +#define DETECT_OS_OPENBSD 1 +#define DETECT_OS_BSD 1 +#define DETECT_OS_UNIX 1 +#endif + +#if defined(__NetBSD__) +#define DETECT_OS_NETBSD 1 +#define DETECT_OS_BSD 1 +#define DETECT_OS_UNIX 1 +#endif + +#if defined(__DragonFly__) +#define DETECT_OS_DRAGONFLY 1 +#define DETECT_OS_BSD 1 +#define DETECT_OS_UNIX 1 +#endif + +#if defined(__GNU__) +#define DETECT_OS_HURD 1 +#define DETECT_OS_UNIX 1 +#endif + +#if defined(__sun) +#define DETECT_OS_SOLARIS 1 +#define DETECT_OS_UNIX 1 +#endif + +#if defined(__APPLE__) +#define DETECT_OS_APPLE 1 +#define DETECT_OS_UNIX 1 +#endif + +#if defined(_WIN32) || defined(WIN32) +#define DETECT_OS_WINDOWS 1 +#endif + +#if defined(__HAIKU__) +#define DETECT_OS_HAIKU 1 +#define DETECT_OS_UNIX 1 +#endif + +#if defined(__CYGWIN__) +#define DETECT_OS_CYGWIN 1 +#define DETECT_OS_UNIX 1 +#endif + + +/* + * Make sure DETECT_OS_* are always defined, so that they can be used with #if + */ +#ifndef DETECT_OS_ANDROID +#define DETECT_OS_ANDROID 0 +#endif +#ifndef DETECT_OS_APPLE +#define DETECT_OS_APPLE 0 +#endif +#ifndef DETECT_OS_BSD +#define DETECT_OS_BSD 0 +#endif +#ifndef DETECT_OS_CYGWIN +#define DETECT_OS_CYGWIN 0 +#endif +#ifndef DETECT_OS_DRAGONFLY +#define DETECT_OS_DRAGONFLY 0 +#endif +#ifndef DETECT_OS_FREEBSD +#define DETECT_OS_FREEBSD 0 +#endif +#ifndef DETECT_OS_HAIKU +#define DETECT_OS_HAIKU 0 +#endif +#ifndef DETECT_OS_HURD +#define DETECT_OS_HURD 0 +#endif +#ifndef DETECT_OS_LINUX +#define DETECT_OS_LINUX 0 +#endif +#ifndef DETECT_OS_NETBSD +#define DETECT_OS_NETBSD 0 +#endif +#ifndef DETECT_OS_OPENBSD +#define DETECT_OS_OPENBSD 0 +#endif +#ifndef DETECT_OS_SOLARIS +#define DETECT_OS_SOLARIS 0 +#endif +#ifndef DETECT_OS_UNIX +#define DETECT_OS_UNIX 0 +#endif +#ifndef DETECT_OS_WINDOWS +#define DETECT_OS_WINDOWS 0 +#endif + +#endif /* DETECT_OS_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/disk_cache.c b/third_party/rust/glslopt/glsl-optimizer/src/util/disk_cache.c new file mode 100644 index 0000000000..a92d621927 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/disk_cache.c @@ -0,0 +1,1344 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifdef ENABLE_SHADER_CACHE + +#include <ctype.h> +#include <ftw.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/file.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <unistd.h> +#include <fcntl.h> +#include <pwd.h> +#include <errno.h> +#include <dirent.h> +#include <inttypes.h> +#include "zlib.h" + +#ifdef HAVE_ZSTD +#include "zstd.h" +#endif + +#include "util/crc32.h" +#include "util/debug.h" +#include "util/rand_xor.h" +#include "util/u_atomic.h" +#include "util/u_queue.h" +#include "util/mesa-sha1.h" +#include "util/ralloc.h" +#include "util/compiler.h" + +#include "disk_cache.h" + +/* Number of bits to mask off from a cache key to get an index. */ +#define CACHE_INDEX_KEY_BITS 16 + +/* Mask for computing an index from a key. */ +#define CACHE_INDEX_KEY_MASK ((1 << CACHE_INDEX_KEY_BITS) - 1) + +/* The number of keys that can be stored in the index. */ +#define CACHE_INDEX_MAX_KEYS (1 << CACHE_INDEX_KEY_BITS) + +/* The cache version should be bumped whenever a change is made to the + * structure of cache entries or the index. This will give any 3rd party + * applications reading the cache entries a chance to adjust to the changes. + * + * - The cache version is checked internally when reading a cache entry. If we + * ever have a mismatch we are in big trouble as this means we had a cache + * collision. In case of such an event please check the skys for giant + * asteroids and that the entire Mesa team hasn't been eaten by wolves. + * + * - There is no strict requirement that cache versions be backwards + * compatible but effort should be taken to limit disruption where possible. + */ +#define CACHE_VERSION 1 + +/* 3 is the recomended level, with 22 as the absolute maximum */ +#define ZSTD_COMPRESSION_LEVEL 3 + +struct disk_cache { + /* The path to the cache directory. */ + char *path; + bool path_init_failed; + + /* Thread queue for compressing and writing cache entries to disk */ + struct util_queue cache_queue; + + /* Seed for rand, which is used to pick a random directory */ + uint64_t seed_xorshift128plus[2]; + + /* A pointer to the mmapped index file within the cache directory. */ + uint8_t *index_mmap; + size_t index_mmap_size; + + /* Pointer to total size of all objects in cache (within index_mmap) */ + uint64_t *size; + + /* Pointer to stored keys, (within index_mmap). */ + uint8_t *stored_keys; + + /* Maximum size of all cached objects (in bytes). */ + uint64_t max_size; + + /* Driver cache keys. */ + uint8_t *driver_keys_blob; + size_t driver_keys_blob_size; + + disk_cache_put_cb blob_put_cb; + disk_cache_get_cb blob_get_cb; +}; + +struct disk_cache_put_job { + struct util_queue_fence fence; + + struct disk_cache *cache; + + cache_key key; + + /* Copy of cache data to be compressed and written. */ + void *data; + + /* Size of data to be compressed and written. */ + size_t size; + + struct cache_item_metadata cache_item_metadata; +}; + +/* Create a directory named 'path' if it does not already exist. + * + * Returns: 0 if path already exists as a directory or if created. + * -1 in all other cases. + */ +static int +mkdir_if_needed(const char *path) +{ + struct stat sb; + + /* If the path exists already, then our work is done if it's a + * directory, but it's an error if it is not. 
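+ * For example, if "path" already exists but is a regular file, we print a
+ * warning and return -1, and the caller disables the shader cache rather
+ * than trying to write through a bogus location.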
+ */ + if (stat(path, &sb) == 0) { + if (S_ISDIR(sb.st_mode)) { + return 0; + } else { + fprintf(stderr, "Cannot use %s for shader cache (not a directory)" + "---disabling.\n", path); + return -1; + } + } + + int ret = mkdir(path, 0755); + if (ret == 0 || (ret == -1 && errno == EEXIST)) + return 0; + + fprintf(stderr, "Failed to create %s for shader cache (%s)---disabling.\n", + path, strerror(errno)); + + return -1; +} + +/* Concatenate an existing path and a new name to form a new path. If the new + * path does not exist as a directory, create it then return the resulting + * name of the new path (ralloc'ed off of 'ctx'). + * + * Returns NULL on any error, such as: + * + * <path> does not exist or is not a directory + * <path>/<name> exists but is not a directory + * <path>/<name> cannot be created as a directory + */ +static char * +concatenate_and_mkdir(void *ctx, const char *path, const char *name) +{ + char *new_path; + struct stat sb; + + if (stat(path, &sb) != 0 || ! S_ISDIR(sb.st_mode)) + return NULL; + + new_path = ralloc_asprintf(ctx, "%s/%s", path, name); + + if (mkdir_if_needed(new_path) == 0) + return new_path; + else + return NULL; +} + +#define DRV_KEY_CPY(_dst, _src, _src_size) \ +do { \ + memcpy(_dst, _src, _src_size); \ + _dst += _src_size; \ +} while (0); + +struct disk_cache * +disk_cache_create(const char *gpu_name, const char *driver_id, + uint64_t driver_flags) +{ + void *local; + struct disk_cache *cache = NULL; + char *path, *max_size_str; + uint64_t max_size; + int fd = -1; + struct stat sb; + size_t size; + + uint8_t cache_version = CACHE_VERSION; + size_t cv_size = sizeof(cache_version); + + /* If running as a users other than the real user disable cache */ + if (geteuid() != getuid()) + return NULL; + + /* A ralloc context for transient data during this invocation. */ + local = ralloc_context(NULL); + if (local == NULL) + goto fail; + + /* At user request, disable shader cache entirely. */ + if (env_var_as_boolean("MESA_GLSL_CACHE_DISABLE", false)) + goto fail; + + cache = rzalloc(NULL, struct disk_cache); + if (cache == NULL) + goto fail; + + /* Assume failure. 
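+ * path_init_failed is cleared again only near the end of this function, once
+ * the cache directory, the mmapped index file and the writer queue have all
+ * been set up successfully.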
*/ + cache->path_init_failed = true; + + /* Determine path for cache based on the first defined name as follows: + * + * $MESA_GLSL_CACHE_DIR + * $XDG_CACHE_HOME/mesa_shader_cache + * <pwd.pw_dir>/.cache/mesa_shader_cache + */ + path = getenv("MESA_GLSL_CACHE_DIR"); + if (path) { + if (mkdir_if_needed(path) == -1) + goto path_fail; + + path = concatenate_and_mkdir(local, path, CACHE_DIR_NAME); + if (path == NULL) + goto path_fail; + } + + if (path == NULL) { + char *xdg_cache_home = getenv("XDG_CACHE_HOME"); + + if (xdg_cache_home) { + if (mkdir_if_needed(xdg_cache_home) == -1) + goto path_fail; + + path = concatenate_and_mkdir(local, xdg_cache_home, CACHE_DIR_NAME); + if (path == NULL) + goto path_fail; + } + } + + if (path == NULL) { + char *buf; + size_t buf_size; + struct passwd pwd, *result; + + buf_size = sysconf(_SC_GETPW_R_SIZE_MAX); + if (buf_size == -1) + buf_size = 512; + + /* Loop until buf_size is large enough to query the directory */ + while (1) { + buf = ralloc_size(local, buf_size); + + getpwuid_r(getuid(), &pwd, buf, buf_size, &result); + if (result) + break; + + if (errno == ERANGE) { + ralloc_free(buf); + buf = NULL; + buf_size *= 2; + } else { + goto path_fail; + } + } + + path = concatenate_and_mkdir(local, pwd.pw_dir, ".cache"); + if (path == NULL) + goto path_fail; + + path = concatenate_and_mkdir(local, path, CACHE_DIR_NAME); + if (path == NULL) + goto path_fail; + } + + cache->path = ralloc_strdup(cache, path); + if (cache->path == NULL) + goto path_fail; + + path = ralloc_asprintf(local, "%s/index", cache->path); + if (path == NULL) + goto path_fail; + + fd = open(path, O_RDWR | O_CREAT | O_CLOEXEC, 0644); + if (fd == -1) + goto path_fail; + + if (fstat(fd, &sb) == -1) + goto path_fail; + + /* Force the index file to be the expected size. */ + size = sizeof(*cache->size) + CACHE_INDEX_MAX_KEYS * CACHE_KEY_SIZE; + if (sb.st_size != size) { + if (ftruncate(fd, size) == -1) + goto path_fail; + } + + /* We map this shared so that other processes see updates that we + * make. + * + * Note: We do use atomic addition to ensure that multiple + * processes don't scramble the cache size recorded in the + * index. But we don't use any locking to prevent multiple + * processes from updating the same entry simultaneously. The idea + * is that if either result lands entirely in the index, then + * that's equivalent to a well-ordered write followed by an + * eviction and a write. On the other hand, if the simultaneous + * writes result in a corrupt entry, that's not really any + * different than both entries being evicted, (since within the + * guarantees of the cryptographic hash, a corrupt entry is + * unlikely to ever match a real cache key). + */ + cache->index_mmap = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + if (cache->index_mmap == MAP_FAILED) + goto path_fail; + cache->index_mmap_size = size; + + cache->size = (uint64_t *) cache->index_mmap; + cache->stored_keys = cache->index_mmap + sizeof(uint64_t); + + max_size = 0; + + max_size_str = getenv("MESA_GLSL_CACHE_MAX_SIZE"); + if (max_size_str) { + char *end; + max_size = strtoul(max_size_str, &end, 10); + if (end == max_size_str) { + max_size = 0; + } else { + switch (*end) { + case 'K': + case 'k': + max_size *= 1024; + break; + case 'M': + case 'm': + max_size *= 1024*1024; + break; + case '\0': + case 'G': + case 'g': + default: + max_size *= 1024*1024*1024; + break; + } + } + } + + /* Default to 1GB for maximum cache size. 
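+ * For example, MESA_GLSL_CACHE_MAX_SIZE=512M yields 512*1024*1024 bytes, a
+ * bare number with no suffix is treated as gigabytes, and an unparseable
+ * value leaves max_size at 0 so we fall through to this default.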
*/ + if (max_size == 0) { + max_size = 1024*1024*1024; + } + + cache->max_size = max_size; + + /* 4 threads were chosen below because just about all modern CPUs currently + * available that run Mesa have *at least* 4 cores. For these CPUs allowing + * more threads can result in the queue being processed faster, thus + * avoiding excessive memory use due to a backlog of cache entrys building + * up in the queue. Since we set the UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY + * flag this should have little negative impact on low core systems. + * + * The queue will resize automatically when it's full, so adding new jobs + * doesn't stall. + */ + util_queue_init(&cache->cache_queue, "disk$", 32, 4, + UTIL_QUEUE_INIT_RESIZE_IF_FULL | + UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY | + UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY); + + cache->path_init_failed = false; + + path_fail: + + if (fd != -1) + close(fd); + + cache->driver_keys_blob_size = cv_size; + + /* Create driver id keys */ + size_t id_size = strlen(driver_id) + 1; + size_t gpu_name_size = strlen(gpu_name) + 1; + cache->driver_keys_blob_size += id_size; + cache->driver_keys_blob_size += gpu_name_size; + + /* We sometimes store entire structs that contains a pointers in the cache, + * use pointer size as a key to avoid hard to debug issues. + */ + uint8_t ptr_size = sizeof(void *); + size_t ptr_size_size = sizeof(ptr_size); + cache->driver_keys_blob_size += ptr_size_size; + + size_t driver_flags_size = sizeof(driver_flags); + cache->driver_keys_blob_size += driver_flags_size; + + cache->driver_keys_blob = + ralloc_size(cache, cache->driver_keys_blob_size); + if (!cache->driver_keys_blob) + goto fail; + + uint8_t *drv_key_blob = cache->driver_keys_blob; + DRV_KEY_CPY(drv_key_blob, &cache_version, cv_size) + DRV_KEY_CPY(drv_key_blob, driver_id, id_size) + DRV_KEY_CPY(drv_key_blob, gpu_name, gpu_name_size) + DRV_KEY_CPY(drv_key_blob, &ptr_size, ptr_size_size) + DRV_KEY_CPY(drv_key_blob, &driver_flags, driver_flags_size) + + /* Seed our rand function */ + s_rand_xorshift128plus(cache->seed_xorshift128plus, true); + + ralloc_free(local); + + return cache; + + fail: + if (cache) + ralloc_free(cache); + ralloc_free(local); + + return NULL; +} + +void +disk_cache_destroy(struct disk_cache *cache) +{ + if (cache && !cache->path_init_failed) { + util_queue_finish(&cache->cache_queue); + util_queue_destroy(&cache->cache_queue); + munmap(cache->index_mmap, cache->index_mmap_size); + } + + ralloc_free(cache); +} + +void +disk_cache_wait_for_idle(struct disk_cache *cache) +{ + util_queue_finish(&cache->cache_queue); +} + +/* Return a filename within the cache's directory corresponding to 'key'. The + * returned filename is ralloced with 'cache' as the parent context. + * + * Returns NULL if out of memory. + */ +static char * +get_cache_file(struct disk_cache *cache, const cache_key key) +{ + char buf[41]; + char *filename; + + if (cache->path_init_failed) + return NULL; + + _mesa_sha1_format(buf, key); + if (asprintf(&filename, "%s/%c%c/%s", cache->path, buf[0], + buf[1], buf + 2) == -1) + return NULL; + + return filename; +} + +/* Create the directory that will be needed for the cache file for \key. + * + * Obviously, the implementation here must closely match + * _get_cache_file above. 
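+ * For example, a key whose SHA-1 renders as "ab34..." is stored as
+ * <cache-path>/ab/34..., so the directory that has to exist here is
+ * <cache-path>/ab.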
+*/ +static void +make_cache_file_directory(struct disk_cache *cache, const cache_key key) +{ + char *dir; + char buf[41]; + + _mesa_sha1_format(buf, key); + if (asprintf(&dir, "%s/%c%c", cache->path, buf[0], buf[1]) == -1) + return; + + mkdir_if_needed(dir); + free(dir); +} + +/* Given a directory path and predicate function, find the entry with + * the oldest access time in that directory for which the predicate + * returns true. + * + * Returns: A malloc'ed string for the path to the chosen file, (or + * NULL on any error). The caller should free the string when + * finished. + */ +static char * +choose_lru_file_matching(const char *dir_path, + bool (*predicate)(const char *dir_path, + const struct stat *, + const char *, const size_t)) +{ + DIR *dir; + struct dirent *entry; + char *filename; + char *lru_name = NULL; + time_t lru_atime = 0; + + dir = opendir(dir_path); + if (dir == NULL) + return NULL; + + while (1) { + entry = readdir(dir); + if (entry == NULL) + break; + + struct stat sb; + if (fstatat(dirfd(dir), entry->d_name, &sb, 0) == 0) { + if (!lru_atime || (sb.st_atime < lru_atime)) { + size_t len = strlen(entry->d_name); + + if (!predicate(dir_path, &sb, entry->d_name, len)) + continue; + + char *tmp = realloc(lru_name, len + 1); + if (tmp) { + lru_name = tmp; + memcpy(lru_name, entry->d_name, len + 1); + lru_atime = sb.st_atime; + } + } + } + } + + if (lru_name == NULL) { + closedir(dir); + return NULL; + } + + if (asprintf(&filename, "%s/%s", dir_path, lru_name) < 0) + filename = NULL; + + free(lru_name); + closedir(dir); + + return filename; +} + +/* Is entry a regular file, and not having a name with a trailing + * ".tmp" + */ +static bool +is_regular_non_tmp_file(const char *path, const struct stat *sb, + const char *d_name, const size_t len) +{ + if (!S_ISREG(sb->st_mode)) + return false; + + if (len >= 4 && strcmp(&d_name[len-4], ".tmp") == 0) + return false; + + return true; +} + +/* Returns the size of the deleted file, (or 0 on any error). */ +static size_t +unlink_lru_file_from_directory(const char *path) +{ + struct stat sb; + char *filename; + + filename = choose_lru_file_matching(path, is_regular_non_tmp_file); + if (filename == NULL) + return 0; + + if (stat(filename, &sb) == -1) { + free (filename); + return 0; + } + + unlink(filename); + free (filename); + + return sb.st_blocks * 512; +} + +/* Is entry a directory with a two-character name, (and not the + * special name of ".."). We also return false if the dir is empty. + */ +static bool +is_two_character_sub_directory(const char *path, const struct stat *sb, + const char *d_name, const size_t len) +{ + if (!S_ISDIR(sb->st_mode)) + return false; + + if (len != 2) + return false; + + if (strcmp(d_name, "..") == 0) + return false; + + char *subdir; + if (asprintf(&subdir, "%s/%s", path, d_name) == -1) + return false; + DIR *dir = opendir(subdir); + free(subdir); + + if (dir == NULL) + return false; + + unsigned subdir_entries = 0; + struct dirent *d; + while ((d = readdir(dir)) != NULL) { + if(++subdir_entries > 2) + break; + } + closedir(dir); + + /* If dir only contains '.' and '..' it must be empty */ + if (subdir_entries <= 2) + return false; + + return true; +} + +static void +evict_lru_item(struct disk_cache *cache) +{ + char *dir_path; + + /* With a reasonably-sized, full cache, (and with keys generated + * from a cryptographic hash), we can choose two random hex digits + * and reasonably expect the directory to exist with a file in it. 
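+ * (rand64 & 0xff below selects one of the 256 two-hex-digit subdirectories,
+ * e.g. 0x3f picks "<cache-path>/3f".)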
+ * Provides pseudo-LRU eviction to reduce checking all cache files. + */ + uint64_t rand64 = rand_xorshift128plus(cache->seed_xorshift128plus); + if (asprintf(&dir_path, "%s/%02" PRIx64 , cache->path, rand64 & 0xff) < 0) + return; + + size_t size = unlink_lru_file_from_directory(dir_path); + + free(dir_path); + + if (size) { + p_atomic_add(cache->size, - (uint64_t)size); + return; + } + + /* In the case where the random choice of directory didn't find + * something, we choose the least recently accessed from the + * existing directories. + * + * Really, the only reason this code exists is to allow the unit + * tests to work, (which use an artificially-small cache to be able + * to force a single cached item to be evicted). + */ + dir_path = choose_lru_file_matching(cache->path, + is_two_character_sub_directory); + if (dir_path == NULL) + return; + + size = unlink_lru_file_from_directory(dir_path); + + free(dir_path); + + if (size) + p_atomic_add(cache->size, - (uint64_t)size); +} + +void +disk_cache_remove(struct disk_cache *cache, const cache_key key) +{ + struct stat sb; + + char *filename = get_cache_file(cache, key); + if (filename == NULL) { + return; + } + + if (stat(filename, &sb) == -1) { + free(filename); + return; + } + + unlink(filename); + free(filename); + + if (sb.st_blocks) + p_atomic_add(cache->size, - (uint64_t)sb.st_blocks * 512); +} + +static ssize_t +read_all(int fd, void *buf, size_t count) +{ + char *in = buf; + ssize_t read_ret; + size_t done; + + for (done = 0; done < count; done += read_ret) { + read_ret = read(fd, in + done, count - done); + if (read_ret == -1 || read_ret == 0) + return -1; + } + return done; +} + +static ssize_t +write_all(int fd, const void *buf, size_t count) +{ + const char *out = buf; + ssize_t written; + size_t done; + + for (done = 0; done < count; done += written) { + written = write(fd, out + done, count - done); + if (written == -1) + return -1; + } + return done; +} + +/* From the zlib docs: + * "If the memory is available, buffers sizes on the order of 128K or 256K + * bytes should be used." + */ +#define BUFSIZE 256 * 1024 + +/** + * Compresses cache entry in memory and writes it to disk. Returns the size + * of the data written to disk. + */ +static size_t +deflate_and_write_to_disk(const void *in_data, size_t in_data_size, int dest, + const char *filename) +{ +#ifdef HAVE_ZSTD + /* from the zstd docs (https://facebook.github.io/zstd/zstd_manual.html): + * compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + */ + size_t out_size = ZSTD_compressBound(in_data_size); + void * out = malloc(out_size); + + size_t ret = ZSTD_compress(out, out_size, in_data, in_data_size, + ZSTD_COMPRESSION_LEVEL); + if (ZSTD_isError(ret)) { + free(out); + return 0; + } + ssize_t written = write_all(dest, out, ret); + if (written == -1) { + free(out); + return 0; + } + free(out); + return ret; +#else + unsigned char *out; + + /* allocate deflate state */ + z_stream strm; + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.next_in = (uint8_t *) in_data; + strm.avail_in = in_data_size; + + int ret = deflateInit(&strm, Z_BEST_COMPRESSION); + if (ret != Z_OK) + return 0; + + /* compress until end of in_data */ + size_t compressed_size = 0; + int flush; + + out = malloc(BUFSIZE * sizeof(unsigned char)); + if (out == NULL) + return 0; + + do { + int remaining = in_data_size - BUFSIZE; + flush = remaining > 0 ? 
Z_NO_FLUSH : Z_FINISH; + in_data_size -= BUFSIZE; + + /* Run deflate() on input until the output buffer is not full (which + * means there is no more data to deflate). + */ + do { + strm.avail_out = BUFSIZE; + strm.next_out = out; + + ret = deflate(&strm, flush); /* no bad return value */ + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + + size_t have = BUFSIZE - strm.avail_out; + compressed_size += have; + + ssize_t written = write_all(dest, out, have); + if (written == -1) { + (void)deflateEnd(&strm); + free(out); + return 0; + } + } while (strm.avail_out == 0); + + /* all input should be used */ + assert(strm.avail_in == 0); + + } while (flush != Z_FINISH); + + /* stream should be complete */ + assert(ret == Z_STREAM_END); + + /* clean up and return */ + (void)deflateEnd(&strm); + free(out); + return compressed_size; +# endif +} + +static struct disk_cache_put_job * +create_put_job(struct disk_cache *cache, const cache_key key, + const void *data, size_t size, + struct cache_item_metadata *cache_item_metadata) +{ + struct disk_cache_put_job *dc_job = (struct disk_cache_put_job *) + malloc(sizeof(struct disk_cache_put_job) + size); + + if (dc_job) { + dc_job->cache = cache; + memcpy(dc_job->key, key, sizeof(cache_key)); + dc_job->data = dc_job + 1; + memcpy(dc_job->data, data, size); + dc_job->size = size; + + /* Copy the cache item metadata */ + if (cache_item_metadata) { + dc_job->cache_item_metadata.type = cache_item_metadata->type; + if (cache_item_metadata->type == CACHE_ITEM_TYPE_GLSL) { + dc_job->cache_item_metadata.num_keys = + cache_item_metadata->num_keys; + dc_job->cache_item_metadata.keys = (cache_key *) + malloc(cache_item_metadata->num_keys * sizeof(cache_key)); + + if (!dc_job->cache_item_metadata.keys) + goto fail; + + memcpy(dc_job->cache_item_metadata.keys, + cache_item_metadata->keys, + sizeof(cache_key) * cache_item_metadata->num_keys); + } + } else { + dc_job->cache_item_metadata.type = CACHE_ITEM_TYPE_UNKNOWN; + dc_job->cache_item_metadata.keys = NULL; + } + } + + return dc_job; + +fail: + free(dc_job); + + return NULL; +} + +static void +destroy_put_job(void *job, int thread_index) +{ + if (job) { + struct disk_cache_put_job *dc_job = (struct disk_cache_put_job *) job; + free(dc_job->cache_item_metadata.keys); + + free(job); + } +} + +struct cache_entry_file_data { + uint32_t crc32; + uint32_t uncompressed_size; +}; + +static void +cache_put(void *job, int thread_index) +{ + assert(job); + + int fd = -1, fd_final = -1, err, ret; + unsigned i = 0; + char *filename = NULL, *filename_tmp = NULL; + struct disk_cache_put_job *dc_job = (struct disk_cache_put_job *) job; + + filename = get_cache_file(dc_job->cache, dc_job->key); + if (filename == NULL) + goto done; + + /* If the cache is too large, evict something else first. */ + while (*dc_job->cache->size + dc_job->size > dc_job->cache->max_size && + i < 8) { + evict_lru_item(dc_job->cache); + i++; + } + + /* Write to a temporary file to allow for an atomic rename to the + * final destination filename, (to prevent any readers from seeing + * a partially written file). + */ + if (asprintf(&filename_tmp, "%s.tmp", filename) == -1) + goto done; + + fd = open(filename_tmp, O_WRONLY | O_CLOEXEC | O_CREAT, 0644); + + /* Make the two-character subdirectory within the cache as needed. 
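+ * The open() just above fails with ENOENT when the "<cache-path>/xy"
+ * subdirectory for this key does not exist yet; in that case we create it
+ * with make_cache_file_directory() and retry the open once.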
*/ + if (fd == -1) { + if (errno != ENOENT) + goto done; + + make_cache_file_directory(dc_job->cache, dc_job->key); + + fd = open(filename_tmp, O_WRONLY | O_CLOEXEC | O_CREAT, 0644); + if (fd == -1) + goto done; + } + + /* With the temporary file open, we take an exclusive flock on + * it. If the flock fails, then another process still has the file + * open with the flock held. So just let that file be responsible + * for writing the file. + */ +#ifdef HAVE_FLOCK + err = flock(fd, LOCK_EX | LOCK_NB); +#else + struct flock lock = { + .l_start = 0, + .l_len = 0, /* entire file */ + .l_type = F_WRLCK, + .l_whence = SEEK_SET + }; + err = fcntl(fd, F_SETLK, &lock); +#endif + if (err == -1) + goto done; + + /* Now that we have the lock on the open temporary file, we can + * check to see if the destination file already exists. If so, + * another process won the race between when we saw that the file + * didn't exist and now. In this case, we don't do anything more, + * (to ensure the size accounting of the cache doesn't get off). + */ + fd_final = open(filename, O_RDONLY | O_CLOEXEC); + if (fd_final != -1) { + unlink(filename_tmp); + goto done; + } + + /* OK, we're now on the hook to write out a file that we know is + * not in the cache, and is also not being written out to the cache + * by some other process. + */ + + /* Write the driver_keys_blob, this can be used find information about the + * mesa version that produced the entry or deal with hash collisions, + * should that ever become a real problem. + */ + ret = write_all(fd, dc_job->cache->driver_keys_blob, + dc_job->cache->driver_keys_blob_size); + if (ret == -1) { + unlink(filename_tmp); + goto done; + } + + /* Write the cache item metadata. This data can be used to deal with + * hash collisions, as well as providing useful information to 3rd party + * tools reading the cache files. + */ + ret = write_all(fd, &dc_job->cache_item_metadata.type, + sizeof(uint32_t)); + if (ret == -1) { + unlink(filename_tmp); + goto done; + } + + if (dc_job->cache_item_metadata.type == CACHE_ITEM_TYPE_GLSL) { + ret = write_all(fd, &dc_job->cache_item_metadata.num_keys, + sizeof(uint32_t)); + if (ret == -1) { + unlink(filename_tmp); + goto done; + } + + ret = write_all(fd, dc_job->cache_item_metadata.keys[0], + dc_job->cache_item_metadata.num_keys * + sizeof(cache_key)); + if (ret == -1) { + unlink(filename_tmp); + goto done; + } + } + + /* Create CRC of the data. We will read this when restoring the cache and + * use it to check for corruption. + */ + struct cache_entry_file_data cf_data; + cf_data.crc32 = util_hash_crc32(dc_job->data, dc_job->size); + cf_data.uncompressed_size = dc_job->size; + + size_t cf_data_size = sizeof(cf_data); + ret = write_all(fd, &cf_data, cf_data_size); + if (ret == -1) { + unlink(filename_tmp); + goto done; + } + + /* Now, finally, write out the contents to the temporary file, then + * rename them atomically to the destination filename, and also + * perform an atomic increment of the total cache size. 
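+ * The size bookkeeping uses 512-byte blocks (st_blocks * 512) rather than
+ * the byte length, matching what unlink_lru_file_from_directory() returns
+ * when an entry is evicted.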
+ */ + size_t file_size = deflate_and_write_to_disk(dc_job->data, dc_job->size, + fd, filename_tmp); + if (file_size == 0) { + unlink(filename_tmp); + goto done; + } + ret = rename(filename_tmp, filename); + if (ret == -1) { + unlink(filename_tmp); + goto done; + } + + struct stat sb; + if (stat(filename, &sb) == -1) { + /* Something went wrong remove the file */ + unlink(filename); + goto done; + } + + p_atomic_add(dc_job->cache->size, sb.st_blocks * 512); + + done: + if (fd_final != -1) + close(fd_final); + /* This close finally releases the flock, (now that the final file + * has been renamed into place and the size has been added). + */ + if (fd != -1) + close(fd); + free(filename_tmp); + free(filename); +} + +void +disk_cache_put(struct disk_cache *cache, const cache_key key, + const void *data, size_t size, + struct cache_item_metadata *cache_item_metadata) +{ + if (cache->blob_put_cb) { + cache->blob_put_cb(key, CACHE_KEY_SIZE, data, size); + return; + } + + if (cache->path_init_failed) + return; + + struct disk_cache_put_job *dc_job = + create_put_job(cache, key, data, size, cache_item_metadata); + + if (dc_job) { + util_queue_fence_init(&dc_job->fence); + util_queue_add_job(&cache->cache_queue, dc_job, &dc_job->fence, + cache_put, destroy_put_job, dc_job->size); + } +} + +/** + * Decompresses cache entry, returns true if successful. + */ +static bool +inflate_cache_data(uint8_t *in_data, size_t in_data_size, + uint8_t *out_data, size_t out_data_size) +{ +#ifdef HAVE_ZSTD + size_t ret = ZSTD_decompress(out_data, out_data_size, in_data, in_data_size); + return !ZSTD_isError(ret); +#else + z_stream strm; + + /* allocate inflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.next_in = in_data; + strm.avail_in = in_data_size; + strm.next_out = out_data; + strm.avail_out = out_data_size; + + int ret = inflateInit(&strm); + if (ret != Z_OK) + return false; + + ret = inflate(&strm, Z_NO_FLUSH); + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + + /* Unless there was an error we should have decompressed everything in one + * go as we know the uncompressed file size. + */ + if (ret != Z_STREAM_END) { + (void)inflateEnd(&strm); + return false; + } + assert(strm.avail_out == 0); + + /* clean up and return */ + (void)inflateEnd(&strm); + return true; +#endif +} + +void * +disk_cache_get(struct disk_cache *cache, const cache_key key, size_t *size) +{ + int fd = -1, ret; + struct stat sb; + char *filename = NULL; + uint8_t *data = NULL; + uint8_t *uncompressed_data = NULL; + uint8_t *file_header = NULL; + + if (size) + *size = 0; + + if (cache->blob_get_cb) { + /* This is what Android EGL defines as the maxValueSize in egl_cache_t + * class implementation. 
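+ * In other words, when a blob_get_cb is installed we only ever hand it a
+ * 64 KiB buffer here, so this path assumes cache entries fit within that
+ * limit.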
+ */ + const signed long max_blob_size = 64 * 1024; + void *blob = malloc(max_blob_size); + if (!blob) + return NULL; + + signed long bytes = + cache->blob_get_cb(key, CACHE_KEY_SIZE, blob, max_blob_size); + + if (!bytes) { + free(blob); + return NULL; + } + + if (size) + *size = bytes; + return blob; + } + + filename = get_cache_file(cache, key); + if (filename == NULL) + goto fail; + + fd = open(filename, O_RDONLY | O_CLOEXEC); + if (fd == -1) + goto fail; + + if (fstat(fd, &sb) == -1) + goto fail; + + data = malloc(sb.st_size); + if (data == NULL) + goto fail; + + size_t ck_size = cache->driver_keys_blob_size; + file_header = malloc(ck_size); + if (!file_header) + goto fail; + + if (sb.st_size < ck_size) + goto fail; + + ret = read_all(fd, file_header, ck_size); + if (ret == -1) + goto fail; + + /* Check for extremely unlikely hash collisions */ + if (memcmp(cache->driver_keys_blob, file_header, ck_size) != 0) { + assert(!"Mesa cache keys mismatch!"); + goto fail; + } + + size_t cache_item_md_size = sizeof(uint32_t); + uint32_t md_type; + ret = read_all(fd, &md_type, cache_item_md_size); + if (ret == -1) + goto fail; + + if (md_type == CACHE_ITEM_TYPE_GLSL) { + uint32_t num_keys; + cache_item_md_size += sizeof(uint32_t); + ret = read_all(fd, &num_keys, sizeof(uint32_t)); + if (ret == -1) + goto fail; + + /* The cache item metadata is currently just used for distributing + * precompiled shaders, they are not used by Mesa so just skip them for + * now. + * TODO: pass the metadata back to the caller and do some basic + * validation. + */ + cache_item_md_size += num_keys * sizeof(cache_key); + ret = lseek(fd, num_keys * sizeof(cache_key), SEEK_CUR); + if (ret == -1) + goto fail; + } + + /* Load the CRC that was created when the file was written. */ + struct cache_entry_file_data cf_data; + size_t cf_data_size = sizeof(cf_data); + ret = read_all(fd, &cf_data, cf_data_size); + if (ret == -1) + goto fail; + + /* Load the actual cache data. */ + size_t cache_data_size = + sb.st_size - cf_data_size - ck_size - cache_item_md_size; + ret = read_all(fd, data, cache_data_size); + if (ret == -1) + goto fail; + + /* Uncompress the cache data */ + uncompressed_data = malloc(cf_data.uncompressed_size); + if (!inflate_cache_data(data, cache_data_size, uncompressed_data, + cf_data.uncompressed_size)) + goto fail; + + /* Check the data for corruption */ + if (cf_data.crc32 != util_hash_crc32(uncompressed_data, + cf_data.uncompressed_size)) + goto fail; + + free(data); + free(filename); + free(file_header); + close(fd); + + if (size) + *size = cf_data.uncompressed_size; + + return uncompressed_data; + + fail: + if (data) + free(data); + if (uncompressed_data) + free(uncompressed_data); + if (filename) + free(filename); + if (file_header) + free(file_header); + if (fd != -1) + close(fd); + + return NULL; +} + +void +disk_cache_put_key(struct disk_cache *cache, const cache_key key) +{ + const uint32_t *key_chunk = (const uint32_t *) key; + int i = CPU_TO_LE32(*key_chunk) & CACHE_INDEX_KEY_MASK; + unsigned char *entry; + + if (cache->blob_put_cb) { + cache->blob_put_cb(key, CACHE_KEY_SIZE, key_chunk, sizeof(uint32_t)); + return; + } + + if (cache->path_init_failed) + return; + + entry = &cache->stored_keys[i * CACHE_KEY_SIZE]; + + memcpy(entry, key, CACHE_KEY_SIZE); +} + +/* This function lets us test whether a given key was previously + * stored in the cache with disk_cache_put_key(). The implement is + * efficient by not using syscalls or hitting the disk. It's not + * race-free, but the races are benign. 
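+ * (Each key occupies the index slot derived from its first 32 bits masked
+ * down to CACHE_INDEX_KEY_BITS bits, so the check below is a single memcmp
+ * against that slot.)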
If we race with someone else + * calling disk_cache_put_key, then that's just an extra cache miss and an + * extra recompile. + */ +bool +disk_cache_has_key(struct disk_cache *cache, const cache_key key) +{ + const uint32_t *key_chunk = (const uint32_t *) key; + int i = CPU_TO_LE32(*key_chunk) & CACHE_INDEX_KEY_MASK; + unsigned char *entry; + + if (cache->blob_get_cb) { + uint32_t blob; + return cache->blob_get_cb(key, CACHE_KEY_SIZE, &blob, sizeof(uint32_t)); + } + + if (cache->path_init_failed) + return false; + + entry = &cache->stored_keys[i * CACHE_KEY_SIZE]; + + return memcmp(entry, key, CACHE_KEY_SIZE) == 0; +} + +void +disk_cache_compute_key(struct disk_cache *cache, const void *data, size_t size, + cache_key key) +{ + struct mesa_sha1 ctx; + + _mesa_sha1_init(&ctx); + _mesa_sha1_update(&ctx, cache->driver_keys_blob, + cache->driver_keys_blob_size); + _mesa_sha1_update(&ctx, data, size); + _mesa_sha1_final(&ctx, key); +} + +void +disk_cache_set_callbacks(struct disk_cache *cache, disk_cache_put_cb put, + disk_cache_get_cb get) +{ + cache->blob_put_cb = put; + cache->blob_get_cb = get; +} + +#endif /* ENABLE_SHADER_CACHE */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/disk_cache.h b/third_party/rust/glslopt/glsl-optimizer/src/util/disk_cache.h new file mode 100644 index 0000000000..09b316e6e8 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/disk_cache.h @@ -0,0 +1,321 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef DISK_CACHE_H +#define DISK_CACHE_H + +#ifdef HAVE_DLFCN_H +#include <dlfcn.h> +#include <stdio.h> +#include "util/build_id.h" +#endif +#include <assert.h> +#include <stdint.h> +#include <stdbool.h> +#include <sys/stat.h> +#include "util/mesa-sha1.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Size of cache keys in bytes. */ +#define CACHE_KEY_SIZE 20 + +#define CACHE_DIR_NAME "mesa_shader_cache" + +typedef uint8_t cache_key[CACHE_KEY_SIZE]; + +/* WARNING: 3rd party applications might be reading the cache item metadata. + * Do not change these values without making the change widely known. + * Please contact Valve developers and make them aware of this change. 
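+ * For reference, an on-disk entry is laid out as: the driver_keys_blob, a
+ * uint32_t metadata type (one of the CACHE_ITEM_TYPE_* values below), an
+ * optional list of GLSL keys, a small CRC32/size header, and finally the
+ * compressed payload (see cache_put() in disk_cache.c).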
+ */ +#define CACHE_ITEM_TYPE_UNKNOWN 0x0 +#define CACHE_ITEM_TYPE_GLSL 0x1 + +typedef void +(*disk_cache_put_cb) (const void *key, signed long keySize, + const void *value, signed long valueSize); + +typedef signed long +(*disk_cache_get_cb) (const void *key, signed long keySize, + void *value, signed long valueSize); + +struct cache_item_metadata { + /** + * The cache item type. This could be used to identify a GLSL cache item, + * a certain type of IR (tgsi, nir, etc), or signal that it is the final + * binary form of the shader. + */ + uint32_t type; + + /** GLSL cache item metadata */ + cache_key *keys; /* sha1 list of shaders that make up the cache item */ + uint32_t num_keys; +}; + +struct disk_cache; + +static inline char * +disk_cache_format_hex_id(char *buf, const uint8_t *hex_id, unsigned size) +{ + static const char hex_digits[] = "0123456789abcdef"; + unsigned i; + + for (i = 0; i < size; i += 2) { + buf[i] = hex_digits[hex_id[i >> 1] >> 4]; + buf[i + 1] = hex_digits[hex_id[i >> 1] & 0x0f]; + } + buf[i] = '\0'; + + return buf; +} + +#ifdef HAVE_DLADDR +static inline bool +disk_cache_get_function_timestamp(void *ptr, uint32_t* timestamp) +{ + Dl_info info; + struct stat st; + if (!dladdr(ptr, &info) || !info.dli_fname) { + return false; + } + if (stat(info.dli_fname, &st)) { + return false; + } + + if (!st.st_mtime) { + fprintf(stderr, "Mesa: The provided filesystem timestamp for the cache " + "is bogus! Disabling On-disk cache.\n"); + return false; + } + + *timestamp = st.st_mtime; + + return true; +} + +static inline bool +disk_cache_get_function_identifier(void *ptr, struct mesa_sha1 *ctx) +{ + uint32_t timestamp; + +#ifdef HAVE_DL_ITERATE_PHDR + const struct build_id_note *note = NULL; + if ((note = build_id_find_nhdr_for_addr(ptr))) { + _mesa_sha1_update(ctx, build_id_data(note), build_id_length(note)); + } else +#endif + if (disk_cache_get_function_timestamp(ptr, ×tamp)) { + _mesa_sha1_update(ctx, ×tamp, sizeof(timestamp)); + } else + return false; + return true; +} +#endif + +/* Provide inlined stub functions if the shader cache is disabled. */ + +#ifdef ENABLE_SHADER_CACHE + +/** + * Create a new cache object. + * + * This function creates the handle necessary for all subsequent cache_* + * functions. + * + * This cache provides two distinct operations: + * + * o Storage and retrieval of arbitrary objects by cryptographic + * name (or "key"). This is provided via disk_cache_put() and + * disk_cache_get(). + * + * o The ability to store a key alone and check later whether the + * key was previously stored. This is provided via disk_cache_put_key() + * and disk_cache_has_key(). + * + * The put_key()/has_key() operations are conceptually identical to + * put()/get() with no data, but are provided separately to allow for + * a more efficient implementation. + * + * In all cases, the keys are sequences of 20 bytes. It is anticipated + * that callers will compute appropriate SHA-1 signatures for keys, + * (though nothing in this implementation directly relies on how the + * names are computed). See mesa-sha1.h and _mesa_sha1_compute for + * assistance in computing SHA-1 signatures. + */ +struct disk_cache * +disk_cache_create(const char *gpu_name, const char *timestamp, + uint64_t driver_flags); + +/** + * Destroy a cache object, (freeing all associated resources). + */ +void +disk_cache_destroy(struct disk_cache *cache); + +/* Wait for all previous disk_cache_put() calls to be processed (used for unit + * testing). 
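+ * A test would typically disk_cache_put() an entry, call this to drain the
+ * writer queue, and only then assert that disk_cache_get() returns it.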
+ */ +void +disk_cache_wait_for_idle(struct disk_cache *cache); + +/** + * Remove the item in the cache under the name \key. + */ +void +disk_cache_remove(struct disk_cache *cache, const cache_key key); + +/** + * Store an item in the cache under the name \key. + * + * The item can be retrieved later with disk_cache_get(), (unless the item has + * been evicted in the interim). + * + * Any call to disk_cache_put() may cause an existing, random item to be + * evicted from the cache. + */ +void +disk_cache_put(struct disk_cache *cache, const cache_key key, + const void *data, size_t size, + struct cache_item_metadata *cache_item_metadata); + +/** + * Retrieve an item previously stored in the cache with the name <key>. + * + * The item must have been previously stored with a call to disk_cache_put(). + * + * If \size is non-NULL, then, on successful return, it will be set to the + * size of the object. + * + * \return A pointer to the stored object if found. NULL if the object + * is not found, or if any error occurs, (memory allocation failure, + * filesystem error, etc.). The returned data is malloc'ed so the + * caller should call free() it when finished. + */ +void * +disk_cache_get(struct disk_cache *cache, const cache_key key, size_t *size); + +/** + * Store the name \key within the cache, (without any associated data). + * + * Later this key can be checked with disk_cache_has_key(), (unless the key + * has been evicted in the interim). + * + * Any call to disk_cache_put_key() may cause an existing, random key to be + * evicted from the cache. + */ +void +disk_cache_put_key(struct disk_cache *cache, const cache_key key); + +/** + * Test whether the name \key was previously recorded in the cache. + * + * Return value: True if disk_cache_put_key() was previously called with + * \key, (and the key was not evicted in the interim). + * + * Note: disk_cache_has_key() will only return true for keys passed to + * disk_cache_put_key(). Specifically, a call to disk_cache_put() will not cause + * disk_cache_has_key() to return true for the same key. + */ +bool +disk_cache_has_key(struct disk_cache *cache, const cache_key key); + +/** + * Compute the name \key from \data of given \size. 
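+ * The key is a SHA-1 over the cache's driver_keys_blob followed by \data,
+ * so identical shader data still yields distinct keys across different
+ * drivers, GPUs or Mesa builds.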
+ */ +void +disk_cache_compute_key(struct disk_cache *cache, const void *data, size_t size, + cache_key key); + +void +disk_cache_set_callbacks(struct disk_cache *cache, disk_cache_put_cb put, + disk_cache_get_cb get); + +#else + +static inline struct disk_cache * +disk_cache_create(const char *gpu_name, const char *timestamp, + uint64_t driver_flags) +{ + return NULL; +} + +static inline void +disk_cache_destroy(struct disk_cache *cache) { + return; +} + +static inline void +disk_cache_put(struct disk_cache *cache, const cache_key key, + const void *data, size_t size, + struct cache_item_metadata *cache_item_metadata) +{ + return; +} + +static inline void +disk_cache_remove(struct disk_cache *cache, const cache_key key) +{ + return; +} + +static inline uint8_t * +disk_cache_get(struct disk_cache *cache, const cache_key key, size_t *size) +{ + return NULL; +} + +static inline void +disk_cache_put_key(struct disk_cache *cache, const cache_key key) +{ + return; +} + +static inline bool +disk_cache_has_key(struct disk_cache *cache, const cache_key key) +{ + return false; +} + +static inline void +disk_cache_compute_key(struct disk_cache *cache, const void *data, size_t size, + const cache_key key) +{ + return; +} + +static inline void +disk_cache_set_callbacks(struct disk_cache *cache, disk_cache_put_cb put, + disk_cache_get_cb get) +{ + return; +} + +#endif /* ENABLE_SHADER_CACHE */ + +#ifdef __cplusplus +} +#endif + +#endif /* CACHE_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/fast_urem_by_const.h b/third_party/rust/glslopt/glsl-optimizer/src/util/fast_urem_by_const.h new file mode 100644 index 0000000000..beb253d229 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/fast_urem_by_const.h @@ -0,0 +1,74 @@ +/* + * Copyright © 2010 Valve Software + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdint.h> + +/* + * Code for fast 32-bit unsigned remainder, based off of "Faster Remainder by + * Direct Computation: Applications to Compilers and Software Libraries," + * available at https://arxiv.org/pdf/1902.01961.pdf. + * + * util_fast_urem32(n, d, REMAINDER_MAGIC(d)) returns the same thing as + * n % d for any unsigned n and d, however it compiles down to only a few + * multiplications, so it should be faster than plain uint32_t modulo if the + * same divisor is used many times. 
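+ *
+ * A typical use (sketch, with hypothetical names) hoists the magic value out
+ * of the hot loop:
+ *
+ *    const uint64_t magic = REMAINDER_MAGIC(num_buckets);
+ *    for (i = 0; i < n; i++)
+ *       bucket[i] = util_fast_urem32(hash[i], num_buckets, magic);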
+ */ + +#define REMAINDER_MAGIC(divisor) \ + ((uint64_t) ~0ull / (divisor) + 1) + +/* + * Get bits 64-96 of a 32x64-bit multiply. If __int128_t is available, we use + * it, which usually compiles down to one instruction on 64-bit architectures. + * Otherwise on 32-bit architectures we usually get four instructions (one + * 32x32->64 multiply, one 32x32->32 multiply, and one 64-bit add). + */ + +static inline uint32_t +_mul32by64_hi(uint32_t a, uint64_t b) +{ +#ifdef HAVE_UINT128 + return ((__uint128_t) b * a) >> 64; +#else + /* + * Let b = b0 + 2^32 * b1. Then a * b = a * b0 + 2^32 * a * b1. We would + * have to do a 96-bit addition to get the full result, except that only + * one term has non-zero lower 32 bits, which means that to get the high 32 + * bits, we only have to add the high 64 bits of each term. Unfortunately, + * we have to do the 64-bit addition in case the low 32 bits overflow. + */ + uint32_t b0 = (uint32_t) b; + uint32_t b1 = b >> 32; + return ((((uint64_t) a * b0) >> 32) + (uint64_t) a * b1) >> 32; +#endif +} + +static inline uint32_t +util_fast_urem32(uint32_t n, uint32_t d, uint64_t magic) +{ + uint64_t lowbits = magic * n; + uint32_t result = _mul32by64_hi(d, lowbits); + assert(result == n % d); + return result; +} + diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/fnv1a.h b/third_party/rust/glslopt/glsl-optimizer/src/util/fnv1a.h new file mode 100644 index 0000000000..0f92d0b0e4 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/fnv1a.h @@ -0,0 +1,61 @@ +/* + * Copyright © 2009,2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#ifndef _FNV1A_H +#define _FNV1A_H + +enum { + _mesa_fnv32_1a_offset_bias = 2166136261u, +}; + +/** + * Quick FNV-1a hash implementation based on: + * http://www.isthe.com/chongo/tech/comp/fnv/ + * + * FNV-1a is not be the best hash out there -- Jenkins's lookup3 is supposed + * to be quite good, and it probably beats FNV. But FNV has the advantage + * that it involves almost no code. 
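+ * (The entire algorithm is just hash = (hash ^ byte) * 0x01000193 for each
+ * input byte, starting from the offset bias above.)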
For an improvement on both, see Paul + * Hsieh's http://www.azillionmonkeys.com/qed/hash.html + */ +static inline uint32_t +_mesa_fnv32_1a_accumulate_block(uint32_t hash, const void *data, size_t size) +{ + const uint8_t *bytes = (const uint8_t *)data; + + while (size-- != 0) { + hash ^= *bytes; + hash = hash * 0x01000193; + bytes++; + } + + return hash; +} + +#define _mesa_fnv32_1a_accumulate(hash, expr) \ + _mesa_fnv32_1a_accumulate_block(hash, &(expr), sizeof(expr)) + +#endif diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/format/u_format.h b/third_party/rust/glslopt/glsl-optimizer/src/util/format/u_format.h new file mode 100644 index 0000000000..1b6f0b8828 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/format/u_format.h @@ -0,0 +1,1665 @@ +/************************************************************************** + * + * Copyright 2009-2010 Vmware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef U_FORMAT_H +#define U_FORMAT_H + + +#include "pipe/p_format.h" +#include "pipe/p_defines.h" +#include "util/u_debug.h" + +union pipe_color_union; +struct pipe_screen; + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Describe how to pack/unpack pixels into/from the prescribed format. + * + * XXX: This could be renamed to something like util_format_pack, or broke down + * in flags inside util_format_block that said exactly what we want. + */ +enum util_format_layout { + /** + * Formats with util_format_block::width == util_format_block::height == 1 + * that can be described as an ordinary data structure. + */ + UTIL_FORMAT_LAYOUT_PLAIN, + + /** + * Formats with sub-sampled channels. + * + * This is for formats like YVYU where there is less than one sample per + * pixel. + */ + UTIL_FORMAT_LAYOUT_SUBSAMPLED, + + /** + * S3 Texture Compression formats. + */ + UTIL_FORMAT_LAYOUT_S3TC, + + /** + * Red-Green Texture Compression formats. + */ + UTIL_FORMAT_LAYOUT_RGTC, + + /** + * Ericsson Texture Compression + */ + UTIL_FORMAT_LAYOUT_ETC, + + /** + * BC6/7 Texture Compression + */ + UTIL_FORMAT_LAYOUT_BPTC, + + UTIL_FORMAT_LAYOUT_ASTC, + + UTIL_FORMAT_LAYOUT_ATC, + + /** Formats with 2 or more planes. */ + UTIL_FORMAT_LAYOUT_PLANAR2, + UTIL_FORMAT_LAYOUT_PLANAR3, + + UTIL_FORMAT_LAYOUT_FXT1 = 10, + + /** + * Everything else that doesn't fit in any of the above layouts. 
+ */ + UTIL_FORMAT_LAYOUT_OTHER, +}; + + +struct util_format_block +{ + /** Block width in pixels */ + unsigned width; + + /** Block height in pixels */ + unsigned height; + + /** Block depth in pixels */ + unsigned depth; + + /** Block size in bits */ + unsigned bits; +}; + + +enum util_format_type { + UTIL_FORMAT_TYPE_VOID = 0, + UTIL_FORMAT_TYPE_UNSIGNED = 1, + UTIL_FORMAT_TYPE_SIGNED = 2, + UTIL_FORMAT_TYPE_FIXED = 3, + UTIL_FORMAT_TYPE_FLOAT = 4 +}; + + +enum util_format_colorspace { + UTIL_FORMAT_COLORSPACE_RGB = 0, + UTIL_FORMAT_COLORSPACE_SRGB = 1, + UTIL_FORMAT_COLORSPACE_YUV = 2, + UTIL_FORMAT_COLORSPACE_ZS = 3 +}; + + +struct util_format_channel_description +{ + unsigned type:5; /**< UTIL_FORMAT_TYPE_x */ + unsigned normalized:1; + unsigned pure_integer:1; + unsigned size:9; /**< bits per channel */ + unsigned shift:16; /** number of bits from lsb */ +}; + + +struct util_format_description +{ + enum pipe_format format; + + const char *name; + + /** + * Short name, striped of the prefix, lower case. + */ + const char *short_name; + + /** + * Pixel block dimensions. + */ + struct util_format_block block; + + enum util_format_layout layout; + + /** + * The number of channels. + */ + unsigned nr_channels:3; + + /** + * Whether all channels have the same number of (whole) bytes and type. + */ + unsigned is_array:1; + + /** + * Whether the pixel format can be described as a bitfield structure. + * + * In particular: + * - pixel depth must be 8, 16, or 32 bits; + * - all channels must be unsigned, signed, or void + */ + unsigned is_bitmask:1; + + /** + * Whether channels have mixed types (ignoring UTIL_FORMAT_TYPE_VOID). + */ + unsigned is_mixed:1; + + /** + * Whether the format contains UNORM channels + */ + unsigned is_unorm:1; + + /** + * Whether the format contains SNORM channels + */ + unsigned is_snorm:1; + + /** + * Input channel description, in the order XYZW. + * + * Only valid for UTIL_FORMAT_LAYOUT_PLAIN formats. + * + * If each channel is accessed as an individual N-byte value, X is always + * at the lowest address in memory, Y is always next, and so on. For all + * currently-defined formats, the N-byte value has native endianness. + * + * If instead a group of channels is accessed as a single N-byte value, + * the order of the channels within that value depends on endianness. + * For big-endian targets, X is the most significant subvalue, + * otherwise it is the least significant one. + * + * For example, if X is 8 bits and Y is 24 bits, the memory order is: + * + * 0 1 2 3 + * little-endian: X Yl Ym Yu (l = lower, m = middle, u = upper) + * big-endian: X Yu Ym Yl + * + * If X is 5 bits, Y is 5 bits, Z is 5 bits and W is 1 bit, the layout is: + * + * 0 1 + * msb lsb msb lsb + * little-endian: YYYXXXXX WZZZZZYY + * big-endian: XXXXXYYY YYZZZZZW + */ + struct util_format_channel_description channel[4]; + + /** + * Output channel swizzle. + * + * The order is either: + * - RGBA + * - YUV(A) + * - ZS + * depending on the colorspace. + */ + unsigned char swizzle[4]; + + /** + * Colorspace transformation. + */ + enum util_format_colorspace colorspace; + + /** + * Unpack pixel blocks to R8G8B8A8_UNORM. + * Note: strides are in bytes. + * + * Only defined for non-depth-stencil formats. + */ + void + (*unpack_rgba_8unorm)(uint8_t *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Pack pixel blocks from R8G8B8A8_UNORM. + * Note: strides are in bytes. + * + * Only defined for non-depth-stencil formats. 
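+ * Callers normally reach these hooks through the format description, e.g.
+ * (sketch):
+ *
+ *    const struct util_format_description *d = util_format_description(f);
+ *    d->pack_rgba_8unorm(dst, dst_stride, src, src_stride, width, height);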
+ */ + void + (*pack_rgba_8unorm)(uint8_t *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Fetch a single pixel (i, j) from a block. + * + * XXX: Only defined for a very few select formats. + */ + void + (*fetch_rgba_8unorm)(uint8_t *dst, + const uint8_t *src, + unsigned i, unsigned j); + + /** + * Unpack pixel blocks to R32G32B32A32_FLOAT. + * Note: strides are in bytes. + * + * Only defined for non-depth-stencil formats. + */ + void + (*unpack_rgba_float)(float *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Pack pixel blocks from R32G32B32A32_FLOAT. + * Note: strides are in bytes. + * + * Only defined for non-depth-stencil formats. + */ + void + (*pack_rgba_float)(uint8_t *dst, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Fetch a single pixel (i, j) from a block. + * + * Only defined for non-depth-stencil and non-integer formats. + */ + void + (*fetch_rgba_float)(float *dst, + const uint8_t *src, + unsigned i, unsigned j); + + /** + * Unpack pixels to Z32_UNORM. + * Note: strides are in bytes. + * + * Only defined for depth formats. + */ + void + (*unpack_z_32unorm)(uint32_t *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Pack pixels from Z32_FLOAT. + * Note: strides are in bytes. + * + * Only defined for depth formats. + */ + void + (*pack_z_32unorm)(uint8_t *dst, unsigned dst_stride, + const uint32_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Unpack pixels to Z32_FLOAT. + * Note: strides are in bytes. + * + * Only defined for depth formats. + */ + void + (*unpack_z_float)(float *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Pack pixels from Z32_FLOAT. + * Note: strides are in bytes. + * + * Only defined for depth formats. + */ + void + (*pack_z_float)(uint8_t *dst, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Unpack pixels to S8_UINT. + * Note: strides are in bytes. + * + * Only defined for stencil formats. + */ + void + (*unpack_s_8uint)(uint8_t *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Pack pixels from S8_UINT. + * Note: strides are in bytes. + * + * Only defined for stencil formats. + */ + void + (*pack_s_8uint)(uint8_t *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Unpack pixel blocks to R32G32B32A32_UINT. + * Note: strides are in bytes. + * + * Only defined for INT formats. + */ + void + (*unpack_rgba_uint)(uint32_t *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + + void + (*pack_rgba_uint)(uint8_t *dst, unsigned dst_stride, + const uint32_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Unpack pixel blocks to R32G32B32A32_SINT. + * Note: strides are in bytes. + * + * Only defined for INT formats. 
+ */ + void + (*unpack_rgba_sint)(int32_t *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + + void + (*pack_rgba_sint)(uint8_t *dst, unsigned dst_stride, + const int32_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Fetch a single pixel (i, j) from a block. + * + * Only defined for unsigned (pure) integer formats. + */ + void + (*fetch_rgba_uint)(uint32_t *dst, + const uint8_t *src, + unsigned i, unsigned j); + + /** + * Fetch a single pixel (i, j) from a block. + * + * Only defined for signed (pure) integer formats. + */ + void + (*fetch_rgba_sint)(int32_t *dst, + const uint8_t *src, + unsigned i, unsigned j); +}; + + +const struct util_format_description * +util_format_description(enum pipe_format format); + + +/* + * Format query functions. + */ + +static inline const char * +util_format_name(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(desc); + if (!desc) { + return "PIPE_FORMAT_???"; + } + + return desc->name; +} + +static inline const char * +util_format_short_name(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(desc); + if (!desc) { + return "???"; + } + + return desc->short_name; +} + +/** + * Whether this format is plain, see UTIL_FORMAT_LAYOUT_PLAIN for more info. + */ +static inline boolean +util_format_is_plain(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + if (!format) { + return FALSE; + } + + return desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ? TRUE : FALSE; +} + +static inline boolean +util_format_is_compressed(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(desc); + if (!desc) { + return FALSE; + } + + switch (desc->layout) { + case UTIL_FORMAT_LAYOUT_S3TC: + case UTIL_FORMAT_LAYOUT_RGTC: + case UTIL_FORMAT_LAYOUT_ETC: + case UTIL_FORMAT_LAYOUT_BPTC: + case UTIL_FORMAT_LAYOUT_ASTC: + case UTIL_FORMAT_LAYOUT_ATC: + case UTIL_FORMAT_LAYOUT_FXT1: + /* XXX add other formats in the future */ + return TRUE; + default: + return FALSE; + } +} + +static inline boolean +util_format_is_s3tc(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(desc); + if (!desc) { + return FALSE; + } + + return desc->layout == UTIL_FORMAT_LAYOUT_S3TC ? TRUE : FALSE; +} + +static inline boolean +util_format_is_etc(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(desc); + if (!desc) { + return FALSE; + } + + return desc->layout == UTIL_FORMAT_LAYOUT_ETC ? 
TRUE : FALSE; +} + +static inline boolean +util_format_is_srgb(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + return desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB; +} + +static inline boolean +util_format_has_depth(const struct util_format_description *desc) +{ + return desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS && + desc->swizzle[0] != PIPE_SWIZZLE_NONE; +} + +static inline boolean +util_format_has_stencil(const struct util_format_description *desc) +{ + return desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS && + desc->swizzle[1] != PIPE_SWIZZLE_NONE; +} + +static inline boolean +util_format_is_depth_or_stencil(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(desc); + if (!desc) { + return FALSE; + } + + return util_format_has_depth(desc) || + util_format_has_stencil(desc); +} + +static inline boolean +util_format_is_depth_and_stencil(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(desc); + if (!desc) { + return FALSE; + } + + return util_format_has_depth(desc) && + util_format_has_stencil(desc); +} + +/** + * For depth-stencil formats, return the equivalent depth-only format. + */ +static inline enum pipe_format +util_format_get_depth_only(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return PIPE_FORMAT_Z24X8_UNORM; + + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + return PIPE_FORMAT_X8Z24_UNORM; + + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return PIPE_FORMAT_Z32_FLOAT; + + default: + return format; + } +} + +static inline boolean +util_format_is_yuv(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(desc); + if (!desc) { + return FALSE; + } + + return desc->colorspace == UTIL_FORMAT_COLORSPACE_YUV; +} + +/** + * Calculates the depth format type based upon the incoming format description. + */ +static inline unsigned +util_get_depth_format_type(const struct util_format_description *desc) +{ + unsigned depth_channel = desc->swizzle[0]; + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS && + depth_channel != PIPE_SWIZZLE_NONE) { + return desc->channel[depth_channel].type; + } else { + return UTIL_FORMAT_TYPE_VOID; + } +} + + +/** + * Calculates the MRD for the depth format. MRD is used in depth bias + * for UNORM and unbound depth buffers. When the depth buffer is floating + * point, the depth bias calculation does not use the MRD. However, the + * default MRD will be 1.0 / ((1 << 24) - 1). + */ +double +util_get_depth_format_mrd(const struct util_format_description *desc); + + +/** + * Return whether this is an RGBA, Z, S, or combined ZS format. + * Useful for initializing pipe_blit_info::mask. + */ +static inline unsigned +util_format_get_mask(enum pipe_format format) +{ + const struct util_format_description *desc = + util_format_description(format); + + if (!desc) + return 0; + + if (util_format_has_depth(desc)) { + if (util_format_has_stencil(desc)) { + return PIPE_MASK_ZS; + } else { + return PIPE_MASK_Z; + } + } else { + if (util_format_has_stencil(desc)) { + return PIPE_MASK_S; + } else { + return PIPE_MASK_RGBA; + } + } +} + +/** + * Give the RGBA colormask of the channels that can be represented in this + * format. + * + * That is, the channels whose values are preserved. 
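 *
 * Rough illustration (assuming the usual RGBX swizzle of X, Y, Z, 1):
 *
 *    const struct util_format_description *d =
 *       util_format_description(PIPE_FORMAT_R8G8B8X8_UNORM);
 *    unsigned mask = util_format_colormask(d);
 *    // mask == 0x7 here: the padding channel maps to PIPE_SWIZZLE_1, which
 *    // is not one of the four real channels, so bit 3 stays clear.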
+ */ +static inline unsigned +util_format_colormask(const struct util_format_description *desc) +{ + unsigned colormask; + unsigned chan; + + switch (desc->colorspace) { + case UTIL_FORMAT_COLORSPACE_RGB: + case UTIL_FORMAT_COLORSPACE_SRGB: + case UTIL_FORMAT_COLORSPACE_YUV: + colormask = 0; + for (chan = 0; chan < 4; ++chan) { + if (desc->swizzle[chan] < 4) { + colormask |= (1 << chan); + } + } + return colormask; + case UTIL_FORMAT_COLORSPACE_ZS: + return 0; + default: + assert(0); + return 0; + } +} + + +/** + * Checks if color mask covers every channel for the specified format + * + * @param desc a format description to check colormask with + * @param colormask a bit mask for channels, matches format of PIPE_MASK_RGBA + */ +static inline boolean +util_format_colormask_full(const struct util_format_description *desc, unsigned colormask) +{ + return (~colormask & util_format_colormask(desc)) == 0; +} + + +boolean +util_format_is_float(enum pipe_format format); + + +boolean +util_format_has_alpha(enum pipe_format format); + + +boolean +util_format_is_luminance(enum pipe_format format); + +boolean +util_format_is_alpha(enum pipe_format format); + +boolean +util_format_is_luminance_alpha(enum pipe_format format); + + +boolean +util_format_is_intensity(enum pipe_format format); + +boolean +util_format_is_subsampled_422(enum pipe_format format); + +boolean +util_format_is_pure_integer(enum pipe_format format); + +boolean +util_format_is_pure_sint(enum pipe_format format); + +boolean +util_format_is_pure_uint(enum pipe_format format); + +boolean +util_format_is_snorm(enum pipe_format format); + +boolean +util_format_is_unorm(enum pipe_format format); + +boolean +util_format_is_snorm8(enum pipe_format format); + +/** + * Check if the src format can be blitted to the destination format with + * a simple memcpy. For example, blitting from RGBA to RGBx is OK, but not + * the reverse. + */ +boolean +util_is_format_compatible(const struct util_format_description *src_desc, + const struct util_format_description *dst_desc); + +/** + * Whether this format is a rgab8 variant. + * + * That is, any format that matches the + * + * PIPE_FORMAT_?8?8?8?8_UNORM + */ +static inline boolean +util_format_is_rgba8_variant(const struct util_format_description *desc) +{ + unsigned chan; + + if(desc->block.width != 1 || + desc->block.height != 1 || + desc->block.bits != 32) + return FALSE; + + for(chan = 0; chan < 4; ++chan) { + if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED && + desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) + return FALSE; + if(desc->channel[chan].type == UTIL_FORMAT_TYPE_UNSIGNED && + !desc->channel[chan].normalized) + return FALSE; + if(desc->channel[chan].size != 8) + return FALSE; + } + + return TRUE; +} + +/** + * Return total bits needed for the pixel format per block. + */ +static inline uint +util_format_get_blocksizebits(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(desc); + if (!desc) { + return 0; + } + + return desc->block.bits; +} + +/** + * Return bytes per block (not pixel) for the given format. 
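 *
 * For example (well-known block sizes, shown purely as an illustration):
 *
 *    util_format_get_blocksize(PIPE_FORMAT_R8G8B8A8_UNORM);  // 1x1 block, 4 bytes
 *    util_format_get_blocksize(PIPE_FORMAT_DXT1_RGB);        // 4x4 block, 8 bytes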
+ */ +static inline uint +util_format_get_blocksize(enum pipe_format format) +{ + uint bits = util_format_get_blocksizebits(format); + uint bytes = bits / 8; + + assert(bits % 8 == 0); + assert(bytes > 0); + if (bytes == 0) { + bytes = 1; + } + + return bytes; +} + +static inline uint +util_format_get_blockwidth(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(desc); + if (!desc) { + return 1; + } + + return desc->block.width; +} + +static inline uint +util_format_get_blockheight(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(desc); + if (!desc) { + return 1; + } + + return desc->block.height; +} + +static inline uint +util_format_get_blockdepth(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(desc); + if (!desc) { + return 1; + } + + return desc->block.depth; +} + +static inline unsigned +util_format_get_nblocksx(enum pipe_format format, + unsigned x) +{ + unsigned blockwidth = util_format_get_blockwidth(format); + return (x + blockwidth - 1) / blockwidth; +} + +static inline unsigned +util_format_get_nblocksy(enum pipe_format format, + unsigned y) +{ + unsigned blockheight = util_format_get_blockheight(format); + return (y + blockheight - 1) / blockheight; +} + +static inline unsigned +util_format_get_nblocksz(enum pipe_format format, + unsigned z) +{ + unsigned blockdepth = util_format_get_blockdepth(format); + return (z + blockdepth - 1) / blockdepth; +} + +static inline unsigned +util_format_get_nblocks(enum pipe_format format, + unsigned width, + unsigned height) +{ + assert(util_format_get_blockdepth(format) == 1); + return util_format_get_nblocksx(format, width) * util_format_get_nblocksy(format, height); +} + +static inline size_t +util_format_get_stride(enum pipe_format format, + unsigned width) +{ + return (size_t)util_format_get_nblocksx(format, width) * util_format_get_blocksize(format); +} + +static inline size_t +util_format_get_2d_size(enum pipe_format format, + size_t stride, + unsigned height) +{ + return util_format_get_nblocksy(format, height) * stride; +} + +static inline uint +util_format_get_component_bits(enum pipe_format format, + enum util_format_colorspace colorspace, + uint component) +{ + const struct util_format_description *desc = util_format_description(format); + enum util_format_colorspace desc_colorspace; + + assert(format); + if (!format) { + return 0; + } + + assert(component < 4); + + /* Treat RGB and SRGB as equivalent. */ + if (colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { + colorspace = UTIL_FORMAT_COLORSPACE_RGB; + } + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { + desc_colorspace = UTIL_FORMAT_COLORSPACE_RGB; + } else { + desc_colorspace = desc->colorspace; + } + + if (desc_colorspace != colorspace) { + return 0; + } + + switch (desc->swizzle[component]) { + case PIPE_SWIZZLE_X: + return desc->channel[0].size; + case PIPE_SWIZZLE_Y: + return desc->channel[1].size; + case PIPE_SWIZZLE_Z: + return desc->channel[2].size; + case PIPE_SWIZZLE_W: + return desc->channel[3].size; + default: + return 0; + } +} + +/** + * Given a linear RGB colorspace format, return the corresponding SRGB + * format, or PIPE_FORMAT_NONE if none. 
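 *
 * Usage sketch (illustrative only):
 *
 *    enum pipe_format s = util_format_srgb(PIPE_FORMAT_R8G8B8A8_UNORM);
 *    // s == PIPE_FORMAT_R8G8B8A8_SRGB; util_format_linear(s), defined
 *    // below, maps it back to PIPE_FORMAT_R8G8B8A8_UNORM.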
+ */ +static inline enum pipe_format +util_format_srgb(enum pipe_format format) +{ + if (util_format_is_srgb(format)) + return format; + + switch (format) { + case PIPE_FORMAT_L8_UNORM: + return PIPE_FORMAT_L8_SRGB; + case PIPE_FORMAT_R8_UNORM: + return PIPE_FORMAT_R8_SRGB; + case PIPE_FORMAT_L8A8_UNORM: + return PIPE_FORMAT_L8A8_SRGB; + case PIPE_FORMAT_R8G8_UNORM: + return PIPE_FORMAT_R8G8_SRGB; + case PIPE_FORMAT_R8G8B8_UNORM: + return PIPE_FORMAT_R8G8B8_SRGB; + case PIPE_FORMAT_B8G8R8_UNORM: + return PIPE_FORMAT_B8G8R8_SRGB; + case PIPE_FORMAT_A8B8G8R8_UNORM: + return PIPE_FORMAT_A8B8G8R8_SRGB; + case PIPE_FORMAT_X8B8G8R8_UNORM: + return PIPE_FORMAT_X8B8G8R8_SRGB; + case PIPE_FORMAT_B8G8R8A8_UNORM: + return PIPE_FORMAT_B8G8R8A8_SRGB; + case PIPE_FORMAT_B8G8R8X8_UNORM: + return PIPE_FORMAT_B8G8R8X8_SRGB; + case PIPE_FORMAT_A8R8G8B8_UNORM: + return PIPE_FORMAT_A8R8G8B8_SRGB; + case PIPE_FORMAT_X8R8G8B8_UNORM: + return PIPE_FORMAT_X8R8G8B8_SRGB; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return PIPE_FORMAT_R8G8B8A8_SRGB; + case PIPE_FORMAT_R8G8B8X8_UNORM: + return PIPE_FORMAT_R8G8B8X8_SRGB; + case PIPE_FORMAT_DXT1_RGB: + return PIPE_FORMAT_DXT1_SRGB; + case PIPE_FORMAT_DXT1_RGBA: + return PIPE_FORMAT_DXT1_SRGBA; + case PIPE_FORMAT_DXT3_RGBA: + return PIPE_FORMAT_DXT3_SRGBA; + case PIPE_FORMAT_DXT5_RGBA: + return PIPE_FORMAT_DXT5_SRGBA; + case PIPE_FORMAT_B5G6R5_UNORM: + return PIPE_FORMAT_B5G6R5_SRGB; + case PIPE_FORMAT_BPTC_RGBA_UNORM: + return PIPE_FORMAT_BPTC_SRGBA; + case PIPE_FORMAT_ETC2_RGB8: + return PIPE_FORMAT_ETC2_SRGB8; + case PIPE_FORMAT_ETC2_RGB8A1: + return PIPE_FORMAT_ETC2_SRGB8A1; + case PIPE_FORMAT_ETC2_RGBA8: + return PIPE_FORMAT_ETC2_SRGBA8; + case PIPE_FORMAT_ASTC_4x4: + return PIPE_FORMAT_ASTC_4x4_SRGB; + case PIPE_FORMAT_ASTC_5x4: + return PIPE_FORMAT_ASTC_5x4_SRGB; + case PIPE_FORMAT_ASTC_5x5: + return PIPE_FORMAT_ASTC_5x5_SRGB; + case PIPE_FORMAT_ASTC_6x5: + return PIPE_FORMAT_ASTC_6x5_SRGB; + case PIPE_FORMAT_ASTC_6x6: + return PIPE_FORMAT_ASTC_6x6_SRGB; + case PIPE_FORMAT_ASTC_8x5: + return PIPE_FORMAT_ASTC_8x5_SRGB; + case PIPE_FORMAT_ASTC_8x6: + return PIPE_FORMAT_ASTC_8x6_SRGB; + case PIPE_FORMAT_ASTC_8x8: + return PIPE_FORMAT_ASTC_8x8_SRGB; + case PIPE_FORMAT_ASTC_10x5: + return PIPE_FORMAT_ASTC_10x5_SRGB; + case PIPE_FORMAT_ASTC_10x6: + return PIPE_FORMAT_ASTC_10x6_SRGB; + case PIPE_FORMAT_ASTC_10x8: + return PIPE_FORMAT_ASTC_10x8_SRGB; + case PIPE_FORMAT_ASTC_10x10: + return PIPE_FORMAT_ASTC_10x10_SRGB; + case PIPE_FORMAT_ASTC_12x10: + return PIPE_FORMAT_ASTC_12x10_SRGB; + case PIPE_FORMAT_ASTC_12x12: + return PIPE_FORMAT_ASTC_12x12_SRGB; + case PIPE_FORMAT_ASTC_3x3x3: + return PIPE_FORMAT_ASTC_3x3x3_SRGB; + case PIPE_FORMAT_ASTC_4x3x3: + return PIPE_FORMAT_ASTC_4x3x3_SRGB; + case PIPE_FORMAT_ASTC_4x4x3: + return PIPE_FORMAT_ASTC_4x4x3_SRGB; + case PIPE_FORMAT_ASTC_4x4x4: + return PIPE_FORMAT_ASTC_4x4x4_SRGB; + case PIPE_FORMAT_ASTC_5x4x4: + return PIPE_FORMAT_ASTC_5x4x4_SRGB; + case PIPE_FORMAT_ASTC_5x5x4: + return PIPE_FORMAT_ASTC_5x5x4_SRGB; + case PIPE_FORMAT_ASTC_5x5x5: + return PIPE_FORMAT_ASTC_5x5x5_SRGB; + case PIPE_FORMAT_ASTC_6x5x5: + return PIPE_FORMAT_ASTC_6x5x5_SRGB; + case PIPE_FORMAT_ASTC_6x6x5: + return PIPE_FORMAT_ASTC_6x6x5_SRGB; + case PIPE_FORMAT_ASTC_6x6x6: + return PIPE_FORMAT_ASTC_6x6x6_SRGB; + + default: + return PIPE_FORMAT_NONE; + } +} + +/** + * Given an sRGB format, return the corresponding linear colorspace format. + * For non sRGB formats, return the format unchanged. 
+ */ +static inline enum pipe_format +util_format_linear(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_L8_SRGB: + return PIPE_FORMAT_L8_UNORM; + case PIPE_FORMAT_R8_SRGB: + return PIPE_FORMAT_R8_UNORM; + case PIPE_FORMAT_L8A8_SRGB: + return PIPE_FORMAT_L8A8_UNORM; + case PIPE_FORMAT_R8G8_SRGB: + return PIPE_FORMAT_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_SRGB: + return PIPE_FORMAT_R8G8B8_UNORM; + case PIPE_FORMAT_B8G8R8_SRGB: + return PIPE_FORMAT_B8G8R8_UNORM; + case PIPE_FORMAT_A8B8G8R8_SRGB: + return PIPE_FORMAT_A8B8G8R8_UNORM; + case PIPE_FORMAT_X8B8G8R8_SRGB: + return PIPE_FORMAT_X8B8G8R8_UNORM; + case PIPE_FORMAT_B8G8R8A8_SRGB: + return PIPE_FORMAT_B8G8R8A8_UNORM; + case PIPE_FORMAT_B8G8R8X8_SRGB: + return PIPE_FORMAT_B8G8R8X8_UNORM; + case PIPE_FORMAT_A8R8G8B8_SRGB: + return PIPE_FORMAT_A8R8G8B8_UNORM; + case PIPE_FORMAT_X8R8G8B8_SRGB: + return PIPE_FORMAT_X8R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_SRGB: + return PIPE_FORMAT_R8G8B8A8_UNORM; + case PIPE_FORMAT_R8G8B8X8_SRGB: + return PIPE_FORMAT_R8G8B8X8_UNORM; + case PIPE_FORMAT_DXT1_SRGB: + return PIPE_FORMAT_DXT1_RGB; + case PIPE_FORMAT_DXT1_SRGBA: + return PIPE_FORMAT_DXT1_RGBA; + case PIPE_FORMAT_DXT3_SRGBA: + return PIPE_FORMAT_DXT3_RGBA; + case PIPE_FORMAT_DXT5_SRGBA: + return PIPE_FORMAT_DXT5_RGBA; + case PIPE_FORMAT_B5G6R5_SRGB: + return PIPE_FORMAT_B5G6R5_UNORM; + case PIPE_FORMAT_BPTC_SRGBA: + return PIPE_FORMAT_BPTC_RGBA_UNORM; + case PIPE_FORMAT_ETC2_SRGB8: + return PIPE_FORMAT_ETC2_RGB8; + case PIPE_FORMAT_ETC2_SRGB8A1: + return PIPE_FORMAT_ETC2_RGB8A1; + case PIPE_FORMAT_ETC2_SRGBA8: + return PIPE_FORMAT_ETC2_RGBA8; + case PIPE_FORMAT_ASTC_4x4_SRGB: + return PIPE_FORMAT_ASTC_4x4; + case PIPE_FORMAT_ASTC_5x4_SRGB: + return PIPE_FORMAT_ASTC_5x4; + case PIPE_FORMAT_ASTC_5x5_SRGB: + return PIPE_FORMAT_ASTC_5x5; + case PIPE_FORMAT_ASTC_6x5_SRGB: + return PIPE_FORMAT_ASTC_6x5; + case PIPE_FORMAT_ASTC_6x6_SRGB: + return PIPE_FORMAT_ASTC_6x6; + case PIPE_FORMAT_ASTC_8x5_SRGB: + return PIPE_FORMAT_ASTC_8x5; + case PIPE_FORMAT_ASTC_8x6_SRGB: + return PIPE_FORMAT_ASTC_8x6; + case PIPE_FORMAT_ASTC_8x8_SRGB: + return PIPE_FORMAT_ASTC_8x8; + case PIPE_FORMAT_ASTC_10x5_SRGB: + return PIPE_FORMAT_ASTC_10x5; + case PIPE_FORMAT_ASTC_10x6_SRGB: + return PIPE_FORMAT_ASTC_10x6; + case PIPE_FORMAT_ASTC_10x8_SRGB: + return PIPE_FORMAT_ASTC_10x8; + case PIPE_FORMAT_ASTC_10x10_SRGB: + return PIPE_FORMAT_ASTC_10x10; + case PIPE_FORMAT_ASTC_12x10_SRGB: + return PIPE_FORMAT_ASTC_12x10; + case PIPE_FORMAT_ASTC_12x12_SRGB: + return PIPE_FORMAT_ASTC_12x12; + case PIPE_FORMAT_ASTC_3x3x3_SRGB: + return PIPE_FORMAT_ASTC_3x3x3; + case PIPE_FORMAT_ASTC_4x3x3_SRGB: + return PIPE_FORMAT_ASTC_4x3x3; + case PIPE_FORMAT_ASTC_4x4x3_SRGB: + return PIPE_FORMAT_ASTC_4x4x3; + case PIPE_FORMAT_ASTC_4x4x4_SRGB: + return PIPE_FORMAT_ASTC_4x4x4; + case PIPE_FORMAT_ASTC_5x4x4_SRGB: + return PIPE_FORMAT_ASTC_5x4x4; + case PIPE_FORMAT_ASTC_5x5x4_SRGB: + return PIPE_FORMAT_ASTC_5x5x4; + case PIPE_FORMAT_ASTC_5x5x5_SRGB: + return PIPE_FORMAT_ASTC_5x5x5; + case PIPE_FORMAT_ASTC_6x5x5_SRGB: + return PIPE_FORMAT_ASTC_6x5x5; + case PIPE_FORMAT_ASTC_6x6x5_SRGB: + return PIPE_FORMAT_ASTC_6x6x5; + case PIPE_FORMAT_ASTC_6x6x6_SRGB: + return PIPE_FORMAT_ASTC_6x6x6; + default: + return format; + } +} + +/** + * Given a depth-stencil format, return the corresponding stencil-only format. + * For stencil-only formats, return the format unchanged. 
+ */ +static inline enum pipe_format +util_format_stencil_only(enum pipe_format format) +{ + switch (format) { + /* mask out the depth component */ + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return PIPE_FORMAT_X24S8_UINT; + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + return PIPE_FORMAT_S8X24_UINT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return PIPE_FORMAT_X32_S8X24_UINT; + + /* stencil only formats */ + case PIPE_FORMAT_X24S8_UINT: + case PIPE_FORMAT_S8X24_UINT: + case PIPE_FORMAT_X32_S8X24_UINT: + case PIPE_FORMAT_S8_UINT: + return format; + + default: + assert(0); + return PIPE_FORMAT_NONE; + } +} + +/** + * Converts PIPE_FORMAT_*I* to PIPE_FORMAT_*R*. + * This is identity for non-intensity formats. + */ +static inline enum pipe_format +util_format_intensity_to_red(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_I8_UNORM: + return PIPE_FORMAT_R8_UNORM; + case PIPE_FORMAT_I8_SNORM: + return PIPE_FORMAT_R8_SNORM; + case PIPE_FORMAT_I16_UNORM: + return PIPE_FORMAT_R16_UNORM; + case PIPE_FORMAT_I16_SNORM: + return PIPE_FORMAT_R16_SNORM; + case PIPE_FORMAT_I16_FLOAT: + return PIPE_FORMAT_R16_FLOAT; + case PIPE_FORMAT_I32_FLOAT: + return PIPE_FORMAT_R32_FLOAT; + case PIPE_FORMAT_I8_UINT: + return PIPE_FORMAT_R8_UINT; + case PIPE_FORMAT_I8_SINT: + return PIPE_FORMAT_R8_SINT; + case PIPE_FORMAT_I16_UINT: + return PIPE_FORMAT_R16_UINT; + case PIPE_FORMAT_I16_SINT: + return PIPE_FORMAT_R16_SINT; + case PIPE_FORMAT_I32_UINT: + return PIPE_FORMAT_R32_UINT; + case PIPE_FORMAT_I32_SINT: + return PIPE_FORMAT_R32_SINT; + default: + assert(!util_format_is_intensity(format)); + return format; + } +} + +/** + * Converts PIPE_FORMAT_*L* to PIPE_FORMAT_*R*. + * This is identity for non-luminance formats. + */ +static inline enum pipe_format +util_format_luminance_to_red(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_L8_UNORM: + return PIPE_FORMAT_R8_UNORM; + case PIPE_FORMAT_L8_SNORM: + return PIPE_FORMAT_R8_SNORM; + case PIPE_FORMAT_L16_UNORM: + return PIPE_FORMAT_R16_UNORM; + case PIPE_FORMAT_L16_SNORM: + return PIPE_FORMAT_R16_SNORM; + case PIPE_FORMAT_L16_FLOAT: + return PIPE_FORMAT_R16_FLOAT; + case PIPE_FORMAT_L32_FLOAT: + return PIPE_FORMAT_R32_FLOAT; + case PIPE_FORMAT_L8_UINT: + return PIPE_FORMAT_R8_UINT; + case PIPE_FORMAT_L8_SINT: + return PIPE_FORMAT_R8_SINT; + case PIPE_FORMAT_L16_UINT: + return PIPE_FORMAT_R16_UINT; + case PIPE_FORMAT_L16_SINT: + return PIPE_FORMAT_R16_SINT; + case PIPE_FORMAT_L32_UINT: + return PIPE_FORMAT_R32_UINT; + case PIPE_FORMAT_L32_SINT: + return PIPE_FORMAT_R32_SINT; + + case PIPE_FORMAT_LATC1_UNORM: + return PIPE_FORMAT_RGTC1_UNORM; + case PIPE_FORMAT_LATC1_SNORM: + return PIPE_FORMAT_RGTC1_SNORM; + + case PIPE_FORMAT_L4A4_UNORM: + return PIPE_FORMAT_R4A4_UNORM; + + case PIPE_FORMAT_L8A8_UNORM: + return PIPE_FORMAT_R8A8_UNORM; + case PIPE_FORMAT_L8A8_SNORM: + return PIPE_FORMAT_R8A8_SNORM; + case PIPE_FORMAT_L16A16_UNORM: + return PIPE_FORMAT_R16A16_UNORM; + case PIPE_FORMAT_L16A16_SNORM: + return PIPE_FORMAT_R16A16_SNORM; + case PIPE_FORMAT_L16A16_FLOAT: + return PIPE_FORMAT_R16A16_FLOAT; + case PIPE_FORMAT_L32A32_FLOAT: + return PIPE_FORMAT_R32A32_FLOAT; + case PIPE_FORMAT_L8A8_UINT: + return PIPE_FORMAT_R8A8_UINT; + case PIPE_FORMAT_L8A8_SINT: + return PIPE_FORMAT_R8A8_SINT; + case PIPE_FORMAT_L16A16_UINT: + return PIPE_FORMAT_R16A16_UINT; + case PIPE_FORMAT_L16A16_SINT: + return PIPE_FORMAT_R16A16_SINT; + case PIPE_FORMAT_L32A32_UINT: + return PIPE_FORMAT_R32A32_UINT; + case PIPE_FORMAT_L32A32_SINT: + return 
PIPE_FORMAT_R32A32_SINT; + + /* We don't have compressed red-alpha variants for these. */ + case PIPE_FORMAT_LATC2_UNORM: + case PIPE_FORMAT_LATC2_SNORM: + return PIPE_FORMAT_NONE; + + default: + assert(!util_format_is_luminance(format) && + !util_format_is_luminance_alpha(format)); + return format; + } +} + +static inline unsigned +util_format_get_num_planes(enum pipe_format format) +{ + switch (util_format_description(format)->layout) { + case UTIL_FORMAT_LAYOUT_PLANAR3: + return 3; + case UTIL_FORMAT_LAYOUT_PLANAR2: + return 2; + default: + return 1; + } +} + +static inline enum pipe_format +util_format_get_plane_format(enum pipe_format format, unsigned plane) +{ + switch (format) { + case PIPE_FORMAT_YV12: + case PIPE_FORMAT_YV16: + case PIPE_FORMAT_IYUV: + return PIPE_FORMAT_R8_UNORM; + case PIPE_FORMAT_NV12: + return !plane ? PIPE_FORMAT_R8_UNORM : PIPE_FORMAT_RG88_UNORM; + case PIPE_FORMAT_NV21: + return !plane ? PIPE_FORMAT_R8_UNORM : PIPE_FORMAT_GR88_UNORM; + case PIPE_FORMAT_P010: + case PIPE_FORMAT_P016: + return !plane ? PIPE_FORMAT_R16_UNORM : PIPE_FORMAT_R16G16_UNORM; + default: + return format; + } +} + +static inline unsigned +util_format_get_plane_width(enum pipe_format format, unsigned plane, + unsigned width) +{ + switch (format) { + case PIPE_FORMAT_YV12: + case PIPE_FORMAT_YV16: + case PIPE_FORMAT_IYUV: + case PIPE_FORMAT_NV12: + case PIPE_FORMAT_NV21: + case PIPE_FORMAT_P010: + case PIPE_FORMAT_P016: + return !plane ? width : (width + 1) / 2; + default: + return width; + } +} + +static inline unsigned +util_format_get_plane_height(enum pipe_format format, unsigned plane, + unsigned height) +{ + switch (format) { + case PIPE_FORMAT_YV12: + case PIPE_FORMAT_IYUV: + case PIPE_FORMAT_NV12: + case PIPE_FORMAT_NV21: + case PIPE_FORMAT_P010: + case PIPE_FORMAT_P016: + return !plane ? height : (height + 1) / 2; + case PIPE_FORMAT_YV16: + default: + return height; + } +} + +bool util_format_planar_is_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned storage_sample_count, + unsigned bind); + +/** + * Return the number of components stored. + * Formats with block size != 1x1 will always have 1 component (the block). + */ +static inline unsigned +util_format_get_nr_components(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + return desc->nr_channels; +} + +/** + * Return the index of the first non-void channel + * -1 if no non-void channels + */ +static inline int +util_format_get_first_non_void_channel(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + int i; + + for (i = 0; i < 4; i++) + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) + break; + + if (i == 4) + return -1; + + return i; +} + +/** + * Whether this format is any 8-bit UNORM variant. Looser than + * util_is_rgba8_variant (also includes alpha textures, for instance). 
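 *
 * For instance (a sketch based on the checks below; formats chosen only
 * as plausible examples):
 *
 *    util_format_is_unorm8(util_format_description(PIPE_FORMAT_A8_UNORM));   // true
 *    util_format_is_unorm8(util_format_description(PIPE_FORMAT_R16_UNORM));  // false: 16-bit channel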
+ */ + +static inline bool +util_format_is_unorm8(const struct util_format_description *desc) +{ + int c = util_format_get_first_non_void_channel(desc->format); + + if (c == -1) + return false; + + return desc->is_unorm && desc->is_array && desc->channel[c].size == 8; +} + +static inline void +util_format_unpack_z_float(enum pipe_format format, float *dst, + const void *src, unsigned w) +{ + const struct util_format_description *desc = util_format_description(format); + + desc->unpack_z_float(dst, 0, (const uint8_t *)src, 0, w, 1); +} + +static inline void +util_format_unpack_z_32unorm(enum pipe_format format, uint32_t *dst, + const void *src, unsigned w) +{ + const struct util_format_description *desc = util_format_description(format); + + desc->unpack_z_32unorm(dst, 0, (const uint8_t *)src, 0, w, 1); +} + +static inline void +util_format_unpack_s_8uint(enum pipe_format format, uint8_t *dst, + const void *src, unsigned w) +{ + const struct util_format_description *desc = util_format_description(format); + + desc->unpack_s_8uint(dst, 0, (const uint8_t *)src, 0, w, 1); +} + +static inline void +util_format_unpack_rgba_float(enum pipe_format format, float *dst, + const void *src, unsigned w) +{ + const struct util_format_description *desc = util_format_description(format); + + desc->unpack_rgba_float(dst, 0, (const uint8_t *)src, 0, w, 1); +} + +/** + * Unpacks a row of color data to 32-bit RGBA, either integers for pure + * integer formats (sign-extended for signed data), or 32-bit floats. + */ +static inline void +util_format_unpack_rgba(enum pipe_format format, void *dst, + const void *src, unsigned w) +{ + const struct util_format_description *desc = util_format_description(format); + + if (util_format_is_pure_uint(format)) + desc->unpack_rgba_uint((uint32_t *)dst, 0, (const uint8_t *)src, 0, w, 1); + else if (util_format_is_pure_sint(format)) + desc->unpack_rgba_sint((int32_t *)dst, 0, (const uint8_t *)src, 0, w, 1); + else + desc->unpack_rgba_float((float *)dst, 0, (const uint8_t *)src, 0, w, 1); +} + +static inline void +util_format_pack_z_float(enum pipe_format format, void *dst, + const float *src, unsigned w) +{ + const struct util_format_description *desc = util_format_description(format); + + desc->pack_z_float((uint8_t *)dst, 0, src, 0, w, 1); +} + +static inline void +util_format_pack_z_32unorm(enum pipe_format format, void *dst, + const uint32_t *src, unsigned w) +{ + const struct util_format_description *desc = util_format_description(format); + + desc->pack_z_32unorm((uint8_t *)dst, 0, src, 0, w, 1); +} + +static inline void +util_format_pack_s_8uint(enum pipe_format format, void *dst, + const uint8_t *src, unsigned w) +{ + const struct util_format_description *desc = util_format_description(format); + + desc->pack_s_8uint((uint8_t *)dst, 0, src, 0, w, 1); +} + +/** + * Packs a row of color data from 32-bit RGBA, either integers for pure + * integer formats, or 32-bit floats. Values are clamped to the packed + * representation's range. 
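 *
 * Rough usage sketch for a 4-pixel row (format and values are illustrative):
 *
 *    float rgba[4 * 4] = { 1.0f, 0.5f, 0.0f, 1.0f };  // remaining pixels zero
 *    uint32_t packed[4];                              // room for a 32bpp format
 *    util_format_pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, packed, rgba, 4);
 *    util_format_unpack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, rgba, packed, 4);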
+ */ +static inline void +util_format_pack_rgba(enum pipe_format format, void *dst, + const void *src, unsigned w) +{ + const struct util_format_description *desc = util_format_description(format); + + if (util_format_is_pure_uint(format)) + desc->pack_rgba_uint((uint8_t *)dst, 0, (const uint32_t *)src, 0, w, 1); + else if (util_format_is_pure_sint(format)) + desc->pack_rgba_sint((uint8_t *)dst, 0, (const int32_t *)src, 0, w, 1); + else + desc->pack_rgba_float((uint8_t *)dst, 0, (const float *)src, 0, w, 1); +} + +/* + * Format access functions. + */ + +void +util_format_read_4f(enum pipe_format format, + float *dst, unsigned dst_stride, + const void *src, unsigned src_stride, + unsigned x, unsigned y, unsigned w, unsigned h); + +void +util_format_write_4f(enum pipe_format format, + const float *src, unsigned src_stride, + void *dst, unsigned dst_stride, + unsigned x, unsigned y, unsigned w, unsigned h); + +void +util_format_read_4ub(enum pipe_format format, + uint8_t *dst, unsigned dst_stride, + const void *src, unsigned src_stride, + unsigned x, unsigned y, unsigned w, unsigned h); + +void +util_format_write_4ub(enum pipe_format format, + const uint8_t *src, unsigned src_stride, + void *dst, unsigned dst_stride, + unsigned x, unsigned y, unsigned w, unsigned h); + +void +util_format_read_4ui(enum pipe_format format, + unsigned *dst, unsigned dst_stride, + const void *src, unsigned src_stride, + unsigned x, unsigned y, unsigned w, unsigned h); + +void +util_format_write_4ui(enum pipe_format format, + const unsigned int *src, unsigned src_stride, + void *dst, unsigned dst_stride, + unsigned x, unsigned y, unsigned w, unsigned h); + +void +util_format_read_4i(enum pipe_format format, + int *dst, unsigned dst_stride, + const void *src, unsigned src_stride, + unsigned x, unsigned y, unsigned w, unsigned h); + +void +util_format_write_4i(enum pipe_format format, + const int *src, unsigned src_stride, + void *dst, unsigned dst_stride, + unsigned x, unsigned y, unsigned w, unsigned h); + +/* + * Generic format conversion; + */ + +boolean +util_format_fits_8unorm(const struct util_format_description *format_desc); + +boolean +util_format_translate(enum pipe_format dst_format, + void *dst, unsigned dst_stride, + unsigned dst_x, unsigned dst_y, + enum pipe_format src_format, + const void *src, unsigned src_stride, + unsigned src_x, unsigned src_y, + unsigned width, unsigned height); + +boolean +util_format_translate_3d(enum pipe_format dst_format, + void *dst, unsigned dst_stride, + unsigned dst_slice_stride, + unsigned dst_x, unsigned dst_y, + unsigned dst_z, + enum pipe_format src_format, + const void *src, unsigned src_stride, + unsigned src_slice_stride, + unsigned src_x, unsigned src_y, + unsigned src_z, unsigned width, + unsigned height, unsigned depth); + +/* + * Swizzle operations. + */ + +/* Compose two sets of swizzles. + * If V is a 4D vector and the function parameters represent functions that + * swizzle vector components, this holds: + * swz2(swz1(V)) = dst(V) + */ +void util_format_compose_swizzles(const unsigned char swz1[4], + const unsigned char swz2[4], + unsigned char dst[4]); + +/* Apply the swizzle provided in \param swz (which is one of PIPE_SWIZZLE_x) + * to \param src and store the result in \param dst. + * \param is_integer determines the value written for PIPE_SWIZZLE_1. 
+ */ +void util_format_apply_color_swizzle(union pipe_color_union *dst, + const union pipe_color_union *src, + const unsigned char swz[4], + const boolean is_integer); + +void pipe_swizzle_4f(float *dst, const float *src, + const unsigned char swz[4]); + +void util_format_unswizzle_4f(float *dst, const float *src, + const unsigned char swz[4]); + +enum pipe_format +util_format_snorm8_to_sint8(enum pipe_format format); + + +extern void +util_copy_rect(ubyte * dst, enum pipe_format format, + unsigned dst_stride, unsigned dst_x, unsigned dst_y, + unsigned width, unsigned height, const ubyte * src, + int src_stride, unsigned src_x, unsigned src_y); + +#ifdef __cplusplus +} // extern "C" { +#endif + +#endif /* ! U_FORMAT_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/futex.h b/third_party/rust/glslopt/glsl-optimizer/src/util/futex.h new file mode 100644 index 0000000000..4d712e2ef2 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/futex.h @@ -0,0 +1,113 @@ +/* + * Copyright © 2015 Intel + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef UTIL_FUTEX_H +#define UTIL_FUTEX_H + +#if defined(HAVE_LINUX_FUTEX_H) +#define UTIL_FUTEX_SUPPORTED 1 + +#include <limits.h> +#include <stdint.h> +#include <unistd.h> +#include <linux/futex.h> +#include <sys/syscall.h> +#include <sys/time.h> + +static inline long sys_futex(void *addr1, int op, int val1, const struct timespec *timeout, void *addr2, int val3) +{ + return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3); +} + +static inline int futex_wake(uint32_t *addr, int count) +{ + return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0); +} + +static inline int futex_wait(uint32_t *addr, int32_t value, const struct timespec *timeout) +{ + /* FUTEX_WAIT_BITSET with FUTEX_BITSET_MATCH_ANY is equivalent to + * FUTEX_WAIT, except that it treats the timeout as absolute. */ + return sys_futex(addr, FUTEX_WAIT_BITSET, value, timeout, NULL, + FUTEX_BITSET_MATCH_ANY); +} + +#elif defined(__FreeBSD__) +#define UTIL_FUTEX_SUPPORTED 1 + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/umtx.h> +#include <sys/time.h> + +static inline int futex_wake(uint32_t *addr, int count) +{ + assert(count == (int)(uint32_t)count); /* Check that bits weren't discarded */ + return _umtx_op(addr, UMTX_OP_WAKE, (uint32_t)count, NULL, NULL) == -1 ? 
errno : 0; +} + +static inline int futex_wait(uint32_t *addr, int32_t value, struct timespec *timeout) +{ + void *uaddr = NULL, *uaddr2 = NULL; + struct _umtx_time tmo = { + ._flags = UMTX_ABSTIME, + ._clockid = CLOCK_MONOTONIC + }; + + assert(value == (int)(uint32_t)value); /* Check that bits weren't discarded */ + + if (timeout != NULL) { + tmo._timeout = *timeout; + uaddr = (void *)(uintptr_t)sizeof(tmo); + uaddr2 = (void *)&tmo; + } + + return _umtx_op(addr, UMTX_OP_WAIT_UINT, (uint32_t)value, uaddr, uaddr2) == -1 ? errno : 0; +} + +#elif defined(__OpenBSD__) +#define UTIL_FUTEX_SUPPORTED 1 + +#include <sys/time.h> +#include <sys/futex.h> + +static inline int futex_wake(uint32_t *addr, int count) +{ + return futex(addr, FUTEX_WAKE, count, NULL, NULL); +} + +static inline int futex_wait(uint32_t *addr, int32_t value, const struct timespec *timeout) +{ + struct timespec tsrel, tsnow; + clock_gettime(CLOCK_MONOTONIC, &tsnow); + timespecsub(timeout, &tsrel, &tsrel); + return futex(addr, FUTEX_WAIT, value, &tsrel, NULL); +} + +#else +#define UTIL_FUTEX_SUPPORTED 0 +#endif + +#endif /* UTIL_FUTEX_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/half_float.c b/third_party/rust/glslopt/glsl-optimizer/src/util/half_float.c new file mode 100644 index 0000000000..aae690a56a --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/half_float.c @@ -0,0 +1,213 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * Copyright 2015 Philip Taylor <philip@zaynar.co.uk> + * Copyright 2018 Advanced Micro Devices, Inc. + * Copyright (C) 2018-2019 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <math.h> +#include <assert.h> +#include "half_float.h" +#include "util/u_half.h" +#include "rounding.h" +#include "softfloat.h" +#include "macros.h" + +typedef union { float f; int32_t i; uint32_t u; } fi_type; + +/** + * Convert a 4-byte float to a 2-byte half float. + * + * Not all float32 values can be represented exactly as a float16 value. We + * round such intermediate float32 values to the nearest float16. When the + * float32 lies exactly between to float16 values, we round to the one with + * an even mantissa. + * + * This rounding behavior has several benefits: + * - It has no sign bias. + * + * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's + * GPU ISA. 
+ * + * - By reproducing the behavior of the GPU (at least on Intel hardware), + * compile-time evaluation of constant packHalf2x16 GLSL expressions will + * result in the same value as if the expression were executed on the GPU. + */ +uint16_t +_mesa_float_to_half(float val) +{ + const fi_type fi = {val}; + const int flt_m = fi.i & 0x7fffff; + const int flt_e = (fi.i >> 23) & 0xff; + const int flt_s = (fi.i >> 31) & 0x1; + int s, e, m = 0; + uint16_t result; + + /* sign bit */ + s = flt_s; + + /* handle special cases */ + if ((flt_e == 0) && (flt_m == 0)) { + /* zero */ + /* m = 0; - already set */ + e = 0; + } + else if ((flt_e == 0) && (flt_m != 0)) { + /* denorm -- denorm float maps to 0 half */ + /* m = 0; - already set */ + e = 0; + } + else if ((flt_e == 0xff) && (flt_m == 0)) { + /* infinity */ + /* m = 0; - already set */ + e = 31; + } + else if ((flt_e == 0xff) && (flt_m != 0)) { + /* NaN */ + m = 1; + e = 31; + } + else { + /* regular number */ + const int new_exp = flt_e - 127; + if (new_exp < -14) { + /* The float32 lies in the range (0.0, min_normal16) and is rounded + * to a nearby float16 value. The result will be either zero, subnormal, + * or normal. + */ + e = 0; + m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f)); + } + else if (new_exp > 15) { + /* map this value to infinity */ + /* m = 0; - already set */ + e = 31; + } + else { + /* The float32 lies in the range + * [min_normal16, max_normal16 + max_step16) + * and is rounded to a nearby float16 value. The result will be + * either normal or infinite. + */ + e = new_exp + 15; + m = _mesa_lroundevenf(flt_m / (float) (1 << 13)); + } + } + + assert(0 <= m && m <= 1024); + if (m == 1024) { + /* The float32 was rounded upwards into the range of the next exponent, + * so bump the exponent. This correctly handles the case where f32 + * should be rounded up to float16 infinity. + */ + ++e; + m = 0; + } + + result = (s << 15) | (e << 10) | m; + return result; +} + +uint16_t +_mesa_float_to_float16_rtz(float val) +{ + return _mesa_float_to_half_rtz(val); +} + +/** + * Convert a 2-byte half float to a 4-byte float. + * Based on code from: + * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html + */ +float +_mesa_half_to_float(uint16_t val) +{ + return util_half_to_float(val); +} + +/** + * Convert 0.0 to 0x00, 1.0 to 0xff. + * Values outside the range [0.0, 1.0] will give undefined results. + */ +uint8_t _mesa_half_to_unorm8(uint16_t val) +{ + const int m = val & 0x3ff; + const int e = (val >> 10) & 0x1f; + ASSERTED const int s = (val >> 15) & 0x1; + + /* v = round_to_nearest(1.mmmmmmmmmm * 2^(e-15) * 255) + * = round_to_nearest((1.mmmmmmmmmm * 255) * 2^(e-15)) + * = round_to_nearest((1mmmmmmmmmm * 255) * 2^(e-25)) + * = round_to_zero((1mmmmmmmmmm * 255) * 2^(e-25) + 0.5) + * = round_to_zero(((1mmmmmmmmmm * 255) * 2^(e-24) + 1) / 2) + * + * This happens to give the correct answer for zero/subnormals too + */ + assert(s == 0 && val <= FP16_ONE); /* check 0 <= this <= 1 */ + /* (implies e <= 15, which means the bit-shifts below are safe) */ + + uint32_t v = ((1 << 10) | m) * 255; + v = ((v >> (24 - e)) + 1) >> 1; + return v; +} + +/** + * Takes a uint16_t, divides by 65536, converts the infinite-precision + * result to fp16 with round-to-zero. Used by the ASTC decoder. + */ +uint16_t _mesa_uint16_div_64k_to_half(uint16_t v) +{ + /* Zero or subnormal. Set the mantissa to (v << 8) and return. 
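 *
 * Worked check (illustrative): for v == 1 the result is 0x0100, a subnormal
 * fp16 with mantissa 256, whose value is 256 * 2^-24 = 1/65536, exactly
 * v / 65536.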
*/ + if (v < 4) + return v << 8; + + /* Count the leading 0s in the uint16_t */ +#ifdef HAVE___BUILTIN_CLZ + int n = __builtin_clz(v) - 16; +#else + int n = 16; + for (int i = 15; i >= 0; i--) { + if (v & (1 << i)) { + n = 15 - i; + break; + } + } +#endif + + /* Shift the mantissa up so bit 16 is the hidden 1 bit, + * mask it off, then shift back down to 10 bits + */ + int m = ( ((uint32_t)v << (n + 1)) & 0xffff ) >> 6; + + /* (0{n} 1 X{15-n}) * 2^-16 + * = 1.X * 2^(15-n-16) + * = 1.X * 2^(14-n - 15) + * which is the FP16 form with e = 14 - n + */ + int e = 14 - n; + + assert(e >= 1 && e <= 30); + assert(m >= 0 && m < 0x400); + + return (e << 10) | m; +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/half_float.h b/third_party/rust/glslopt/glsl-optimizer/src/util/half_float.h new file mode 100644 index 0000000000..c9fad9a940 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/half_float.h @@ -0,0 +1,85 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * Copyright (C) 2018-2019 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _HALF_FLOAT_H_ +#define _HALF_FLOAT_H_ + +#include <stdbool.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define FP16_ONE ((uint16_t) 0x3c00) +#define FP16_ZERO ((uint16_t) 0) + +uint16_t _mesa_float_to_half(float val); +float _mesa_half_to_float(uint16_t val); +uint8_t _mesa_half_to_unorm8(uint16_t v); +uint16_t _mesa_uint16_div_64k_to_half(uint16_t v); + +/* + * _mesa_float_to_float16_rtz is no more than a wrapper to the counterpart + * softfloat.h call. Still, softfloat.h conversion API is meant to be kept + * private. In other words, only use the API published here, instead of + * calling directly the softfloat.h one. 
+ */ +uint16_t _mesa_float_to_float16_rtz(float val); + +static inline uint16_t +_mesa_float_to_float16_rtne(float val) +{ + return _mesa_float_to_half(val); +} + +static inline bool +_mesa_half_is_negative(uint16_t h) +{ + return !!(h & 0x8000); +} + + +#ifdef __cplusplus + +/* Helper class for disambiguating fp16 from uint16_t in C++ overloads */ + +struct float16_t { + uint16_t bits; + float16_t(float f) : bits(_mesa_float_to_half(f)) {} + float16_t(double d) : bits(_mesa_float_to_half(d)) {} + float16_t(uint16_t bits) : bits(bits) {} + static float16_t one() { return float16_t(FP16_ONE); } + static float16_t zero() { return float16_t(FP16_ZERO); } +}; + +#endif + + +#ifdef __cplusplus +} /* extern C */ +#endif + +#endif /* _HALF_FLOAT_H_ */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/hash_table.c b/third_party/rust/glslopt/glsl-optimizer/src/util/hash_table.c new file mode 100644 index 0000000000..0b0077cc7f --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/hash_table.c @@ -0,0 +1,859 @@ +/* + * Copyright © 2009,2012 Intel Corporation + * Copyright © 1988-2004 Keith Packard and Bart Massey. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Except as contained in this notice, the names of the authors + * or their institutions shall not be used in advertising or + * otherwise to promote the sale, use or other dealings in this + * Software without prior written authorization from the + * authors. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * Keith Packard <keithp@keithp.com> + */ + +/** + * Implements an open-addressing, linear-reprobing hash table. + * + * For more information, see: + * + * http://cgit.freedesktop.org/~anholt/hash_table/tree/README + */ + +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include "hash_table.h" +#include "ralloc.h" +#include "macros.h" +#include "u_memory.h" +#include "fast_urem_by_const.h" +#include "util/u_memory.h" + +#define XXH_INLINE_ALL +#include "xxhash.h" + +/** + * Magic number that gets stored outside of the struct hash_table. + * + * The hash table needs a particular pointer to be the marker for a key that + * was deleted from the table, along with NULL for the "never allocated in the + * table" marker. 
Legacy GL allows any GLuint to be used as a GL object name, + * and we use a 1:1 mapping from GLuints to key pointers, so we need to be + * able to track a GLuint that happens to match the deleted key outside of + * struct hash_table. We tell the hash table to use "1" as the deleted key + * value, so that we test the deleted-key-in-the-table path as best we can. + */ +#define DELETED_KEY_VALUE 1 + +static inline void * +uint_key(unsigned id) +{ + return (void *)(uintptr_t) id; +} + +static const uint32_t deleted_key_value; + +/** + * From Knuth -- a good choice for hash/rehash values is p, p-2 where + * p and p-2 are both prime. These tables are sized to have an extra 10% + * free to avoid exponential performance degradation as the hash table fills + */ +static const struct { + uint32_t max_entries, size, rehash; + uint64_t size_magic, rehash_magic; +} hash_sizes[] = { +#define ENTRY(max_entries, size, rehash) \ + { max_entries, size, rehash, \ + REMAINDER_MAGIC(size), REMAINDER_MAGIC(rehash) } + + ENTRY(2, 5, 3 ), + ENTRY(4, 7, 5 ), + ENTRY(8, 13, 11 ), + ENTRY(16, 19, 17 ), + ENTRY(32, 43, 41 ), + ENTRY(64, 73, 71 ), + ENTRY(128, 151, 149 ), + ENTRY(256, 283, 281 ), + ENTRY(512, 571, 569 ), + ENTRY(1024, 1153, 1151 ), + ENTRY(2048, 2269, 2267 ), + ENTRY(4096, 4519, 4517 ), + ENTRY(8192, 9013, 9011 ), + ENTRY(16384, 18043, 18041 ), + ENTRY(32768, 36109, 36107 ), + ENTRY(65536, 72091, 72089 ), + ENTRY(131072, 144409, 144407 ), + ENTRY(262144, 288361, 288359 ), + ENTRY(524288, 576883, 576881 ), + ENTRY(1048576, 1153459, 1153457 ), + ENTRY(2097152, 2307163, 2307161 ), + ENTRY(4194304, 4613893, 4613891 ), + ENTRY(8388608, 9227641, 9227639 ), + ENTRY(16777216, 18455029, 18455027 ), + ENTRY(33554432, 36911011, 36911009 ), + ENTRY(67108864, 73819861, 73819859 ), + ENTRY(134217728, 147639589, 147639587 ), + ENTRY(268435456, 295279081, 295279079 ), + ENTRY(536870912, 590559793, 590559791 ), + ENTRY(1073741824, 1181116273, 1181116271 ), + ENTRY(2147483648ul, 2362232233ul, 2362232231ul ) +}; + +ASSERTED static inline bool +key_pointer_is_reserved(const struct hash_table *ht, const void *key) +{ + return key == NULL || key == ht->deleted_key; +} + +static int +entry_is_free(const struct hash_entry *entry) +{ + return entry->key == NULL; +} + +static int +entry_is_deleted(const struct hash_table *ht, struct hash_entry *entry) +{ + return entry->key == ht->deleted_key; +} + +static int +entry_is_present(const struct hash_table *ht, struct hash_entry *entry) +{ + return entry->key != NULL && entry->key != ht->deleted_key; +} + +bool +_mesa_hash_table_init(struct hash_table *ht, + void *mem_ctx, + uint32_t (*key_hash_function)(const void *key), + bool (*key_equals_function)(const void *a, + const void *b)) +{ + ht->size_index = 0; + ht->size = hash_sizes[ht->size_index].size; + ht->rehash = hash_sizes[ht->size_index].rehash; + ht->size_magic = hash_sizes[ht->size_index].size_magic; + ht->rehash_magic = hash_sizes[ht->size_index].rehash_magic; + ht->max_entries = hash_sizes[ht->size_index].max_entries; + ht->key_hash_function = key_hash_function; + ht->key_equals_function = key_equals_function; + ht->table = rzalloc_array(mem_ctx, struct hash_entry, ht->size); + ht->entries = 0; + ht->deleted_entries = 0; + ht->deleted_key = &deleted_key_value; + + return ht->table != NULL; +} + +struct hash_table * +_mesa_hash_table_create(void *mem_ctx, + uint32_t (*key_hash_function)(const void *key), + bool (*key_equals_function)(const void *a, + const void *b)) +{ + struct hash_table *ht; + + /* mem_ctx is used to 
allocate the hash table, but the hash table is used + * to allocate all of the suballocations. + */ + ht = ralloc(mem_ctx, struct hash_table); + if (ht == NULL) + return NULL; + + if (!_mesa_hash_table_init(ht, ht, key_hash_function, key_equals_function)) { + ralloc_free(ht); + return NULL; + } + + return ht; +} + +struct hash_table * +_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx) +{ + struct hash_table *ht; + + ht = ralloc(dst_mem_ctx, struct hash_table); + if (ht == NULL) + return NULL; + + memcpy(ht, src, sizeof(struct hash_table)); + + ht->table = ralloc_array(ht, struct hash_entry, ht->size); + if (ht->table == NULL) { + ralloc_free(ht); + return NULL; + } + + memcpy(ht->table, src->table, ht->size * sizeof(struct hash_entry)); + + return ht; +} + +/** + * Frees the given hash table. + * + * If delete_function is passed, it gets called on each entry present before + * freeing. + */ +void +_mesa_hash_table_destroy(struct hash_table *ht, + void (*delete_function)(struct hash_entry *entry)) +{ + if (!ht) + return; + + if (delete_function) { + hash_table_foreach(ht, entry) { + delete_function(entry); + } + } + ralloc_free(ht); +} + +/** + * Deletes all entries of the given hash table without deleting the table + * itself or changing its structure. + * + * If delete_function is passed, it gets called on each entry present. + */ +void +_mesa_hash_table_clear(struct hash_table *ht, + void (*delete_function)(struct hash_entry *entry)) +{ + struct hash_entry *entry; + + for (entry = ht->table; entry != ht->table + ht->size; entry++) { + if (entry->key == NULL) + continue; + + if (delete_function != NULL && entry->key != ht->deleted_key) + delete_function(entry); + + entry->key = NULL; + } + + ht->entries = 0; + ht->deleted_entries = 0; +} + +/** Sets the value of the key pointer used for deleted entries in the table. + * + * The assumption is that usually keys are actual pointers, so we use a + * default value of a pointer to an arbitrary piece of storage in the library. + * But in some cases a consumer wants to store some other sort of value in the + * table, like a uint32_t, in which case that pointer may conflict with one of + * their valid keys. This lets that user select a safe value. + * + * This must be called before any keys are actually deleted from the table. + */ +void +_mesa_hash_table_set_deleted_key(struct hash_table *ht, const void *deleted_key) +{ + ht->deleted_key = deleted_key; +} + +static struct hash_entry * +hash_table_search(struct hash_table *ht, uint32_t hash, const void *key) +{ + assert(!key_pointer_is_reserved(ht, key)); + + uint32_t size = ht->size; + uint32_t start_hash_address = util_fast_urem32(hash, size, ht->size_magic); + uint32_t double_hash = 1 + util_fast_urem32(hash, ht->rehash, + ht->rehash_magic); + uint32_t hash_address = start_hash_address; + + do { + struct hash_entry *entry = ht->table + hash_address; + + if (entry_is_free(entry)) { + return NULL; + } else if (entry_is_present(ht, entry) && entry->hash == hash) { + if (ht->key_equals_function(key, entry->key)) { + return entry; + } + } + + hash_address += double_hash; + if (hash_address >= size) + hash_address -= size; + } while (hash_address != start_hash_address); + + return NULL; +} + +/** + * Finds a hash table entry with the given key and hash of that key. + * + * Returns NULL if no entry is found. Note that the data pointer may be + * modified by the user. 
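 *
 * Typical lookup sketch (ht, key and handle_value are illustrative,
 * caller-side names):
 *
 *    struct hash_entry *e = _mesa_hash_table_search(ht, key);
 *    if (e)
 *       handle_value(e->data);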
+ */ +struct hash_entry * +_mesa_hash_table_search(struct hash_table *ht, const void *key) +{ + assert(ht->key_hash_function); + return hash_table_search(ht, ht->key_hash_function(key), key); +} + +struct hash_entry * +_mesa_hash_table_search_pre_hashed(struct hash_table *ht, uint32_t hash, + const void *key) +{ + assert(ht->key_hash_function == NULL || hash == ht->key_hash_function(key)); + return hash_table_search(ht, hash, key); +} + +static struct hash_entry * +hash_table_insert(struct hash_table *ht, uint32_t hash, + const void *key, void *data); + +static void +hash_table_insert_rehash(struct hash_table *ht, uint32_t hash, + const void *key, void *data) +{ + uint32_t size = ht->size; + uint32_t start_hash_address = util_fast_urem32(hash, size, ht->size_magic); + uint32_t double_hash = 1 + util_fast_urem32(hash, ht->rehash, + ht->rehash_magic); + uint32_t hash_address = start_hash_address; + do { + struct hash_entry *entry = ht->table + hash_address; + + if (likely(entry->key == NULL)) { + entry->hash = hash; + entry->key = key; + entry->data = data; + return; + } + + hash_address += double_hash; + if (hash_address >= size) + hash_address -= size; + } while (true); +} + +static void +_mesa_hash_table_rehash(struct hash_table *ht, unsigned new_size_index) +{ + struct hash_table old_ht; + struct hash_entry *table; + + if (new_size_index >= ARRAY_SIZE(hash_sizes)) + return; + + table = rzalloc_array(ralloc_parent(ht->table), struct hash_entry, + hash_sizes[new_size_index].size); + if (table == NULL) + return; + + old_ht = *ht; + + ht->table = table; + ht->size_index = new_size_index; + ht->size = hash_sizes[ht->size_index].size; + ht->rehash = hash_sizes[ht->size_index].rehash; + ht->size_magic = hash_sizes[ht->size_index].size_magic; + ht->rehash_magic = hash_sizes[ht->size_index].rehash_magic; + ht->max_entries = hash_sizes[ht->size_index].max_entries; + ht->entries = 0; + ht->deleted_entries = 0; + + hash_table_foreach(&old_ht, entry) { + hash_table_insert_rehash(ht, entry->hash, entry->key, entry->data); + } + + ht->entries = old_ht.entries; + + ralloc_free(old_ht.table); +} + +static struct hash_entry * +hash_table_insert(struct hash_table *ht, uint32_t hash, + const void *key, void *data) +{ + struct hash_entry *available_entry = NULL; + + assert(!key_pointer_is_reserved(ht, key)); + + if (ht->entries >= ht->max_entries) { + _mesa_hash_table_rehash(ht, ht->size_index + 1); + } else if (ht->deleted_entries + ht->entries >= ht->max_entries) { + _mesa_hash_table_rehash(ht, ht->size_index); + } + + uint32_t size = ht->size; + uint32_t start_hash_address = util_fast_urem32(hash, size, ht->size_magic); + uint32_t double_hash = 1 + util_fast_urem32(hash, ht->rehash, + ht->rehash_magic); + uint32_t hash_address = start_hash_address; + do { + struct hash_entry *entry = ht->table + hash_address; + + if (!entry_is_present(ht, entry)) { + /* Stash the first available entry we find */ + if (available_entry == NULL) + available_entry = entry; + if (entry_is_free(entry)) + break; + } + + /* Implement replacement when another insert happens + * with a matching key. This is a relatively common + * feature of hash tables, with the alternative + * generally being "insert the new value as well, and + * return it first when the key is searched for". + * + * Note that the hash table doesn't have a delete + * callback. If freeing of old data pointers is + * required to avoid memory leaks, perform a search + * before inserting. 
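 *
 * Caller-side sketch of that search-before-insert pattern (assumes the
 * stored data pointers were malloc'ed by the caller):
 *
 *    struct hash_entry *old = _mesa_hash_table_search(ht, key);
 *    if (old)
 *       free(old->data);
 *    _mesa_hash_table_insert(ht, key, new_data);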
+ */ + if (!entry_is_deleted(ht, entry) && + entry->hash == hash && + ht->key_equals_function(key, entry->key)) { + entry->key = key; + entry->data = data; + return entry; + } + + hash_address += double_hash; + if (hash_address >= size) + hash_address -= size; + } while (hash_address != start_hash_address); + + if (available_entry) { + if (entry_is_deleted(ht, available_entry)) + ht->deleted_entries--; + available_entry->hash = hash; + available_entry->key = key; + available_entry->data = data; + ht->entries++; + return available_entry; + } + + /* We could hit here if a required resize failed. An unchecked-malloc + * application could ignore this result. + */ + return NULL; +} + +/** + * Inserts the key with the given hash into the table. + * + * Note that insertion may rearrange the table on a resize or rehash, + * so previously found hash_entries are no longer valid after this function. + */ +struct hash_entry * +_mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data) +{ + assert(ht->key_hash_function); + return hash_table_insert(ht, ht->key_hash_function(key), key, data); +} + +struct hash_entry * +_mesa_hash_table_insert_pre_hashed(struct hash_table *ht, uint32_t hash, + const void *key, void *data) +{ + assert(ht->key_hash_function == NULL || hash == ht->key_hash_function(key)); + return hash_table_insert(ht, hash, key, data); +} + +/** + * This function deletes the given hash table entry. + * + * Note that deletion doesn't otherwise modify the table, so an iteration over + * the table deleting entries is safe. + */ +void +_mesa_hash_table_remove(struct hash_table *ht, + struct hash_entry *entry) +{ + if (!entry) + return; + + entry->key = ht->deleted_key; + ht->entries--; + ht->deleted_entries++; +} + +/** + * Removes the entry with the corresponding key, if exists. + */ +void _mesa_hash_table_remove_key(struct hash_table *ht, + const void *key) +{ + _mesa_hash_table_remove(ht, _mesa_hash_table_search(ht, key)); +} + +/** + * This function is an iterator over the hash table. + * + * Pass in NULL for the first entry, as in the start of a for loop. Note that + * an iteration over the table is O(table_size) not O(entries). + */ +struct hash_entry * +_mesa_hash_table_next_entry(struct hash_table *ht, + struct hash_entry *entry) +{ + if (entry == NULL) + entry = ht->table; + else + entry = entry + 1; + + for (; entry != ht->table + ht->size; entry++) { + if (entry_is_present(ht, entry)) { + return entry; + } + } + + return NULL; +} + +/** + * Returns a random entry from the hash table. + * + * This may be useful in implementing random replacement (as opposed + * to just removing everything) in caches based on this hash table + * implementation. @predicate may be used to filter entries, or may + * be set to NULL for no filtering. 
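+ *
+ * An illustrative predicate sketch (hypothetical identifiers, not part of
+ * this file):
+ *
+ *    static bool
+ *    entry_is_stale(struct hash_entry *entry)
+ *    {
+ *       return ((struct cache_item *) entry->data)->age > MAX_AGE;
+ *    }
+ *
+ *    struct hash_entry *victim =
+ *       _mesa_hash_table_random_entry(cache, entry_is_stale);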
+ */ +struct hash_entry * +_mesa_hash_table_random_entry(struct hash_table *ht, + bool (*predicate)(struct hash_entry *entry)) +{ + struct hash_entry *entry; + uint32_t i = rand() % ht->size; + + if (ht->entries == 0) + return NULL; + + for (entry = ht->table + i; entry != ht->table + ht->size; entry++) { + if (entry_is_present(ht, entry) && + (!predicate || predicate(entry))) { + return entry; + } + } + + for (entry = ht->table; entry != ht->table + i; entry++) { + if (entry_is_present(ht, entry) && + (!predicate || predicate(entry))) { + return entry; + } + } + + return NULL; +} + + +uint32_t +_mesa_hash_data(const void *data, size_t size) +{ + return XXH32(data, size, 0); +} + +uint32_t +_mesa_hash_int(const void *key) +{ + return XXH32(key, sizeof(int), 0); +} + +uint32_t +_mesa_hash_uint(const void *key) +{ + return XXH32(key, sizeof(unsigned), 0); +} + +uint32_t +_mesa_hash_u32(const void *key) +{ + return XXH32(key, 4, 0); +} + +/** FNV-1a string hash implementation */ +uint32_t +_mesa_hash_string(const void *_key) +{ + uint32_t hash = _mesa_fnv32_1a_offset_bias; + const char *key = _key; + + while (*key != 0) { + hash = _mesa_fnv32_1a_accumulate(hash, *key); + key++; + } + + return hash; +} + +uint32_t +_mesa_hash_pointer(const void *pointer) +{ + uintptr_t num = (uintptr_t) pointer; + return (uint32_t) ((num >> 2) ^ (num >> 6) ^ (num >> 10) ^ (num >> 14)); +} + +bool +_mesa_key_int_equal(const void *a, const void *b) +{ + return *((const int *)a) == *((const int *)b); +} + +bool +_mesa_key_uint_equal(const void *a, const void *b) +{ + + return *((const unsigned *)a) == *((const unsigned *)b); +} + +bool +_mesa_key_u32_equal(const void *a, const void *b) +{ + return *((const uint32_t *)a) == *((const uint32_t *)b); +} + +/** + * String compare function for use as the comparison callback in + * _mesa_hash_table_create(). + */ +bool +_mesa_key_string_equal(const void *a, const void *b) +{ + return strcmp(a, b) == 0; +} + +bool +_mesa_key_pointer_equal(const void *a, const void *b) +{ + return a == b; +} + +/** + * Helper to create a hash table with pointer keys. + */ +struct hash_table * +_mesa_pointer_hash_table_create(void *mem_ctx) +{ + return _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); +} + +/** + * Hash table wrapper which supports 64-bit keys. + * + * TODO: unify all hash table implementations. 
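+ *
+ * An illustrative usage sketch (the data pointer is hypothetical):
+ *
+ *    struct hash_table_u64 *ht = _mesa_hash_table_u64_create(NULL);
+ *    _mesa_hash_table_u64_insert(ht, 0x1234567890abcdefull, some_data);
+ *    void *found = _mesa_hash_table_u64_search(ht, 0x1234567890abcdefull);
+ *    _mesa_hash_table_u64_remove(ht, 0x1234567890abcdefull);
+ *    _mesa_hash_table_u64_destroy(ht, NULL);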
+ */ + +struct hash_key_u64 { + uint64_t value; +}; + +static uint32_t +key_u64_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct hash_key_u64)); +} + +static bool +key_u64_equals(const void *a, const void *b) +{ + const struct hash_key_u64 *aa = a; + const struct hash_key_u64 *bb = b; + + return aa->value == bb->value; +} + +#define FREED_KEY_VALUE 0 + +struct hash_table_u64 * +_mesa_hash_table_u64_create(void *mem_ctx) +{ + STATIC_ASSERT(FREED_KEY_VALUE != DELETED_KEY_VALUE); + struct hash_table_u64 *ht; + + ht = CALLOC_STRUCT(hash_table_u64); + if (!ht) + return NULL; + + if (sizeof(void *) == 8) { + ht->table = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + } else { + ht->table = _mesa_hash_table_create(mem_ctx, key_u64_hash, + key_u64_equals); + } + + if (ht->table) + _mesa_hash_table_set_deleted_key(ht->table, uint_key(DELETED_KEY_VALUE)); + + return ht; +} + +void +_mesa_hash_table_u64_clear(struct hash_table_u64 *ht, + void (*delete_function)(struct hash_entry *entry)) +{ + if (!ht) + return; + + if (ht->deleted_key_data) { + if (delete_function) { + struct hash_table *table = ht->table; + struct hash_entry entry; + + /* Create a fake entry for the delete function. */ + if (sizeof(void *) == 8) { + entry.hash = table->key_hash_function(table->deleted_key); + } else { + struct hash_key_u64 _key = { .value = (uintptr_t)table->deleted_key }; + entry.hash = table->key_hash_function(&_key); + } + entry.key = table->deleted_key; + entry.data = ht->deleted_key_data; + + delete_function(&entry); + } + ht->deleted_key_data = NULL; + } + + if (ht->freed_key_data) { + if (delete_function) { + struct hash_table *table = ht->table; + struct hash_entry entry; + + /* Create a fake entry for the delete function. 
*/ + if (sizeof(void *) == 8) { + entry.hash = table->key_hash_function(uint_key(FREED_KEY_VALUE)); + } else { + struct hash_key_u64 _key = { .value = (uintptr_t)FREED_KEY_VALUE }; + entry.hash = table->key_hash_function(&_key); + } + entry.key = uint_key(FREED_KEY_VALUE); + entry.data = ht->freed_key_data; + + delete_function(&entry); + } + ht->freed_key_data = NULL; + } + + _mesa_hash_table_clear(ht->table, delete_function); +} + +void +_mesa_hash_table_u64_destroy(struct hash_table_u64 *ht, + void (*delete_function)(struct hash_entry *entry)) +{ + if (!ht) + return; + + _mesa_hash_table_u64_clear(ht, delete_function); + _mesa_hash_table_destroy(ht->table, delete_function); + free(ht); +} + +void +_mesa_hash_table_u64_insert(struct hash_table_u64 *ht, uint64_t key, + void *data) +{ + if (key == FREED_KEY_VALUE) { + ht->freed_key_data = data; + return; + } + + if (key == DELETED_KEY_VALUE) { + ht->deleted_key_data = data; + return; + } + + if (sizeof(void *) == 8) { + _mesa_hash_table_insert(ht->table, (void *)(uintptr_t)key, data); + } else { + struct hash_key_u64 *_key = CALLOC_STRUCT(hash_key_u64); + + if (!_key) + return; + _key->value = key; + + _mesa_hash_table_insert(ht->table, _key, data); + } +} + +static struct hash_entry * +hash_table_u64_search(struct hash_table_u64 *ht, uint64_t key) +{ + if (sizeof(void *) == 8) { + return _mesa_hash_table_search(ht->table, (void *)(uintptr_t)key); + } else { + struct hash_key_u64 _key = { .value = key }; + return _mesa_hash_table_search(ht->table, &_key); + } +} + +void * +_mesa_hash_table_u64_search(struct hash_table_u64 *ht, uint64_t key) +{ + struct hash_entry *entry; + + if (key == FREED_KEY_VALUE) + return ht->freed_key_data; + + if (key == DELETED_KEY_VALUE) + return ht->deleted_key_data; + + entry = hash_table_u64_search(ht, key); + if (!entry) + return NULL; + + return entry->data; +} + +void +_mesa_hash_table_u64_remove(struct hash_table_u64 *ht, uint64_t key) +{ + struct hash_entry *entry; + + if (key == FREED_KEY_VALUE) { + ht->freed_key_data = NULL; + return; + } + + if (key == DELETED_KEY_VALUE) { + ht->deleted_key_data = NULL; + return; + } + + entry = hash_table_u64_search(ht, key); + if (!entry) + return; + + if (sizeof(void *) == 8) { + _mesa_hash_table_remove(ht->table, entry); + } else { + struct hash_key *_key = (struct hash_key *)entry->key; + + _mesa_hash_table_remove(ht->table, entry); + free(_key); + } +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/hash_table.h b/third_party/rust/glslopt/glsl-optimizer/src/util/hash_table.h new file mode 100644 index 0000000000..b1eb9d4e21 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/hash_table.h @@ -0,0 +1,183 @@ +/* + * Copyright © 2009,2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#ifndef _HASH_TABLE_H +#define _HASH_TABLE_H + +#include <stdlib.h> +#include <inttypes.h> +#include <stdbool.h> +#include "c99_compat.h" +#include "fnv1a.h" +#include "macros.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct hash_entry { + uint32_t hash; + const void *key; + void *data; +}; + +struct hash_table { + struct hash_entry *table; + uint32_t (*key_hash_function)(const void *key); + bool (*key_equals_function)(const void *a, const void *b); + const void *deleted_key; + uint32_t size; + uint32_t rehash; + uint64_t size_magic; + uint64_t rehash_magic; + uint32_t max_entries; + uint32_t size_index; + uint32_t entries; + uint32_t deleted_entries; +}; + +struct hash_table * +_mesa_hash_table_create(void *mem_ctx, + uint32_t (*key_hash_function)(const void *key), + bool (*key_equals_function)(const void *a, + const void *b)); + +bool +_mesa_hash_table_init(struct hash_table *ht, + void *mem_ctx, + uint32_t (*key_hash_function)(const void *key), + bool (*key_equals_function)(const void *a, + const void *b)); + +struct hash_table * +_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx); +void _mesa_hash_table_destroy(struct hash_table *ht, + void (*delete_function)(struct hash_entry *entry)); +void _mesa_hash_table_clear(struct hash_table *ht, + void (*delete_function)(struct hash_entry *entry)); +void _mesa_hash_table_set_deleted_key(struct hash_table *ht, + const void *deleted_key); + +static inline uint32_t _mesa_hash_table_num_entries(struct hash_table *ht) +{ + return ht->entries; +} + +struct hash_entry * +_mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data); +struct hash_entry * +_mesa_hash_table_insert_pre_hashed(struct hash_table *ht, uint32_t hash, + const void *key, void *data); +struct hash_entry * +_mesa_hash_table_search(struct hash_table *ht, const void *key); +struct hash_entry * +_mesa_hash_table_search_pre_hashed(struct hash_table *ht, uint32_t hash, + const void *key); +void _mesa_hash_table_remove(struct hash_table *ht, + struct hash_entry *entry); +void _mesa_hash_table_remove_key(struct hash_table *ht, + const void *key); + +struct hash_entry *_mesa_hash_table_next_entry(struct hash_table *ht, + struct hash_entry *entry); +struct hash_entry * +_mesa_hash_table_random_entry(struct hash_table *ht, + bool (*predicate)(struct hash_entry *entry)); + +uint32_t _mesa_hash_data(const void *data, size_t size); + +uint32_t _mesa_hash_int(const void *key); +uint32_t _mesa_hash_uint(const void *key); +uint32_t _mesa_hash_u32(const void *key); +uint32_t _mesa_hash_string(const void *key); +uint32_t _mesa_hash_pointer(const void *pointer); + +bool _mesa_key_int_equal(const void *a, const void *b); +bool _mesa_key_uint_equal(const void *a, const void *b); +bool _mesa_key_u32_equal(const void *a, const void *b); +bool _mesa_key_string_equal(const void *a, const void *b); +bool _mesa_key_pointer_equal(const void *a, const void *b); + +struct hash_table * +_mesa_pointer_hash_table_create(void 
*mem_ctx); + +/** + * This foreach function is safe against deletion (which just replaces + * an entry's data with the deleted marker), but not against insertion + * (which may rehash the table, making entry a dangling pointer). + */ +#define hash_table_foreach(ht, entry) \ + for (struct hash_entry *entry = _mesa_hash_table_next_entry(ht, NULL); \ + entry != NULL; \ + entry = _mesa_hash_table_next_entry(ht, entry)) + +static inline void +hash_table_call_foreach(struct hash_table *ht, + void (*callback)(const void *key, + void *data, + void *closure), + void *closure) +{ + hash_table_foreach(ht, entry) + callback(entry->key, entry->data, closure); +} + +/** + * Hash table wrapper which supports 64-bit keys. + */ +struct hash_table_u64 { + struct hash_table *table; + void *freed_key_data; + void *deleted_key_data; +}; + +struct hash_table_u64 * +_mesa_hash_table_u64_create(void *mem_ctx); + +void +_mesa_hash_table_u64_destroy(struct hash_table_u64 *ht, + void (*delete_function)(struct hash_entry *entry)); + +void +_mesa_hash_table_u64_insert(struct hash_table_u64 *ht, uint64_t key, + void *data); + +void * +_mesa_hash_table_u64_search(struct hash_table_u64 *ht, uint64_t key); + +void +_mesa_hash_table_u64_remove(struct hash_table_u64 *ht, uint64_t key); + +void +_mesa_hash_table_u64_clear(struct hash_table_u64 *ht, + void (*delete_function)(struct hash_entry *entry)); + +#ifdef __cplusplus +} /* extern C */ +#endif + +#endif /* _HASH_TABLE_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/list.h b/third_party/rust/glslopt/glsl-optimizer/src/util/list.h new file mode 100644 index 0000000000..8a18c4b0d8 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/list.h @@ -0,0 +1,249 @@ +/************************************************************************** + * + * Copyright 2006 VMware, Inc., Bismarck, ND. USA. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +/** + * \file + * List macros heavily inspired by the Linux kernel + * list handling. No list looping yet. + * + * Is not threadsafe, so common operations need to + * be protected using an external mutex. 
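+ *
+ * An illustrative embedding of the intrusive list (hypothetical struct and
+ * identifiers, not part of this header):
+ *
+ *    struct node {
+ *       struct list_head link;
+ *       int value;
+ *    };
+ *
+ *    struct list_head head;
+ *    list_inithead(&head);
+ *    list_addtail(&n->link, &head);
+ *    list_for_each_entry(struct node, pos, &head, link) {
+ *       process(pos->value);
+ *    }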
+ */ + +#ifndef _UTIL_LIST_H_ +#define _UTIL_LIST_H_ + + +#include <stdbool.h> +#include <stddef.h> +#include <assert.h> +#include "c99_compat.h" + +#ifdef DEBUG +# define list_assert(cond, msg) assert(cond && msg) +#else +# define list_assert(cond, msg) (void)(0 && (cond)) +#endif + +struct list_head +{ + struct list_head *prev; + struct list_head *next; +}; + +static inline void list_inithead(struct list_head *item) +{ + item->prev = item; + item->next = item; +} + +static inline void list_add(struct list_head *item, struct list_head *list) +{ + item->prev = list; + item->next = list->next; + list->next->prev = item; + list->next = item; +} + +static inline void list_addtail(struct list_head *item, struct list_head *list) +{ + item->next = list; + item->prev = list->prev; + list->prev->next = item; + list->prev = item; +} + +static inline bool list_is_empty(const struct list_head *list); + +static inline void list_replace(struct list_head *from, struct list_head *to) +{ + if (list_is_empty(from)) { + list_inithead(to); + } else { + to->prev = from->prev; + to->next = from->next; + from->next->prev = to; + from->prev->next = to; + } +} + +static inline void list_del(struct list_head *item) +{ + item->prev->next = item->next; + item->next->prev = item->prev; + item->prev = item->next = NULL; +} + +static inline void list_delinit(struct list_head *item) +{ + item->prev->next = item->next; + item->next->prev = item->prev; + item->next = item; + item->prev = item; +} + +static inline bool list_is_empty(const struct list_head *list) +{ + return list->next == list; +} + +/** + * Returns whether the list has exactly one element. + */ +static inline bool list_is_singular(const struct list_head *list) +{ + return list->next != NULL && list->next != list && list->next->next == list; +} + +static inline unsigned list_length(const struct list_head *list) +{ + struct list_head *node; + unsigned length = 0; + for (node = list->next; node != list; node = node->next) + length++; + return length; +} + +static inline void list_splice(struct list_head *src, struct list_head *dst) +{ + if (list_is_empty(src)) + return; + + src->next->prev = dst; + src->prev->next = dst->next; + dst->next->prev = src->prev; + dst->next = src->next; +} + +static inline void list_splicetail(struct list_head *src, struct list_head *dst) +{ + if (list_is_empty(src)) + return; + + src->prev->next = dst; + src->next->prev = dst->prev; + dst->prev->next = src->next; + dst->prev = src->prev; +} + +static inline void list_validate(const struct list_head *list) +{ + struct list_head *node; + assert(list->next->prev == list && list->prev->next == list); + for (node = list->next; node != list; node = node->next) + assert(node->next->prev == node && node->prev->next == node); +} + +#define LIST_ENTRY(__type, __item, __field) \ + ((__type *)(((char *)(__item)) - offsetof(__type, __field))) + +/** + * Cast from a pointer to a member of a struct back to the containing struct. + * + * 'sample' MUST be initialized, or else the result is undefined! 
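+ *
+ * Illustrative use (hypothetical identifiers):
+ *
+ *    struct node { struct list_head link; };
+ *    struct node *n = get_some_node();
+ *    struct node *same = container_of(&n->link, n, link);
+ *
+ * Here 'n' only supplies the type and member offset; 'same' points back at 'n'.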
+ */ +#ifndef container_of +#define container_of(ptr, sample, member) \ + (void *)((char *)(ptr) \ + - ((char *)&(sample)->member - (char *)(sample))) +#endif + +#define list_first_entry(ptr, type, member) \ + LIST_ENTRY(type, (ptr)->next, member) + +#define list_last_entry(ptr, type, member) \ + LIST_ENTRY(type, (ptr)->prev, member) + + +#define LIST_FOR_EACH_ENTRY(pos, head, member) \ + for (pos = NULL, pos = container_of((head)->next, pos, member); \ + &pos->member != (head); \ + pos = container_of(pos->member.next, pos, member)) + +#define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member) \ + for (pos = NULL, pos = container_of((head)->next, pos, member), \ + storage = container_of(pos->member.next, pos, member); \ + &pos->member != (head); \ + pos = storage, storage = container_of(storage->member.next, storage, member)) + +#define LIST_FOR_EACH_ENTRY_SAFE_REV(pos, storage, head, member) \ + for (pos = NULL, pos = container_of((head)->prev, pos, member), \ + storage = container_of(pos->member.prev, pos, member); \ + &pos->member != (head); \ + pos = storage, storage = container_of(storage->member.prev, storage, member)) + +#define LIST_FOR_EACH_ENTRY_FROM(pos, start, head, member) \ + for (pos = NULL, pos = container_of((start), pos, member); \ + &pos->member != (head); \ + pos = container_of(pos->member.next, pos, member)) + +#define LIST_FOR_EACH_ENTRY_FROM_REV(pos, start, head, member) \ + for (pos = NULL, pos = container_of((start), pos, member); \ + &pos->member != (head); \ + pos = container_of(pos->member.prev, pos, member)) + +#define list_for_each_entry(type, pos, head, member) \ + for (type *pos = LIST_ENTRY(type, (head)->next, member), \ + *__next = LIST_ENTRY(type, pos->member.next, member); \ + &pos->member != (head); \ + pos = LIST_ENTRY(type, pos->member.next, member), \ + list_assert(pos == __next, "use _safe iterator"), \ + __next = LIST_ENTRY(type, __next->member.next, member)) + +#define list_for_each_entry_safe(type, pos, head, member) \ + for (type *pos = LIST_ENTRY(type, (head)->next, member), \ + *__next = LIST_ENTRY(type, pos->member.next, member); \ + &pos->member != (head); \ + pos = __next, \ + __next = LIST_ENTRY(type, __next->member.next, member)) + +#define list_for_each_entry_rev(type, pos, head, member) \ + for (type *pos = LIST_ENTRY(type, (head)->prev, member), \ + *__prev = LIST_ENTRY(type, pos->member.prev, member); \ + &pos->member != (head); \ + pos = LIST_ENTRY(type, pos->member.prev, member), \ + list_assert(pos == __prev, "use _safe iterator"), \ + __prev = LIST_ENTRY(type, __prev->member.prev, member)) + +#define list_for_each_entry_safe_rev(type, pos, head, member) \ + for (type *pos = LIST_ENTRY(type, (head)->prev, member), \ + *__prev = LIST_ENTRY(type, pos->member.prev, member); \ + &pos->member != (head); \ + pos = __prev, \ + __prev = LIST_ENTRY(type, __prev->member.prev, member)) + +#define list_for_each_entry_from(type, pos, start, head, member) \ + for (type *pos = LIST_ENTRY(type, (start), member); \ + &pos->member != (head); \ + pos = LIST_ENTRY(type, pos->member.next, member)) + +#define list_for_each_entry_from_rev(type, pos, start, head, member) \ + for (type *pos = LIST_ENTRY(type, (start), member); \ + &pos->member != (head); \ + pos = LIST_ENTRY(type, pos->member.prev, member)) + +#endif /*_UTIL_LIST_H_*/ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/macros.h b/third_party/rust/glslopt/glsl-optimizer/src/util/macros.h new file mode 100644 index 0000000000..fcace4e351 --- /dev/null +++ 
b/third_party/rust/glslopt/glsl-optimizer/src/util/macros.h @@ -0,0 +1,349 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef UTIL_MACROS_H +#define UTIL_MACROS_H + +#include <assert.h> + +#include "c99_compat.h" +#include "c11_compat.h" + +/* Compute the size of an array */ +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +/* For compatibility with Clang's __has_builtin() */ +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +/** + * __builtin_expect macros + */ +#if !defined(HAVE___BUILTIN_EXPECT) +# define __builtin_expect(x, y) (x) +#endif + +#ifndef likely +# ifdef HAVE___BUILTIN_EXPECT +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +# else +# define likely(x) (x) +# define unlikely(x) (x) +# endif +#endif + + +/** + * Static (compile-time) assertion. + * Basically, use COND to dimension an array. If COND is false/zero the + * array size will be -1 and we'll get a compilation error. + */ +#define STATIC_ASSERT(COND) \ + do { \ + (void) sizeof(char [1 - 2*!(COND)]); \ + } while (0) + + +/** + * Unreachable macro. Useful for suppressing "control reaches end of non-void + * function" warnings. + */ +#if defined(HAVE___BUILTIN_UNREACHABLE) || __has_builtin(__builtin_unreachable) +#define unreachable(str) \ +do { \ + assert(!str); \ + __builtin_unreachable(); \ +} while (0) +#elif defined (_MSC_VER) +#define unreachable(str) \ +do { \ + assert(!str); \ + __assume(0); \ +} while (0) +#else +#define unreachable(str) assert(!str) +#endif + +/** + * Assume macro. Useful for expressing our assumptions to the compiler, + * typically for purposes of silencing warnings. + */ +#if __has_builtin(__builtin_assume) +#define assume(expr) \ +do { \ + assert(expr); \ + __builtin_assume(expr); \ +} while (0) +#elif defined HAVE___BUILTIN_UNREACHABLE +#define assume(expr) ((expr) ? ((void) 0) \ + : (assert(!"assumption failed"), \ + __builtin_unreachable())) +#elif defined (_MSC_VER) +#define assume(expr) __assume(expr) +#else +#define assume(expr) assert(expr) +#endif + +/* Attribute const is used for functions that have no effects other than their + * return value, and only rely on the argument values to compute the return + * value. As a result, calls to it can be CSEed. Note that using memory + * pointed to by the arguments is not allowed for const functions. 
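+ *
+ * Illustrative use (hypothetical function):
+ *
+ *    static ATTRIBUTE_CONST int
+ *    square(int x)
+ *    {
+ *       return x * x;
+ *    }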
+ */ +#ifdef HAVE_FUNC_ATTRIBUTE_CONST +#define ATTRIBUTE_CONST __attribute__((__const__)) +#else +#define ATTRIBUTE_CONST +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_FLATTEN +#define FLATTEN __attribute__((__flatten__)) +#else +#define FLATTEN +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT +#define PRINTFLIKE(f, a) __attribute__ ((format(__printf__, f, a))) +#else +#define PRINTFLIKE(f, a) +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_MALLOC +#define MALLOCLIKE __attribute__((__malloc__)) +#else +#define MALLOCLIKE +#endif + +/* Forced function inlining */ +/* Note: Clang also sets __GNUC__ (see other cases below) */ +#ifndef ALWAYS_INLINE +# if defined(__GNUC__) +# define ALWAYS_INLINE inline __attribute__((always_inline)) +# elif defined(_MSC_VER) +# define ALWAYS_INLINE __forceinline +# else +# define ALWAYS_INLINE inline +# endif +#endif + +/* Used to optionally mark structures with misaligned elements or size as + * packed, to trade off performance for space. + */ +#ifdef HAVE_FUNC_ATTRIBUTE_PACKED +#define PACKED __attribute__((__packed__)) +#else +#define PACKED +#endif + +/* Attribute pure is used for functions that have no effects other than their + * return value. As a result, calls to it can be dead code eliminated. + */ +#ifdef HAVE_FUNC_ATTRIBUTE_PURE +#define ATTRIBUTE_PURE __attribute__((__pure__)) +#else +#define ATTRIBUTE_PURE +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL +#define ATTRIBUTE_RETURNS_NONNULL __attribute__((__returns_nonnull__)) +#else +#define ATTRIBUTE_RETURNS_NONNULL +#endif + +#ifndef NORETURN +# ifdef _MSC_VER +# define NORETURN __declspec(noreturn) +# elif defined HAVE_FUNC_ATTRIBUTE_NORETURN +# define NORETURN __attribute__((__noreturn__)) +# else +# define NORETURN +# endif +#endif + +#ifdef __cplusplus +/** + * Macro function that evaluates to true if T is a trivially + * destructible type -- that is, if its (non-virtual) destructor + * performs no action and all member variables and base classes are + * trivially destructible themselves. + */ +# if (defined(__clang__) && defined(__has_feature)) +# if __has_feature(has_trivial_destructor) +# define HAS_TRIVIAL_DESTRUCTOR(T) __has_trivial_destructor(T) +# endif +# elif defined(__GNUC__) +# if ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3))) +# define HAS_TRIVIAL_DESTRUCTOR(T) __has_trivial_destructor(T) +# endif +# elif defined(_MSC_VER) && !defined(__INTEL_COMPILER) +# define HAS_TRIVIAL_DESTRUCTOR(T) __has_trivial_destructor(T) +# endif +# ifndef HAS_TRIVIAL_DESTRUCTOR + /* It's always safe (if inefficient) to assume that a + * destructor is non-trivial. + */ +# define HAS_TRIVIAL_DESTRUCTOR(T) (false) +# endif +#endif + +/** + * PUBLIC/USED macros + * + * If we build the library with gcc's -fvisibility=hidden flag, we'll + * use the PUBLIC macro to mark functions that are to be exported. + * + * We also need to define a USED attribute, so the optimizer doesn't + * inline a static function that we later use in an alias. - ajax + */ +#ifndef PUBLIC +# if defined(__GNUC__) +# define PUBLIC __attribute__((visibility("default"))) +# define USED __attribute__((used)) +# elif defined(_MSC_VER) +# define PUBLIC __declspec(dllexport) +# define USED +# else +# define PUBLIC +# define USED +# endif +#endif + +/** + * UNUSED marks variables (or sometimes functions) that have to be defined, + * but are sometimes (or always) unused beyond that. A common case is for + * a function parameter to be used in some build configurations but not others. 
+ * Another case is fallback vfuncs that don't do anything with their params. + * + * Note that this should not be used for identifiers used in `assert()`; + * see ASSERTED below. + */ +#ifdef HAVE_FUNC_ATTRIBUTE_UNUSED +#define UNUSED __attribute__((unused)) +#else +#define UNUSED +#endif + +/** + * Use ASSERTED to indicate that an identifier is unused outside of an `assert()`, + * so that assert-free builds don't get "unused variable" warnings. + */ +#ifdef NDEBUG +#define ASSERTED UNUSED +#else +#define ASSERTED +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_WARN_UNUSED_RESULT +#define MUST_CHECK __attribute__((warn_unused_result)) +#else +#define MUST_CHECK +#endif + +#if defined(__GNUC__) +#define ATTRIBUTE_NOINLINE __attribute__((noinline)) +#else +#define ATTRIBUTE_NOINLINE +#endif + + +/** + * Check that STRUCT::FIELD can hold MAXVAL. We use a lot of bitfields + * in Mesa/gallium. We have to be sure they're of sufficient size to + * hold the largest expected value. + * Note that with MSVC, enums are signed and enum bitfields need one extra + * high bit (always zero) to ensure the max value is handled correctly. + * This macro will detect that with MSVC, but not GCC. + */ +#define ASSERT_BITFIELD_SIZE(STRUCT, FIELD, MAXVAL) \ + do { \ + ASSERTED STRUCT s; \ + s.FIELD = (MAXVAL); \ + assert((int) s.FIELD == (MAXVAL) && "Insufficient bitfield size!"); \ + } while (0) + + +/** Compute ceiling of integer quotient of A divided by B. */ +#define DIV_ROUND_UP( A, B ) ( ((A) + (B) - 1) / (B) ) + +/** Clamp X to [MIN,MAX]. Turn NaN into MIN, arbitrarily. */ +#define CLAMP( X, MIN, MAX ) ( (X)>(MIN) ? ((X)>(MAX) ? (MAX) : (X)) : (MIN) ) + +/** Minimum of two values: */ +#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) + +/** Maximum of two values: */ +#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) + +/** Minimum and maximum of three values: */ +#define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C)) +#define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C)) + +/** Align a value to a power of two */ +#define ALIGN_POT(x, pot_align) (((x) + (pot_align) - 1) & ~((pot_align) - 1)) + +/** + * Macro for declaring an explicit conversion operator. Defaults to an + * implicit conversion if C++11 is not supported. + */ +#if __cplusplus >= 201103L +#define EXPLICIT_CONVERSION explicit +#elif defined(__cplusplus) +#define EXPLICIT_CONVERSION +#endif + +/** Set a single bit */ +#define BITFIELD_BIT(b) (1u << (b)) +/** Set all bits up to excluding bit b */ +#define BITFIELD_MASK(b) \ + ((b) == 32 ? (~0u) : BITFIELD_BIT((b) % 32) - 1) +/** Set count bits starting from bit b */ +#define BITFIELD_RANGE(b, count) \ + (BITFIELD_MASK((b) + (count)) & ~BITFIELD_MASK(b)) + +/** Set a single bit */ +#define BITFIELD64_BIT(b) (1ull << (b)) +/** Set all bits up to excluding bit b */ +#define BITFIELD64_MASK(b) \ + ((b) == 64 ? (~0ull) : BITFIELD64_BIT(b) - 1) +/** Set count bits starting from bit b */ +#define BITFIELD64_RANGE(b, count) \ + (BITFIELD64_MASK((b) + (count)) & ~BITFIELD64_MASK(b)) + +/* TODO: In future we should try to move this to u_debug.h once header + * dependencies are reorganised to allow this. 
+ */ +enum pipe_debug_type +{ + PIPE_DEBUG_TYPE_OUT_OF_MEMORY = 1, + PIPE_DEBUG_TYPE_ERROR, + PIPE_DEBUG_TYPE_SHADER_INFO, + PIPE_DEBUG_TYPE_PERF_INFO, + PIPE_DEBUG_TYPE_INFO, + PIPE_DEBUG_TYPE_FALLBACK, + PIPE_DEBUG_TYPE_CONFORMANCE, +}; + +#endif /* UTIL_MACROS_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/mesa-sha1.c b/third_party/rust/glslopt/glsl-optimizer/src/util/mesa-sha1.c new file mode 100644 index 0000000000..fa9284627b --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/mesa-sha1.c @@ -0,0 +1,51 @@ +/* Copyright © 2007 Carl Worth + * Copyright © 2009 Jeremy Huddleston, Julien Cristau, and Matthieu Herrb + * Copyright © 2009-2010 Mikhail Gusarov + * Copyright © 2012 Yaakov Selkowitz and Keith Packard + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "sha1/sha1.h" +#include "mesa-sha1.h" + +void +_mesa_sha1_compute(const void *data, size_t size, unsigned char result[20]) +{ + struct mesa_sha1 ctx; + + _mesa_sha1_init(&ctx); + _mesa_sha1_update(&ctx, data, size); + _mesa_sha1_final(&ctx, result); +} + +void +_mesa_sha1_format(char *buf, const unsigned char *sha1) +{ + static const char hex_digits[] = "0123456789abcdef"; + int i; + + for (i = 0; i < 40; i += 2) { + buf[i] = hex_digits[sha1[i >> 1] >> 4]; + buf[i + 1] = hex_digits[sha1[i >> 1] & 0x0f]; + } + buf[i] = '\0'; +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/mesa-sha1.h b/third_party/rust/glslopt/glsl-optimizer/src/util/mesa-sha1.h new file mode 100644 index 0000000000..bde50ba1eb --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/mesa-sha1.h @@ -0,0 +1,64 @@ +/* Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef MESA_SHA1_H +#define MESA_SHA1_H + +#include <stdlib.h> +#include "c99_compat.h" +#include "sha1/sha1.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define mesa_sha1 _SHA1_CTX + +static inline void +_mesa_sha1_init(struct mesa_sha1 *ctx) +{ + SHA1Init(ctx); +} + +static inline void +_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, size_t size) +{ + SHA1Update(ctx, (const unsigned char *)data, size); +} + +static inline void +_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20]) +{ + SHA1Final(result, ctx); +} + +void +_mesa_sha1_format(char *buf, const unsigned char *sha1); + +void +_mesa_sha1_compute(const void *data, size_t size, unsigned char result[20]); + +#ifdef __cplusplus +} /* extern C */ +#endif + +#endif diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/mesa-sha1_test.c b/third_party/rust/glslopt/glsl-optimizer/src/util/mesa-sha1_test.c new file mode 100644 index 0000000000..9b3b477c7f --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/mesa-sha1_test.c @@ -0,0 +1,65 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdio.h> +#include <stdbool.h> +#include <string.h> + +#include "macros.h" +#include "mesa-sha1.h" + +#define SHA1_LENGTH 40 + +int main(int argc, char *argv[]) +{ + static const struct { + const char *string; + const char *sha1; + } test_data[] = { + {"Mesa Rocks! 273", "7fb99737373d65a73f049cdabc01e73aa6bc60f3"}, + {"Mesa Rocks! 300", "b2180263e37d3bed6a4be0afe41b1a82ebbcf4c3"}, + {"Mesa Rocks! 
583", "7fb9734108a62503e8a149c1051facd7fb112d05"}, + }; + + bool failed = false; + int i; + + for (i = 0; i < ARRAY_SIZE(test_data); i++) { + unsigned char sha1[20]; + _mesa_sha1_compute(test_data[i].string, strlen(test_data[i].string), + sha1); + + char buf[41]; + _mesa_sha1_format(buf, sha1); + + if (memcmp(test_data[i].sha1, buf, SHA1_LENGTH) != 0) { + printf("For string \"%s\", length %zu:\n" + "\tExpected: %s\n\t Got: %s\n", + test_data[i].string, strlen(test_data[i].string), + test_data[i].sha1, buf); + failed = true; + } + } + + return failed; +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/os_memory.h b/third_party/rust/glslopt/glsl-optimizer/src/util/os_memory.h new file mode 100644 index 0000000000..b191cf2058 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/os_memory.h @@ -0,0 +1,74 @@ +/************************************************************************** + * + * Copyright 2010 Vmware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* + * OS memory management abstractions + */ + + +#ifndef _OS_MEMORY_H_ +#define _OS_MEMORY_H_ + +#if defined(EMBEDDED_DEVICE) + +#ifdef __cplusplus +extern "C" { +#endif + +void * +os_malloc(size_t size); + +void * +os_calloc(size_t count, size_t size); + +void +os_free(void *ptr); + +void * +os_realloc(void *ptr, size_t old_size, size_t new_size); + +void * +os_malloc_aligned(size_t size, size_t alignment); + +void +os_free_aligned(void *ptr); + +void * +os_realloc_aligned(void *ptr, size_t oldsize, size_t newsize, size_t alignemnt); + +#ifdef __cplusplus +} +#endif + +#else + +# include "os_memory_stdc.h" + +#endif + +#endif /* _OS_MEMORY_H_ */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/os_memory_aligned.h b/third_party/rust/glslopt/glsl-optimizer/src/util/os_memory_aligned.h new file mode 100644 index 0000000000..08f12062a7 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/os_memory_aligned.h @@ -0,0 +1,128 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* + * Memory alignment wrappers. + */ + + +#ifndef _OS_MEMORY_H_ +#error "Must not be included directly. Include os_memory.h instead" +#endif + + +/** + * Add two size_t values with integer overflow check. + * TODO: leverage __builtin_add_overflow where available + */ +static inline bool +add_overflow_size_t(size_t a, size_t b, size_t *res) +{ + *res = a + b; + return *res < a || *res < b; +} + + +#if defined(HAVE_POSIX_MEMALIGN) + +static inline void * +os_malloc_aligned(size_t size, size_t alignment) +{ + void *ptr; + alignment = (alignment + sizeof(void*) - 1) & ~(sizeof(void*) - 1); + if(posix_memalign(&ptr, alignment, size) != 0) + return NULL; + return ptr; +} + +#define os_free_aligned(_ptr) free(_ptr) + +#else + +/** + * Return memory on given byte alignment + */ +static inline void * +os_malloc_aligned(size_t size, size_t alignment) +{ + char *ptr, *buf; + size_t alloc_size; + + /* + * Calculate + * + * alloc_size = size + alignment + sizeof(void *) + * + * while checking for overflow. + */ + if (add_overflow_size_t(size, alignment, &alloc_size) || + add_overflow_size_t(alloc_size, sizeof(void *), &alloc_size)) { + return NULL; + } + + ptr = (char *) os_malloc(alloc_size); + if (!ptr) + return NULL; + + buf = (char *)(((uintptr_t)ptr + sizeof(void *) + alignment - 1) & ~((uintptr_t)(alignment - 1))); + *(char **)(buf - sizeof(void *)) = ptr; + + return buf; +} + + +/** + * Free memory returned by os_malloc_aligned(). 
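+ *
+ * os_malloc_aligned() stashes the pointer returned by os_malloc() in the
+ * word immediately below the aligned address it hands out; this function
+ * reads that stashed pointer back and frees the original allocation.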
+ */ +static inline void +os_free_aligned(void *ptr) +{ + if (ptr) { + void **cubbyHole = (void **) ((char *) ptr - sizeof(void *)); + void *realAddr = *cubbyHole; + os_free(realAddr); + } +} + +#endif + +/** + * Reallocate memeory, with alignment + */ +static inline void * +os_realloc_aligned(void *ptr, size_t oldsize, size_t newsize, size_t alignment) +{ + const size_t copySize = MIN2(oldsize, newsize); + void *newBuf = os_malloc_aligned(newsize, alignment); + if (newBuf && ptr && copySize > 0) { + memcpy(newBuf, ptr, copySize); + } + + os_free_aligned(ptr); + return newBuf; +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/os_memory_stdc.h b/third_party/rust/glslopt/glsl-optimizer/src/util/os_memory_stdc.h new file mode 100644 index 0000000000..bda5715998 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/os_memory_stdc.h @@ -0,0 +1,60 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* + * OS memory management abstractions for the standard C library. + */ + + +#ifndef _OS_MEMORY_H_ +#error "Must not be included directly. Include os_memory.h instead" +#endif + +#include <stdlib.h> + + +#define os_malloc(_size) malloc(_size) +#define os_calloc(_count, _size ) calloc(_count, _size ) +#define os_free(_ptr) free(_ptr) + +#define os_realloc( _old_ptr, _old_size, _new_size) \ + realloc(_old_ptr, _new_size + 0*(_old_size)) + +#if DETECT_OS_WINDOWS + +#include <malloc.h> + +#define os_malloc_aligned(_size, _align) _aligned_malloc(_size, _align) +#define os_free_aligned(_ptr) _aligned_free(_ptr) +#define os_realloc_aligned(_ptr, _oldsize, _newsize, _alignment) _aligned_realloc(_ptr, _newsize, _alignment) + +#else + +#include "os_memory_aligned.h" + +#endif diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/os_misc.c b/third_party/rust/glslopt/glsl-optimizer/src/util/os_misc.c new file mode 100644 index 0000000000..e6894731b6 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/os_misc.c @@ -0,0 +1,184 @@ +/************************************************************************** + * + * Copyright 2008-2010 Vmware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "os_misc.h" + +#include <stdarg.h> + + +#if DETECT_OS_WINDOWS + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers +#endif +#include <windows.h> +#include <stdio.h> +#include <stdlib.h> + +#else + +#include <stdio.h> +#include <stdlib.h> + +#endif + + +#if DETECT_OS_ANDROID +# define LOG_TAG "MESA" +# include <unistd.h> +# include <log/log.h> +#elif DETECT_OS_LINUX || DETECT_OS_CYGWIN || DETECT_OS_SOLARIS || DETECT_OS_HURD +# include <unistd.h> +#elif DETECT_OS_APPLE || DETECT_OS_BSD +# include <sys/sysctl.h> +#elif DETECT_OS_HAIKU +# include <kernel/OS.h> +#elif DETECT_OS_WINDOWS +# include <windows.h> +#else +#error unexpected platform in os_sysinfo.c +#endif + + +void +os_log_message(const char *message) +{ + /* If the GALLIUM_LOG_FILE environment variable is set to a valid filename, + * write all messages to that file. + */ + static FILE *fout = NULL; + + if (!fout) { +#ifdef DEBUG + /* one-time init */ + const char *filename = os_get_option("GALLIUM_LOG_FILE"); + if (filename) { + const char *mode = "w"; + if (filename[0] == '+') { + /* If the filename is prefixed with '+' then open the file for + * appending instead of normal writing. + */ + mode = "a"; + filename++; /* skip the '+' */ + } + fout = fopen(filename, mode); + } +#endif + if (!fout) + fout = stderr; + } + +#if DETECT_OS_WINDOWS + OutputDebugStringA(message); + if(GetConsoleWindow() && !IsDebuggerPresent()) { + fflush(stdout); + fputs(message, fout); + fflush(fout); + } + else if (fout != stderr) { + fputs(message, fout); + fflush(fout); + } +#else /* !DETECT_OS_WINDOWS */ + fflush(stdout); + fputs(message, fout); + fflush(fout); +# if DETECT_OS_ANDROID + LOG_PRI(ANDROID_LOG_ERROR, LOG_TAG, "%s", message); +# endif +#endif +} + + +#if !defined(EMBEDDED_DEVICE) +const char * +os_get_option(const char *name) +{ + return getenv(name); +} +#endif /* !EMBEDDED_DEVICE */ + + +/** + * Return the size of the total physical memory. 
+ * \param size returns the size of the total physical memory + * \return true for success, or false on failure + */ +bool +os_get_total_physical_memory(uint64_t *size) +{ +#if DETECT_OS_LINUX || DETECT_OS_CYGWIN || DETECT_OS_SOLARIS || DETECT_OS_HURD + const long phys_pages = sysconf(_SC_PHYS_PAGES); + const long page_size = sysconf(_SC_PAGE_SIZE); + + if (phys_pages <= 0 || page_size <= 0) + return false; + + *size = (uint64_t)phys_pages * (uint64_t)page_size; + return true; +#elif DETECT_OS_APPLE || DETECT_OS_BSD + size_t len = sizeof(*size); + int mib[2]; + + mib[0] = CTL_HW; +#if DETECT_OS_APPLE + mib[1] = HW_MEMSIZE; +#elif DETECT_OS_NETBSD || DETECT_OS_OPENBSD + mib[1] = HW_PHYSMEM64; +#elif DETECT_OS_FREEBSD + mib[1] = HW_REALMEM; +#elif DETECT_OS_DRAGONFLY + mib[1] = HW_PHYSMEM; +#else +#error Unsupported *BSD +#endif + + return (sysctl(mib, 2, size, &len, NULL, 0) == 0); +#elif DETECT_OS_HAIKU + system_info info; + status_t ret; + + ret = get_system_info(&info); + if (ret != B_OK || info.max_pages <= 0) + return false; + + *size = (uint64_t)info.max_pages * (uint64_t)B_PAGE_SIZE; + return true; +#elif DETECT_OS_WINDOWS + MEMORYSTATUSEX status; + BOOL ret; + + status.dwLength = sizeof(status); + ret = GlobalMemoryStatusEx(&status); + *size = status.ullTotalPhys; + return (ret == TRUE); +#else +#error unexpected platform in os_sysinfo.c + return false; +#endif +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/os_misc.h b/third_party/rust/glslopt/glsl-optimizer/src/util/os_misc.h new file mode 100644 index 0000000000..19c8962d5d --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/os_misc.h @@ -0,0 +1,104 @@ +/************************************************************************** + * + * Copyright 2010 Vmware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* + * Miscellaneous OS services. + */ + + +#ifndef _OS_MISC_H_ +#define _OS_MISC_H_ + +#include <stdint.h> +#include <stdbool.h> + +#include "detect_os.h" + + +#if DETECT_OS_UNIX +# include <signal.h> /* for kill() */ +# include <unistd.h> /* getpid() */ +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * Trap into the debugger. 
+ */ +#if (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)) && defined(PIPE_CC_GCC) +# define os_break() __asm("int3") +#elif defined(PIPE_CC_MSVC) +# define os_break() __debugbreak() +#elif DETECT_OS_UNIX +# define os_break() kill(getpid(), SIGTRAP) +#else +# define os_break() abort() +#endif + + +/* + * Abort the program. + */ +#if defined(DEBUG) +# define os_abort() do { os_break(); abort(); } while(0) +#else +# define os_abort() abort() +#endif + + +/* + * Output a message. Message should preferably end in a newline. + */ +void +os_log_message(const char *message); + + +/* + * Get an option. Should return NULL if specified option is not set. + */ +const char * +os_get_option(const char *name); + + +/* + * Get the total amount of physical memory available on the system. + */ +bool +os_get_total_physical_memory(uint64_t *size); + + +#ifdef __cplusplus +} +#endif + + +#endif /* _OS_MISC_H_ */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/os_time.h b/third_party/rust/glslopt/glsl-optimizer/src/util/os_time.h new file mode 100644 index 0000000000..049ab118db --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/os_time.h @@ -0,0 +1,130 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * OS independent time-manipulation functions. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#ifndef _OS_TIME_H_ +#define _OS_TIME_H_ + +#include <stdbool.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* must be equal to PIPE_TIMEOUT_INFINITE */ +#define OS_TIMEOUT_INFINITE 0xffffffffffffffffull + +/* + * Get the current time in nanoseconds from an unknown base. + */ +int64_t +os_time_get_nano(void); + + +/* + * Get the current time in microseconds from an unknown base. + */ +static inline int64_t +os_time_get(void) +{ + return os_time_get_nano() / 1000; +} + + +/* + * Sleep. + */ +void +os_time_sleep(int64_t usecs); + + +/* + * Helper function for detecting time outs, taking in account overflow. + * + * Returns true if the current time has elapsed beyond the specified interval. 
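+ *
+ * An illustrative polling loop (hypothetical identifiers; os_time_get()
+ * returns microseconds):
+ *
+ *    int64_t start = os_time_get();
+ *    int64_t end = start + timeout_us;
+ *    while (!os_time_timeout(start, end, os_time_get()))
+ *       poll_once();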
+ */ +static inline bool +os_time_timeout(int64_t start, + int64_t end, + int64_t curr) +{ + if (start <= end) + return !(start <= curr && curr < end); + else + return !((start <= curr) || (curr < end)); +} + + +/** + * Convert a relative timeout in nanoseconds into an absolute timeout, + * in other words, it returns current time + timeout. + * os_time_get_nano() must be monotonic. + * OS_TIMEOUT_INFINITE is passed through unchanged. If the calculation + * overflows, OS_TIMEOUT_INFINITE is returned. + */ +int64_t +os_time_get_absolute_timeout(uint64_t timeout); + + +/** + * Wait until the variable at the given memory location is zero. + * + * \param var variable + * \param timeout timeout in ns, can be anything from 0 (no wait) to + * OS_TIMEOUT_INFINITE (wait forever) + * \return true if the variable is zero + */ +bool +os_wait_until_zero(volatile int *var, uint64_t timeout); + + +/** + * Wait until the variable at the given memory location is zero. + * The timeout is the absolute time when the waiting should stop. If it is + * less than or equal to the current time, it only returns the status and + * doesn't wait. OS_TIMEOUT_INFINITE waits forever. This requires that + * os_time_get_nano is monotonic. + * + * \param var variable + * \param timeout the time in ns when the waiting should stop + * \return true if the variable is zero + */ +bool +os_wait_until_zero_abs_timeout(volatile int *var, int64_t timeout); + +#ifdef __cplusplus +} +#endif + +#endif /* _OS_TIME_H_ */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/ralloc.c b/third_party/rust/glslopt/glsl-optimizer/src/util/ralloc.c new file mode 100644 index 0000000000..7b7b018558 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/ralloc.c @@ -0,0 +1,921 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdlib.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> + +/* Some versions of MinGW are missing _vscprintf's declaration, although they + * still provide the symbol in the import library. 
*/ +#ifdef __MINGW32__ +_CRTIMP int _vscprintf(const char *format, va_list argptr); +#endif + +#include "ralloc.h" + +#ifndef va_copy +#ifdef __va_copy +#define va_copy(dest, src) __va_copy((dest), (src)) +#else +#define va_copy(dest, src) (dest) = (src) +#endif +#endif + +#define CANARY 0x5A1106 + +/* Align the header's size so that ralloc() allocations will return with the + * same alignment as a libc malloc would have (8 on 32-bit GLIBC, 16 on + * 64-bit), avoiding performance penalities on x86 and alignment faults on + * ARM. + */ +struct +#ifdef _MSC_VER +#if _WIN64 +__declspec(align(16)) +#else + __declspec(align(8)) +#endif +#elif defined(__LP64__) + __attribute__((aligned(16))) +#else + __attribute__((aligned(8))) +#endif + ralloc_header +{ +#ifndef NDEBUG + /* A canary value used to determine whether a pointer is ralloc'd. */ + unsigned canary; +#endif + + struct ralloc_header *parent; + + /* The first child (head of a linked list) */ + struct ralloc_header *child; + + /* Linked list of siblings */ + struct ralloc_header *prev; + struct ralloc_header *next; + + void (*destructor)(void *); +}; + +typedef struct ralloc_header ralloc_header; + +static void unlink_block(ralloc_header *info); +static void unsafe_free(ralloc_header *info); + +static ralloc_header * +get_header(const void *ptr) +{ + ralloc_header *info = (ralloc_header *) (((char *) ptr) - + sizeof(ralloc_header)); + assert(info->canary == CANARY); + return info; +} + +#define PTR_FROM_HEADER(info) (((char *) info) + sizeof(ralloc_header)) + +static void +add_child(ralloc_header *parent, ralloc_header *info) +{ + if (parent != NULL) { + info->parent = parent; + info->next = parent->child; + parent->child = info; + + if (info->next != NULL) + info->next->prev = info; + } +} + +void * +ralloc_context(const void *ctx) +{ + return ralloc_size(ctx, 0); +} + +void * +ralloc_size(const void *ctx, size_t size) +{ + void *block = malloc(size + sizeof(ralloc_header)); + ralloc_header *info; + ralloc_header *parent; + + if (unlikely(block == NULL)) + return NULL; + + info = (ralloc_header *) block; + /* measurements have shown that calloc is slower (because of + * the multiplication overflow checking?), so clear things + * manually + */ + info->parent = NULL; + info->child = NULL; + info->prev = NULL; + info->next = NULL; + info->destructor = NULL; + + parent = ctx != NULL ? get_header(ctx) : NULL; + + add_child(parent, info); + +#ifndef NDEBUG + info->canary = CANARY; +#endif + + return PTR_FROM_HEADER(info); +} + +void * +rzalloc_size(const void *ctx, size_t size) +{ + void *ptr = ralloc_size(ctx, size); + + if (likely(ptr)) + memset(ptr, 0, size); + + return ptr; +} + +/* helper function - assumes ptr != NULL */ +static void * +resize(void *ptr, size_t size) +{ + ralloc_header *child, *old, *info; + + old = get_header(ptr); + info = realloc(old, size + sizeof(ralloc_header)); + + if (info == NULL) + return NULL; + + /* Update parent and sibling's links to the reallocated node. 
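+    * realloc() may have moved the block, so the parent's child pointer and
+    * the siblings' prev/next pointers can still reference the old address
+    * and must be repointed at the new header.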
*/ + if (info != old && info->parent != NULL) { + if (info->parent->child == old) + info->parent->child = info; + + if (info->prev != NULL) + info->prev->next = info; + + if (info->next != NULL) + info->next->prev = info; + } + + /* Update child->parent links for all children */ + for (child = info->child; child != NULL; child = child->next) + child->parent = info; + + return PTR_FROM_HEADER(info); +} + +void * +reralloc_size(const void *ctx, void *ptr, size_t size) +{ + if (unlikely(ptr == NULL)) + return ralloc_size(ctx, size); + + assert(ralloc_parent(ptr) == ctx); + return resize(ptr, size); +} + +void * +rerzalloc_size(const void *ctx, void *ptr, size_t old_size, size_t new_size) +{ + if (unlikely(ptr == NULL)) + return rzalloc_size(ctx, new_size); + + assert(ralloc_parent(ptr) == ctx); + ptr = resize(ptr, new_size); + + if (new_size > old_size) + memset((char *)ptr + old_size, 0, new_size - old_size); + + return ptr; +} + +void * +ralloc_array_size(const void *ctx, size_t size, unsigned count) +{ + if (count > SIZE_MAX/size) + return NULL; + + return ralloc_size(ctx, size * count); +} + +void * +rzalloc_array_size(const void *ctx, size_t size, unsigned count) +{ + if (count > SIZE_MAX/size) + return NULL; + + return rzalloc_size(ctx, size * count); +} + +void * +reralloc_array_size(const void *ctx, void *ptr, size_t size, unsigned count) +{ + if (count > SIZE_MAX/size) + return NULL; + + return reralloc_size(ctx, ptr, size * count); +} + +void * +rerzalloc_array_size(const void *ctx, void *ptr, size_t size, + unsigned old_count, unsigned new_count) +{ + if (new_count > SIZE_MAX/size) + return NULL; + + return rerzalloc_size(ctx, ptr, size * old_count, size * new_count); +} + +void +ralloc_free(void *ptr) +{ + ralloc_header *info; + + if (ptr == NULL) + return; + + info = get_header(ptr); + unlink_block(info); + unsafe_free(info); +} + +static void +unlink_block(ralloc_header *info) +{ + /* Unlink from parent & siblings */ + if (info->parent != NULL) { + if (info->parent->child == info) + info->parent->child = info->next; + + if (info->prev != NULL) + info->prev->next = info->next; + + if (info->next != NULL) + info->next->prev = info->prev; + } + info->parent = NULL; + info->prev = NULL; + info->next = NULL; +} + +static void +unsafe_free(ralloc_header *info) +{ + /* Recursively free any children...don't waste time unlinking them. */ + ralloc_header *temp; + while (info->child != NULL) { + temp = info->child; + info->child = temp->next; + unsafe_free(temp); + } + + /* Free the block itself. Call the destructor first, if any. */ + if (info->destructor != NULL) + info->destructor(PTR_FROM_HEADER(info)); + + free(info); +} + +void +ralloc_steal(const void *new_ctx, void *ptr) +{ + ralloc_header *info, *parent; + + if (unlikely(ptr == NULL)) + return; + + info = get_header(ptr); + parent = new_ctx ? get_header(new_ctx) : NULL; + + unlink_block(info); + + add_child(parent, info); +} + +void +ralloc_adopt(const void *new_ctx, void *old_ctx) +{ + ralloc_header *new_info, *old_info, *child; + + if (unlikely(old_ctx == NULL)) + return; + + old_info = get_header(old_ctx); + new_info = get_header(new_ctx); + + /* If there are no children, bail. */ + if (unlikely(old_info->child == NULL)) + return; + + /* Set all the children's parent to new_ctx; get a pointer to the last child. 
*/ + for (child = old_info->child; child->next != NULL; child = child->next) { + child->parent = new_info; + } + child->parent = new_info; + + /* Connect the two lists together; parent them to new_ctx; make old_ctx empty. */ + child->next = new_info->child; + if (child->next) + child->next->prev = child; + new_info->child = old_info->child; + old_info->child = NULL; +} + +void * +ralloc_parent(const void *ptr) +{ + ralloc_header *info; + + if (unlikely(ptr == NULL)) + return NULL; + + info = get_header(ptr); + return info->parent ? PTR_FROM_HEADER(info->parent) : NULL; +} + +void +ralloc_set_destructor(const void *ptr, void(*destructor)(void *)) +{ + ralloc_header *info = get_header(ptr); + info->destructor = destructor; +} + +char * +ralloc_strdup(const void *ctx, const char *str) +{ + size_t n; + char *ptr; + + if (unlikely(str == NULL)) + return NULL; + + n = strlen(str); + ptr = ralloc_array(ctx, char, n + 1); + memcpy(ptr, str, n); + ptr[n] = '\0'; + return ptr; +} + +char * +ralloc_strndup(const void *ctx, const char *str, size_t max) +{ + size_t n; + char *ptr; + + if (unlikely(str == NULL)) + return NULL; + + n = strnlen(str, max); + ptr = ralloc_array(ctx, char, n + 1); + memcpy(ptr, str, n); + ptr[n] = '\0'; + return ptr; +} + +/* helper routine for strcat/strncat - n is the exact amount to copy */ +static bool +cat(char **dest, const char *str, size_t n) +{ + char *both; + size_t existing_length; + assert(dest != NULL && *dest != NULL); + + existing_length = strlen(*dest); + both = resize(*dest, existing_length + n + 1); + if (unlikely(both == NULL)) + return false; + + memcpy(both + existing_length, str, n); + both[existing_length + n] = '\0'; + + *dest = both; + return true; +} + + +bool +ralloc_strcat(char **dest, const char *str) +{ + return cat(dest, str, strlen(str)); +} + +bool +ralloc_strncat(char **dest, const char *str, size_t n) +{ + return cat(dest, str, strnlen(str, n)); +} + +bool +ralloc_str_append(char **dest, const char *str, + size_t existing_length, size_t str_size) +{ + char *both; + assert(dest != NULL && *dest != NULL); + + both = resize(*dest, existing_length + str_size + 1); + if (unlikely(both == NULL)) + return false; + + memcpy(both + existing_length, str, str_size); + both[existing_length + str_size] = '\0'; + + *dest = both; + + return true; +} + +char * +ralloc_asprintf(const void *ctx, const char *fmt, ...) +{ + char *ptr; + va_list args; + va_start(args, fmt); + ptr = ralloc_vasprintf(ctx, fmt, args); + va_end(args); + return ptr; +} + +size_t +printf_length(const char *fmt, va_list untouched_args) +{ + int size; + char junk; + + /* Make a copy of the va_list so the original caller can still use it */ + va_list args; + va_copy(args, untouched_args); + +#ifdef _WIN32 + /* We need to use _vcsprintf to calculate the size as vsnprintf returns -1 + * if the number of characters to write is greater than count. + */ + size = _vscprintf(fmt, args); + (void)junk; +#else + size = vsnprintf(&junk, 1, fmt, args); +#endif + assert(size >= 0); + + va_end(args); + + return size; +} + +char * +ralloc_vasprintf(const void *ctx, const char *fmt, va_list args) +{ + size_t size = printf_length(fmt, args) + 1; + + char *ptr = ralloc_size(ctx, size); + if (ptr != NULL) + vsnprintf(ptr, size, fmt, args); + + return ptr; +} + +bool +ralloc_asprintf_append(char **str, const char *fmt, ...) 
+{ + bool success; + va_list args; + va_start(args, fmt); + success = ralloc_vasprintf_append(str, fmt, args); + va_end(args); + return success; +} + +bool +ralloc_vasprintf_append(char **str, const char *fmt, va_list args) +{ + size_t existing_length; + assert(str != NULL); + existing_length = *str ? strlen(*str) : 0; + return ralloc_vasprintf_rewrite_tail(str, &existing_length, fmt, args); +} + +bool +ralloc_asprintf_rewrite_tail(char **str, size_t *start, const char *fmt, ...) +{ + bool success; + va_list args; + va_start(args, fmt); + success = ralloc_vasprintf_rewrite_tail(str, start, fmt, args); + va_end(args); + return success; +} + +bool +ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt, + va_list args) +{ + size_t new_length; + char *ptr; + + assert(str != NULL); + + if (unlikely(*str == NULL)) { + // Assuming a NULL context is probably bad, but it's expected behavior. + *str = ralloc_vasprintf(NULL, fmt, args); + *start = strlen(*str); + return true; + } + + new_length = printf_length(fmt, args); + + ptr = resize(*str, *start + new_length + 1); + if (unlikely(ptr == NULL)) + return false; + + vsnprintf(ptr + *start, new_length + 1, fmt, args); + *str = ptr; + *start += new_length; + return true; +} + +/*************************************************************************** + * Linear allocator for short-lived allocations. + *************************************************************************** + * + * The allocator consists of a parent node (2K buffer), which requires + * a ralloc parent, and child nodes (allocations). Child nodes can't be freed + * directly, because the parent doesn't track them. You have to release + * the parent node in order to release all its children. + * + * The allocator uses a fixed-sized buffer with a monotonically increasing + * offset after each allocation. If the buffer is all used, another buffer + * is allocated, sharing the same ralloc parent, so all buffers are at + * the same level in the ralloc hierarchy. + * + * The linear parent node is always the first buffer and keeps track of all + * other buffers. + */ + +#define MIN_LINEAR_BUFSIZE 2048 +#define SUBALLOC_ALIGNMENT 8 +#define LMAGIC 0x87b9c7d3 + +struct +#ifdef _MSC_VER + __declspec(align(8)) +#elif defined(__LP64__) + __attribute__((aligned(16))) +#else + __attribute__((aligned(8))) +#endif + linear_header { +#ifndef NDEBUG + unsigned magic; /* for debugging */ +#endif + unsigned offset; /* points to the first unused byte in the buffer */ + unsigned size; /* size of the buffer */ + void *ralloc_parent; /* new buffers will use this */ + struct linear_header *next; /* next buffer if we have more */ + struct linear_header *latest; /* the only buffer that has free space */ + + /* After this structure, the buffer begins. + * Each suballocation consists of linear_size_chunk as its header followed + * by the suballocation, so it goes: + * + * - linear_size_chunk + * - allocated space + * - linear_size_chunk + * - allocated space + * etc. + * + * linear_size_chunk is only needed by linear_realloc. + */ +}; + +struct linear_size_chunk { + unsigned size; /* for realloc */ + unsigned _padding; +}; + +typedef struct linear_header linear_header; +typedef struct linear_size_chunk linear_size_chunk; + +#define LINEAR_PARENT_TO_HEADER(parent) \ + (linear_header*) \ + ((char*)(parent) - sizeof(linear_size_chunk) - sizeof(linear_header)) + +/* Allocate the linear buffer with its header. 
*/ +static linear_header * +create_linear_node(void *ralloc_ctx, unsigned min_size) +{ + linear_header *node; + + min_size += sizeof(linear_size_chunk); + + if (likely(min_size < MIN_LINEAR_BUFSIZE)) + min_size = MIN_LINEAR_BUFSIZE; + + node = ralloc_size(ralloc_ctx, sizeof(linear_header) + min_size); + if (unlikely(!node)) + return NULL; + +#ifndef NDEBUG + node->magic = LMAGIC; +#endif + node->offset = 0; + node->size = min_size; + node->ralloc_parent = ralloc_ctx; + node->next = NULL; + node->latest = node; + return node; +} + +void * +linear_alloc_child(void *parent, unsigned size) +{ + linear_header *first = LINEAR_PARENT_TO_HEADER(parent); + linear_header *latest = first->latest; + linear_header *new_node; + linear_size_chunk *ptr; + unsigned full_size; + + assert(first->magic == LMAGIC); + assert(!latest->next); + + size = ALIGN_POT(size, SUBALLOC_ALIGNMENT); + full_size = sizeof(linear_size_chunk) + size; + + if (unlikely(latest->offset + full_size > latest->size)) { + /* allocate a new node */ + new_node = create_linear_node(latest->ralloc_parent, size); + if (unlikely(!new_node)) + return NULL; + + first->latest = new_node; + latest->latest = new_node; + latest->next = new_node; + latest = new_node; + } + + ptr = (linear_size_chunk *)((char*)&latest[1] + latest->offset); + ptr->size = size; + latest->offset += full_size; + + assert((uintptr_t)&ptr[1] % SUBALLOC_ALIGNMENT == 0); + return &ptr[1]; +} + +void * +linear_alloc_parent(void *ralloc_ctx, unsigned size) +{ + linear_header *node; + + if (unlikely(!ralloc_ctx)) + return NULL; + + size = ALIGN_POT(size, SUBALLOC_ALIGNMENT); + + node = create_linear_node(ralloc_ctx, size); + if (unlikely(!node)) + return NULL; + + return linear_alloc_child((char*)node + + sizeof(linear_header) + + sizeof(linear_size_chunk), size); +} + +void * +linear_zalloc_child(void *parent, unsigned size) +{ + void *ptr = linear_alloc_child(parent, size); + + if (likely(ptr)) + memset(ptr, 0, size); + return ptr; +} + +void * +linear_zalloc_parent(void *parent, unsigned size) +{ + void *ptr = linear_alloc_parent(parent, size); + + if (likely(ptr)) + memset(ptr, 0, size); + return ptr; +} + +void +linear_free_parent(void *ptr) +{ + linear_header *node; + + if (unlikely(!ptr)) + return; + + node = LINEAR_PARENT_TO_HEADER(ptr); + assert(node->magic == LMAGIC); + + while (node) { + void *ptr = node; + + node = node->next; + ralloc_free(ptr); + } +} + +void +ralloc_steal_linear_parent(void *new_ralloc_ctx, void *ptr) +{ + linear_header *node; + + if (unlikely(!ptr)) + return; + + node = LINEAR_PARENT_TO_HEADER(ptr); + assert(node->magic == LMAGIC); + + while (node) { + ralloc_steal(new_ralloc_ctx, node); + node->ralloc_parent = new_ralloc_ctx; + node = node->next; + } +} + +void * +ralloc_parent_of_linear_parent(void *ptr) +{ + linear_header *node = LINEAR_PARENT_TO_HEADER(ptr); + assert(node->magic == LMAGIC); + return node->ralloc_parent; +} + +void * +linear_realloc(void *parent, void *old, unsigned new_size) +{ + unsigned old_size = 0; + ralloc_header *new_ptr; + + new_ptr = linear_alloc_child(parent, new_size); + + if (unlikely(!old)) + return new_ptr; + + old_size = ((linear_size_chunk*)old)[-1].size; + + if (likely(new_ptr && old_size)) + memcpy(new_ptr, old, MIN2(old_size, new_size)); + + return new_ptr; +} + +/* All code below is pretty much copied from ralloc and only the alloc + * calls are different. 
+ */ + +char * +linear_strdup(void *parent, const char *str) +{ + unsigned n; + char *ptr; + + if (unlikely(!str)) + return NULL; + + n = strlen(str); + ptr = linear_alloc_child(parent, n + 1); + if (unlikely(!ptr)) + return NULL; + + memcpy(ptr, str, n); + ptr[n] = '\0'; + return ptr; +} + +char * +linear_asprintf(void *parent, const char *fmt, ...) +{ + char *ptr; + va_list args; + va_start(args, fmt); + ptr = linear_vasprintf(parent, fmt, args); + va_end(args); + return ptr; +} + +char * +linear_vasprintf(void *parent, const char *fmt, va_list args) +{ + unsigned size = printf_length(fmt, args) + 1; + + char *ptr = linear_alloc_child(parent, size); + if (ptr != NULL) + vsnprintf(ptr, size, fmt, args); + + return ptr; +} + +bool +linear_asprintf_append(void *parent, char **str, const char *fmt, ...) +{ + bool success; + va_list args; + va_start(args, fmt); + success = linear_vasprintf_append(parent, str, fmt, args); + va_end(args); + return success; +} + +bool +linear_vasprintf_append(void *parent, char **str, const char *fmt, va_list args) +{ + size_t existing_length; + assert(str != NULL); + existing_length = *str ? strlen(*str) : 0; + return linear_vasprintf_rewrite_tail(parent, str, &existing_length, fmt, args); +} + +bool +linear_asprintf_rewrite_tail(void *parent, char **str, size_t *start, + const char *fmt, ...) +{ + bool success; + va_list args; + va_start(args, fmt); + success = linear_vasprintf_rewrite_tail(parent, str, start, fmt, args); + va_end(args); + return success; +} + +bool +linear_vasprintf_rewrite_tail(void *parent, char **str, size_t *start, + const char *fmt, va_list args) +{ + size_t new_length; + char *ptr; + + assert(str != NULL); + + if (unlikely(*str == NULL)) { + *str = linear_vasprintf(parent, fmt, args); + *start = strlen(*str); + return true; + } + + new_length = printf_length(fmt, args); + + ptr = linear_realloc(parent, *str, *start + new_length + 1); + if (unlikely(ptr == NULL)) + return false; + + vsnprintf(ptr + *start, new_length + 1, fmt, args); + *str = ptr; + *start += new_length; + return true; +} + +/* helper routine for strcat/strncat - n is the exact amount to copy */ +static bool +linear_cat(void *parent, char **dest, const char *str, unsigned n) +{ + char *both; + unsigned existing_length; + assert(dest != NULL && *dest != NULL); + + existing_length = strlen(*dest); + both = linear_realloc(parent, *dest, existing_length + n + 1); + if (unlikely(both == NULL)) + return false; + + memcpy(both + existing_length, str, n); + both[existing_length + n] = '\0'; + + *dest = both; + return true; +} + +bool +linear_strcat(void *parent, char **dest, const char *str) +{ + return linear_cat(parent, dest, str, strlen(str)); +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/ralloc.h b/third_party/rust/glslopt/glsl-optimizer/src/util/ralloc.h new file mode 100644 index 0000000000..e84ba0f8c6 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/ralloc.h @@ -0,0 +1,609 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice 
(including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ralloc.h + * + * ralloc: a recursive memory allocator + * + * The ralloc memory allocator creates a hierarchy of allocated + * objects. Every allocation is in reference to some parent, and + * every allocated object can in turn be used as the parent of a + * subsequent allocation. This allows for extremely convenient + * discarding of an entire tree/sub-tree of allocations by calling + * ralloc_free on any particular object to free it and all of its + * children. + * + * The conceptual working of ralloc was directly inspired by Andrew + * Tridgell's talloc, but ralloc is an independent implementation + * released under the MIT license and tuned for Mesa. + * + * talloc is more sophisticated than ralloc in that it includes reference + * counting and useful debugging features. However, it is released under + * a non-permissive open source license. + */ + +#ifndef RALLOC_H +#define RALLOC_H + +#include <stddef.h> +#include <stdarg.h> +#include <stdbool.h> + +#include "macros.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \def ralloc(ctx, type) + * Allocate a new object chained off of the given context. + * + * This is equivalent to: + * \code + * ((type *) ralloc_size(ctx, sizeof(type)) + * \endcode + */ +#define ralloc(ctx, type) ((type *) ralloc_size(ctx, sizeof(type))) + +/** + * \def rzalloc(ctx, type) + * Allocate a new object out of the given context and initialize it to zero. + * + * This is equivalent to: + * \code + * ((type *) rzalloc_size(ctx, sizeof(type)) + * \endcode + */ +#define rzalloc(ctx, type) ((type *) rzalloc_size(ctx, sizeof(type))) + +/** + * Allocate a new ralloc context. + * + * While any ralloc'd pointer can be used as a context, sometimes it is useful + * to simply allocate a context with no associated memory. + * + * It is equivalent to: + * \code + * ((type *) ralloc_size(ctx, 0) + * \endcode + */ +void *ralloc_context(const void *ctx); + +/** + * Allocate memory chained off of the given context. + * + * This is the core allocation routine which is used by all others. It + * simply allocates storage for \p size bytes and returns the pointer, + * similar to \c malloc. + */ +void *ralloc_size(const void *ctx, size_t size) MALLOCLIKE; + +/** + * Allocate zero-initialized memory chained off of the given context. + * + * This is similar to \c calloc with a size of 1. + */ +void *rzalloc_size(const void *ctx, size_t size) MALLOCLIKE; + +/** + * Resize a piece of ralloc-managed memory, preserving data. + * + * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the + * memory. Instead, it resizes it to a 0-byte ralloc context, just like + * calling ralloc_size(ctx, 0). This is different from talloc. + * + * \param ctx The context to use for new allocation. If \p ptr != NULL, + * it must be the same as ralloc_parent(\p ptr). + * \param ptr Pointer to the memory to be resized. May be NULL. 
+ * \param size The amount of memory to allocate, in bytes. + */ +void *reralloc_size(const void *ctx, void *ptr, size_t size); + +/** + * Resize a ralloc-managed array, preserving data and initializing any newly + * allocated data to zero. + * + * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the + * memory. Instead, it resizes it to a 0-byte ralloc context, just like + * calling ralloc_size(ctx, 0). This is different from talloc. + * + * \param ctx The context to use for new allocation. If \p ptr != NULL, + * it must be the same as ralloc_parent(\p ptr). + * \param ptr Pointer to the memory to be resized. May be NULL. + * \param old_size The amount of memory in the previous allocation, in bytes. + * \param new_size The amount of memory to allocate, in bytes. + */ +void *rerzalloc_size(const void *ctx, void *ptr, + size_t old_size, size_t new_size); + +/// \defgroup array Array Allocators @{ + +/** + * \def ralloc_array(ctx, type, count) + * Allocate an array of objects chained off the given context. + * + * Similar to \c calloc, but does not initialize the memory to zero. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \c sizeof(type) and \p count. This is necessary for security. + * + * This is equivalent to: + * \code + * ((type *) ralloc_array_size(ctx, sizeof(type), count) + * \endcode + */ +#define ralloc_array(ctx, type, count) \ + ((type *) ralloc_array_size(ctx, sizeof(type), count)) + +/** + * \def rzalloc_array(ctx, type, count) + * Allocate a zero-initialized array chained off the given context. + * + * Similar to \c calloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \c sizeof(type) and \p count. This is necessary for security. + * + * This is equivalent to: + * \code + * ((type *) rzalloc_array_size(ctx, sizeof(type), count) + * \endcode + */ +#define rzalloc_array(ctx, type, count) \ + ((type *) rzalloc_array_size(ctx, sizeof(type), count)) + +/** + * \def reralloc(ctx, ptr, type, count) + * Resize a ralloc-managed array, preserving data. + * + * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the + * memory. Instead, it resizes it to a 0-byte ralloc context, just like + * calling ralloc_size(ctx, 0). This is different from talloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \c sizeof(type) and \p count. This is necessary for security. + * + * \param ctx The context to use for new allocation. If \p ptr != NULL, + * it must be the same as ralloc_parent(\p ptr). + * \param ptr Pointer to the array to be resized. May be NULL. + * \param type The element type. + * \param count The number of elements to allocate. + */ +#define reralloc(ctx, ptr, type, count) \ + ((type *) reralloc_array_size(ctx, ptr, sizeof(type), count)) + +/** + * \def rerzalloc(ctx, ptr, type, count) + * Resize a ralloc-managed array, preserving data and initializing any newly + * allocated data to zero. + * + * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the + * memory. Instead, it resizes it to a 0-byte ralloc context, just like + * calling ralloc_size(ctx, 0). This is different from talloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \c sizeof(type) and \p count. This is necessary for security. + * + * \param ctx The context to use for new allocation. 
If \p ptr != NULL, + * it must be the same as ralloc_parent(\p ptr). + * \param ptr Pointer to the array to be resized. May be NULL. + * \param type The element type. + * \param old_count The number of elements in the previous allocation. + * \param new_count The number of elements to allocate. + */ +#define rerzalloc(ctx, ptr, type, old_count, new_count) \ + ((type *) rerzalloc_array_size(ctx, ptr, sizeof(type), old_count, new_count)) + +/** + * Allocate memory for an array chained off the given context. + * + * Similar to \c calloc, but does not initialize the memory to zero. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \p size and \p count. This is necessary for security. + */ +void *ralloc_array_size(const void *ctx, size_t size, unsigned count) MALLOCLIKE; + +/** + * Allocate a zero-initialized array chained off the given context. + * + * Similar to \c calloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \p size and \p count. This is necessary for security. + */ +void *rzalloc_array_size(const void *ctx, size_t size, unsigned count) MALLOCLIKE; + +/** + * Resize a ralloc-managed array, preserving data. + * + * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the + * memory. Instead, it resizes it to a 0-byte ralloc context, just like + * calling ralloc_size(ctx, 0). This is different from talloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \c sizeof(type) and \p count. This is necessary for security. + * + * \param ctx The context to use for new allocation. If \p ptr != NULL, + * it must be the same as ralloc_parent(\p ptr). + * \param ptr Pointer to the array to be resized. May be NULL. + * \param size The size of an individual element. + * \param count The number of elements to allocate. + * + * \return True unless allocation failed. + */ +void *reralloc_array_size(const void *ctx, void *ptr, size_t size, + unsigned count); + +/** + * Resize a ralloc-managed array, preserving data and initializing any newly + * allocated data to zero. + * + * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the + * memory. Instead, it resizes it to a 0-byte ralloc context, just like + * calling ralloc_size(ctx, 0). This is different from talloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \c sizeof(type) and \p count. This is necessary for security. + * + * \param ctx The context to use for new allocation. If \p ptr != NULL, + * it must be the same as ralloc_parent(\p ptr). + * \param ptr Pointer to the array to be resized. May be NULL. + * \param size The size of an individual element. + * \param old_count The number of elements in the previous allocation. + * \param new_count The number of elements to allocate. + * + * \return True unless allocation failed. + */ +void *rerzalloc_array_size(const void *ctx, void *ptr, size_t size, + unsigned old_count, unsigned new_count); +/// @} + +/** + * Free a piece of ralloc-managed memory. + * + * This will also free the memory of any children allocated this context. + */ +void ralloc_free(void *ptr); + +/** + * "Steal" memory from one context, changing it to another. + * + * This changes \p ptr's context to \p new_ctx. This is quite useful if + * memory is allocated out of a temporary context. 
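+ *
+ * A possible usage sketch; the long_lived_ctx context below is illustrative:
+ * \code
+ * void *tmp = ralloc_context(NULL);
+ * char *result = ralloc_strdup(tmp, "keep me");
+ * ralloc_steal(long_lived_ctx, result);
+ * ralloc_free(tmp);
+ * \endcode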
+ */ +void ralloc_steal(const void *new_ctx, void *ptr); + +/** + * Reparent all children from one context to another. + * + * This effectively calls ralloc_steal(new_ctx, child) for all children of \p old_ctx. + */ +void ralloc_adopt(const void *new_ctx, void *old_ctx); + +/** + * Return the given pointer's ralloc context. + */ +void *ralloc_parent(const void *ptr); + +/** + * Set a callback to occur just before an object is freed. + */ +void ralloc_set_destructor(const void *ptr, void(*destructor)(void *)); + +/// \defgroup array String Functions @{ +/** + * Duplicate a string, allocating the memory from the given context. + */ +char *ralloc_strdup(const void *ctx, const char *str) MALLOCLIKE; + +/** + * Duplicate a string, allocating the memory from the given context. + * + * Like \c strndup, at most \p n characters are copied. If \p str is longer + * than \p n characters, \p n are copied, and a termining \c '\0' byte is added. + */ +char *ralloc_strndup(const void *ctx, const char *str, size_t n) MALLOCLIKE; + +/** + * Concatenate two strings, allocating the necessary space. + * + * This appends \p str to \p *dest, similar to \c strcat, using ralloc_resize + * to expand \p *dest to the appropriate size. \p dest will be updated to the + * new pointer unless allocation fails. + * + * The result will always be null-terminated. + * + * \return True unless allocation failed. + */ +bool ralloc_strcat(char **dest, const char *str); + +/** + * Concatenate two strings, allocating the necessary space. + * + * This appends at most \p n bytes of \p str to \p *dest, using ralloc_resize + * to expand \p *dest to the appropriate size. \p dest will be updated to the + * new pointer unless allocation fails. + * + * The result will always be null-terminated; \p str does not need to be null + * terminated if it is longer than \p n. + * + * \return True unless allocation failed. + */ +bool ralloc_strncat(char **dest, const char *str, size_t n); + +/** + * Concatenate two strings, allocating the necessary space. + * + * This appends \p n bytes of \p str to \p *dest, using ralloc_resize + * to expand \p *dest to the appropriate size. \p dest will be updated to the + * new pointer unless allocation fails. + * + * The result will always be null-terminated. + * + * This function differs from ralloc_strcat() and ralloc_strncat() in that it + * does not do any strlen() calls which can become costly on large strings. + * + * \return True unless allocation failed. + */ +bool +ralloc_str_append(char **dest, const char *str, + size_t existing_length, size_t str_size); + +/** + * Print to a string. + * + * This is analogous to \c sprintf, but allocates enough space (using \p ctx + * as the context) for the resulting string. + * + * \return The newly allocated string. + */ +char *ralloc_asprintf (const void *ctx, const char *fmt, ...) PRINTFLIKE(2, 3) MALLOCLIKE; + +/* Return the length of the string that would be generated by a printf-style + * format and argument list, not including the \0 byte. + */ +size_t printf_length(const char *fmt, va_list untouched_args); + +/** + * Print to a string, given a va_list. + * + * This is analogous to \c vsprintf, but allocates enough space (using \p ctx + * as the context) for the resulting string. + * + * \return The newly allocated string. + */ +char *ralloc_vasprintf(const void *ctx, const char *fmt, va_list args) MALLOCLIKE; + +/** + * Rewrite the tail of an existing string, starting at a given index. 
+ * + * Overwrites the contents of *str starting at \p start with newly formatted + * text, including a new null-terminator. Allocates more memory as necessary. + * + * This can be used to append formatted text when the length of the existing + * string is already known, saving a strlen() call. + * + * \sa ralloc_asprintf_append + * + * \param str The string to be updated. + * \param start The index to start appending new data at. + * \param fmt A printf-style formatting string + * + * \p str will be updated to the new pointer unless allocation fails. + * \p start will be increased by the length of the newly formatted text. + * + * \return True unless allocation failed. + */ +bool ralloc_asprintf_rewrite_tail(char **str, size_t *start, + const char *fmt, ...) + PRINTFLIKE(3, 4); + +/** + * Rewrite the tail of an existing string, starting at a given index. + * + * Overwrites the contents of *str starting at \p start with newly formatted + * text, including a new null-terminator. Allocates more memory as necessary. + * + * This can be used to append formatted text when the length of the existing + * string is already known, saving a strlen() call. + * + * \sa ralloc_vasprintf_append + * + * \param str The string to be updated. + * \param start The index to start appending new data at. + * \param fmt A printf-style formatting string + * \param args A va_list containing the data to be formatted + * + * \p str will be updated to the new pointer unless allocation fails. + * \p start will be increased by the length of the newly formatted text. + * + * \return True unless allocation failed. + */ +bool ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt, + va_list args); + +/** + * Append formatted text to the supplied string. + * + * This is equivalent to + * \code + * ralloc_asprintf_rewrite_tail(str, strlen(*str), fmt, ...) + * \endcode + * + * \sa ralloc_asprintf + * \sa ralloc_asprintf_rewrite_tail + * \sa ralloc_strcat + * + * \p str will be updated to the new pointer unless allocation fails. + * + * \return True unless allocation failed. + */ +bool ralloc_asprintf_append (char **str, const char *fmt, ...) + PRINTFLIKE(2, 3); + +/** + * Append formatted text to the supplied string, given a va_list. + * + * This is equivalent to + * \code + * ralloc_vasprintf_rewrite_tail(str, strlen(*str), fmt, args) + * \endcode + * + * \sa ralloc_vasprintf + * \sa ralloc_vasprintf_rewrite_tail + * \sa ralloc_strcat + * + * \p str will be updated to the new pointer unless allocation fails. + * + * \return True unless allocation failed. + */ +bool ralloc_vasprintf_append(char **str, const char *fmt, va_list args); +/// @} + +/** + * Declare C++ new and delete operators which use ralloc. + * + * Placing this macro in the body of a class makes it possible to do: + * + * TYPE *var = new(mem_ctx) TYPE(...); + * delete var; + * + * which is more idiomatic in C++ than calling ralloc. 
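+ *
+ * For illustration, a class might opt in as follows (the class name and
+ * member are made up):
+ *
+ *    class ir_thing {
+ *    public:
+ *       DECLARE_RALLOC_CXX_OPERATORS(ir_thing)
+ *       int value;
+ *    };
+ *
+ *    ir_thing *t = new(mem_ctx) ir_thing();
+ *    delete t;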
+ */ +#define DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(TYPE, ALLOC_FUNC) \ +private: \ + static void _ralloc_destructor(void *p) \ + { \ + reinterpret_cast<TYPE *>(p)->TYPE::~TYPE(); \ + } \ +public: \ + static void* operator new(size_t size, void *mem_ctx) \ + { \ + void *p = ALLOC_FUNC(mem_ctx, size); \ + assert(p != NULL); \ + if (!HAS_TRIVIAL_DESTRUCTOR(TYPE)) \ + ralloc_set_destructor(p, _ralloc_destructor); \ + return p; \ + } \ + \ + static void operator delete(void *p) \ + { \ + /* The object's destructor is guaranteed to have already been \ + * called by the delete operator at this point -- Make sure it's \ + * not called again. \ + */ \ + if (!HAS_TRIVIAL_DESTRUCTOR(TYPE)) \ + ralloc_set_destructor(p, NULL); \ + ralloc_free(p); \ + } + +#define DECLARE_RALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, ralloc_size) + +#define DECLARE_RZALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, rzalloc_size) + +#define DECLARE_LINEAR_ALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, linear_alloc_child) + +#define DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, linear_zalloc_child) + + +/** + * Do a fast allocation from the linear buffer, also known as the child node + * from the allocator's point of view. It can't be freed directly. You have + * to free the parent or the ralloc parent. + * + * \param parent parent node of the linear allocator + * \param size size to allocate (max 32 bits) + */ +void *linear_alloc_child(void *parent, unsigned size); + +/** + * Allocate a parent node that will hold linear buffers. The returned + * allocation is actually the first child node, but it's also the handle + * of the parent node. Use it for all child node allocations. + * + * \param ralloc_ctx ralloc context, must not be NULL + * \param size size to allocate (max 32 bits) + */ +void *linear_alloc_parent(void *ralloc_ctx, unsigned size); + +/** + * Same as linear_alloc_child, but also clears memory. + */ +void *linear_zalloc_child(void *parent, unsigned size); + +/** + * Same as linear_alloc_parent, but also clears memory. + */ +void *linear_zalloc_parent(void *ralloc_ctx, unsigned size); + +/** + * Free the linear parent node. This will free all child nodes too. + * Freeing the ralloc parent will also free this. + */ +void linear_free_parent(void *ptr); + +/** + * Same as ralloc_steal, but steals the linear parent node. + */ +void ralloc_steal_linear_parent(void *new_ralloc_ctx, void *ptr); + +/** + * Return the ralloc parent of the linear parent node. + */ +void *ralloc_parent_of_linear_parent(void *ptr); + +/** + * Same as realloc except that the linear allocator doesn't free child nodes, + * so it's reduced to memory duplication. It's used in places where + * reallocation is required. Don't use it often. It's much slower than + * realloc. + */ +void *linear_realloc(void *parent, void *old, unsigned new_size); + +/* The functions below have the same semantics as their ralloc counterparts, + * except that they always allocate a linear child node. 
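+ *
+ * For illustration, a typical lifetime of a linear buffer might look like
+ * this (mem_ctx and struct token are made up):
+ *
+ *    void *lin = linear_alloc_parent(mem_ctx, 0);
+ *    struct token *t = linear_zalloc_child(lin, sizeof(struct token));
+ *    char *name = linear_strdup(lin, "identifier");
+ *    ...
+ *    linear_free_parent(lin);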
+ */ +char *linear_strdup(void *parent, const char *str); +char *linear_asprintf(void *parent, const char *fmt, ...); +char *linear_vasprintf(void *parent, const char *fmt, va_list args); +bool linear_asprintf_append(void *parent, char **str, const char *fmt, ...); +bool linear_vasprintf_append(void *parent, char **str, const char *fmt, + va_list args); +bool linear_asprintf_rewrite_tail(void *parent, char **str, size_t *start, + const char *fmt, ...); +bool linear_vasprintf_rewrite_tail(void *parent, char **str, size_t *start, + const char *fmt, va_list args); +bool linear_strcat(void *parent, char **dest, const char *str); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/rounding.h b/third_party/rust/glslopt/glsl-optimizer/src/util/rounding.h new file mode 100644 index 0000000000..e329d43824 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/rounding.h @@ -0,0 +1,148 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _ROUNDING_H +#define _ROUNDING_H + +#include "c99_math.h" + +#include <limits.h> +#include <stdint.h> + +#if defined(__SSE__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1)) || defined(_M_X64) +#include <xmmintrin.h> +#include <emmintrin.h> +#endif + +#ifdef __SSE4_1__ +#include <smmintrin.h> +#endif + +/* The C standard library has functions round()/rint()/nearbyint() that round + * their arguments according to the rounding mode set in the floating-point + * control register. While there are trunc()/ceil()/floor() functions that do + * a specific operation without modifying the rounding mode, there is no + * roundeven() in any version of C. + * + * Technical Specification 18661 (ISO/IEC TS 18661-1:2014) adds roundeven(), + * but it's unfortunately not implemented by glibc. + * + * This implementation differs in that it does not raise the inexact exception. + * + * We use rint() to implement these functions, with the assumption that the + * floating-point rounding mode has not been changed from the default Round + * to Nearest. + */ + +/** + * \brief Rounds \c x to the nearest integer, with ties to the even integer. 
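+ *
+ * For example, ties go to the even neighbour: _mesa_roundevenf(2.5f) yields
+ * 2.0f and _mesa_roundevenf(3.5f) yields 4.0f, whereas roundf() would return
+ * 3.0f and 4.0f respectively.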
+ */ +static inline float +_mesa_roundevenf(float x) +{ +#ifdef __SSE4_1__ + float ret; + __m128 m = _mm_load_ss(&x); + m = _mm_round_ss(m, m, _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC); + _mm_store_ss(&ret, m); + return ret; +#else + return rintf(x); +#endif +} + +/** + * \brief Rounds \c x to the nearest integer, with ties to the even integer. + */ +static inline double +_mesa_roundeven(double x) +{ +#ifdef __SSE4_1__ + double ret; + __m128d m = _mm_load_sd(&x); + m = _mm_round_sd(m, m, _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC); + _mm_store_sd(&ret, m); + return ret; +#else + return rint(x); +#endif +} + +/** + * \brief Rounds \c x to the nearest integer, with ties to the even integer, + * and returns the value as a long int. + */ +static inline long +_mesa_lroundevenf(float x) +{ +#if defined(__SSE__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1)) || defined(_M_X64) +#if LONG_MAX == INT64_MAX + return _mm_cvtss_si64(_mm_load_ss(&x)); +#elif LONG_MAX == INT32_MAX + return _mm_cvtss_si32(_mm_load_ss(&x)); +#else +#error "Unsupported long size" +#endif +#else + return lrintf(x); +#endif +} + + +/** + * \brief Rounds \c x to the nearest integer, with ties to the even integer, + * and returns the value as a long int. + */ +static inline long +_mesa_lroundeven(double x) +{ +#if defined(__SSE2__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(_M_X64) +#if LONG_MAX == INT64_MAX + return _mm_cvtsd_si64(_mm_load_sd(&x)); +#elif LONG_MAX == INT32_MAX + return _mm_cvtsd_si32(_mm_load_sd(&x)); +#else +#error "Unsupported long size" +#endif +#else + return lrint(x); +#endif +} + +/** + * \brief Rounds \c x to the nearest integer, with ties to the even integer, + * and returns the value as an int64_t. + */ +static inline int64_t +_mesa_i64roundevenf(float x) +{ +#if LONG_MAX == INT64_MAX + return _mesa_lroundevenf(x); +#elif LONG_MAX == INT32_MAX + return llrintf(x); +#else +#error "Unsupported long size" +#endif +} + +#endif diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/set.c b/third_party/rust/glslopt/glsl-optimizer/src/util/set.c new file mode 100644 index 0000000000..ffe0fe808e --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/set.c @@ -0,0 +1,572 @@ +/* + * Copyright © 2009-2012 Intel Corporation + * Copyright © 1988-2004 Keith Packard and Bart Massey. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Except as contained in this notice, the names of the authors + * or their institutions shall not be used in advertising or + * otherwise to promote the sale, use or other dealings in this + * Software without prior written authorization from the + * authors. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * Keith Packard <keithp@keithp.com> + */ + +#include <stdlib.h> +#include <assert.h> +#include <string.h> + +#include "hash_table.h" +#include "macros.h" +#include "ralloc.h" +#include "set.h" +#include "fast_urem_by_const.h" + +/* + * From Knuth -- a good choice for hash/rehash values is p, p-2 where + * p and p-2 are both prime. These tables are sized to have an extra 10% + * free to avoid exponential performance degradation as the hash table fills + */ + +static const uint32_t deleted_key_value; +static const void *deleted_key = &deleted_key_value; + +static const struct { + uint32_t max_entries, size, rehash; + uint64_t size_magic, rehash_magic; +} hash_sizes[] = { +#define ENTRY(max_entries, size, rehash) \ + { max_entries, size, rehash, \ + REMAINDER_MAGIC(size), REMAINDER_MAGIC(rehash) } + + ENTRY(2, 5, 3 ), + ENTRY(4, 7, 5 ), + ENTRY(8, 13, 11 ), + ENTRY(16, 19, 17 ), + ENTRY(32, 43, 41 ), + ENTRY(64, 73, 71 ), + ENTRY(128, 151, 149 ), + ENTRY(256, 283, 281 ), + ENTRY(512, 571, 569 ), + ENTRY(1024, 1153, 1151 ), + ENTRY(2048, 2269, 2267 ), + ENTRY(4096, 4519, 4517 ), + ENTRY(8192, 9013, 9011 ), + ENTRY(16384, 18043, 18041 ), + ENTRY(32768, 36109, 36107 ), + ENTRY(65536, 72091, 72089 ), + ENTRY(131072, 144409, 144407 ), + ENTRY(262144, 288361, 288359 ), + ENTRY(524288, 576883, 576881 ), + ENTRY(1048576, 1153459, 1153457 ), + ENTRY(2097152, 2307163, 2307161 ), + ENTRY(4194304, 4613893, 4613891 ), + ENTRY(8388608, 9227641, 9227639 ), + ENTRY(16777216, 18455029, 18455027 ), + ENTRY(33554432, 36911011, 36911009 ), + ENTRY(67108864, 73819861, 73819859 ), + ENTRY(134217728, 147639589, 147639587 ), + ENTRY(268435456, 295279081, 295279079 ), + ENTRY(536870912, 590559793, 590559791 ), + ENTRY(1073741824, 1181116273, 1181116271 ), + ENTRY(2147483648ul, 2362232233ul, 2362232231ul ) +}; + +ASSERTED static inline bool +key_pointer_is_reserved(const void *key) +{ + return key == NULL || key == deleted_key; +} + +static int +entry_is_free(struct set_entry *entry) +{ + return entry->key == NULL; +} + +static int +entry_is_deleted(struct set_entry *entry) +{ + return entry->key == deleted_key; +} + +static int +entry_is_present(struct set_entry *entry) +{ + return entry->key != NULL && entry->key != deleted_key; +} + +struct set * +_mesa_set_create(void *mem_ctx, + uint32_t (*key_hash_function)(const void *key), + bool (*key_equals_function)(const void *a, + const void *b)) +{ + struct set *ht; + + ht = ralloc(mem_ctx, struct set); + if (ht == NULL) + return NULL; + + ht->size_index = 0; + ht->size = hash_sizes[ht->size_index].size; + ht->rehash = hash_sizes[ht->size_index].rehash; + ht->size_magic = hash_sizes[ht->size_index].size_magic; + ht->rehash_magic = hash_sizes[ht->size_index].rehash_magic; + ht->max_entries = hash_sizes[ht->size_index].max_entries; + ht->key_hash_function = key_hash_function; + ht->key_equals_function = key_equals_function; + ht->table = rzalloc_array(ht, struct set_entry, ht->size); + ht->entries = 0; + ht->deleted_entries = 0; + + if (ht->table == NULL) { + ralloc_free(ht); + return NULL; + } + + return ht; +} + +struct set * +_mesa_set_clone(struct set *set, void *dst_mem_ctx) +{ + struct set *clone; + + clone = ralloc(dst_mem_ctx, struct set); + if (clone 
== NULL) + return NULL; + + memcpy(clone, set, sizeof(struct set)); + + clone->table = ralloc_array(clone, struct set_entry, clone->size); + if (clone->table == NULL) { + ralloc_free(clone); + return NULL; + } + + memcpy(clone->table, set->table, clone->size * sizeof(struct set_entry)); + + return clone; +} + +/** + * Frees the given set. + * + * If delete_function is passed, it gets called on each entry present before + * freeing. + */ +void +_mesa_set_destroy(struct set *ht, void (*delete_function)(struct set_entry *entry)) +{ + if (!ht) + return; + + if (delete_function) { + set_foreach (ht, entry) { + delete_function(entry); + } + } + ralloc_free(ht->table); + ralloc_free(ht); +} + +/** + * Clears all values from the given set. + * + * If delete_function is passed, it gets called on each entry present before + * the set is cleared. + */ +void +_mesa_set_clear(struct set *set, void (*delete_function)(struct set_entry *entry)) +{ + if (!set) + return; + + set_foreach (set, entry) { + if (delete_function) + delete_function(entry); + entry->key = deleted_key; + } + + set->entries = set->deleted_entries = 0; +} + +/** + * Finds a set entry with the given key and hash of that key. + * + * Returns NULL if no entry is found. + */ +static struct set_entry * +set_search(const struct set *ht, uint32_t hash, const void *key) +{ + assert(!key_pointer_is_reserved(key)); + + uint32_t size = ht->size; + uint32_t start_address = util_fast_urem32(hash, size, ht->size_magic); + uint32_t double_hash = util_fast_urem32(hash, ht->rehash, + ht->rehash_magic) + 1; + uint32_t hash_address = start_address; + do { + struct set_entry *entry = ht->table + hash_address; + + if (entry_is_free(entry)) { + return NULL; + } else if (entry_is_present(entry) && entry->hash == hash) { + if (ht->key_equals_function(key, entry->key)) { + return entry; + } + } + + hash_address += double_hash; + if (hash_address >= size) + hash_address -= size; + } while (hash_address != start_address); + + return NULL; +} + +struct set_entry * +_mesa_set_search(const struct set *set, const void *key) +{ + assert(set->key_hash_function); + return set_search(set, set->key_hash_function(key), key); +} + +struct set_entry * +_mesa_set_search_pre_hashed(const struct set *set, uint32_t hash, + const void *key) +{ + assert(set->key_hash_function == NULL || + hash == set->key_hash_function(key)); + return set_search(set, hash, key); +} + +static void +set_add_rehash(struct set *ht, uint32_t hash, const void *key) +{ + uint32_t size = ht->size; + uint32_t start_address = util_fast_urem32(hash, size, ht->size_magic); + uint32_t double_hash = util_fast_urem32(hash, ht->rehash, + ht->rehash_magic) + 1; + uint32_t hash_address = start_address; + do { + struct set_entry *entry = ht->table + hash_address; + if (likely(entry->key == NULL)) { + entry->hash = hash; + entry->key = key; + return; + } + + hash_address = hash_address + double_hash; + if (hash_address >= size) + hash_address -= size; + } while (true); +} + +static void +set_rehash(struct set *ht, unsigned new_size_index) +{ + struct set old_ht; + struct set_entry *table; + + if (new_size_index >= ARRAY_SIZE(hash_sizes)) + return; + + table = rzalloc_array(ht, struct set_entry, + hash_sizes[new_size_index].size); + if (table == NULL) + return; + + old_ht = *ht; + + ht->table = table; + ht->size_index = new_size_index; + ht->size = hash_sizes[ht->size_index].size; + ht->rehash = hash_sizes[ht->size_index].rehash; + ht->size_magic = hash_sizes[ht->size_index].size_magic; + ht->rehash_magic = 
hash_sizes[ht->size_index].rehash_magic; + ht->max_entries = hash_sizes[ht->size_index].max_entries; + ht->entries = 0; + ht->deleted_entries = 0; + + set_foreach(&old_ht, entry) { + set_add_rehash(ht, entry->hash, entry->key); + } + + ht->entries = old_ht.entries; + + ralloc_free(old_ht.table); +} + +void +_mesa_set_resize(struct set *set, uint32_t entries) +{ + /* You can't shrink a set below its number of entries */ + if (set->entries > entries) + entries = set->entries; + + unsigned size_index = 0; + while (hash_sizes[size_index].max_entries < entries) + size_index++; + + set_rehash(set, size_index); +} + +/** + * Find a matching entry for the given key, or insert it if it doesn't already + * exist. + * + * Note that insertion may rearrange the table on a resize or rehash, + * so previously found hash_entries are no longer valid after this function. + */ +static struct set_entry * +set_search_or_add(struct set *ht, uint32_t hash, const void *key, bool *found) +{ + struct set_entry *available_entry = NULL; + + assert(!key_pointer_is_reserved(key)); + + if (ht->entries >= ht->max_entries) { + set_rehash(ht, ht->size_index + 1); + } else if (ht->deleted_entries + ht->entries >= ht->max_entries) { + set_rehash(ht, ht->size_index); + } + + uint32_t size = ht->size; + uint32_t start_address = util_fast_urem32(hash, size, ht->size_magic); + uint32_t double_hash = util_fast_urem32(hash, ht->rehash, + ht->rehash_magic) + 1; + uint32_t hash_address = start_address; + do { + struct set_entry *entry = ht->table + hash_address; + + if (!entry_is_present(entry)) { + /* Stash the first available entry we find */ + if (available_entry == NULL) + available_entry = entry; + if (entry_is_free(entry)) + break; + } + + if (!entry_is_deleted(entry) && + entry->hash == hash && + ht->key_equals_function(key, entry->key)) { + if (found) + *found = true; + return entry; + } + + hash_address = hash_address + double_hash; + if (hash_address >= size) + hash_address -= size; + } while (hash_address != start_address); + + if (available_entry) { + /* There is no matching entry, create it. */ + if (entry_is_deleted(available_entry)) + ht->deleted_entries--; + available_entry->hash = hash; + available_entry->key = key; + ht->entries++; + if (found) + *found = false; + return available_entry; + } + + /* We could hit here if a required resize failed. An unchecked-malloc + * application could ignore this result. + */ + return NULL; +} + +/** + * Inserts the key with the given hash into the table. + * + * Note that insertion may rearrange the table on a resize or rehash, + * so previously found hash_entries are no longer valid after this function. + */ +static struct set_entry * +set_add(struct set *ht, uint32_t hash, const void *key) +{ + struct set_entry *entry = set_search_or_add(ht, hash, key, NULL); + + if (unlikely(!entry)) + return NULL; + + /* Note: If a matching entry already exists, this will replace it. This is + * a relatively common feature of hash tables, with the alternative + * generally being "insert the new value as well, and return it first when + * the key is searched for". + * + * Note that the hash table doesn't have a delete callback. If freeing of + * old keys is required to avoid memory leaks, use the alternative + * _mesa_set_search_or_add function and implement the replacement yourself. 
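+    *
+    * A hypothetical sketch of doing that replacement yourself (illustrative
+    * only, not part of this file; it assumes the keys were malloc'ed):
+    *
+    *    struct set_entry *e = _mesa_set_search_or_add(s, new_key);
+    *    if (e->key != new_key) {
+    *       free((void *)e->key);   // release the previously stored key
+    *       e->key = new_key;       // install the replacement ourselves
+    *    }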
+ */ + entry->key = key; + return entry; +} + +struct set_entry * +_mesa_set_add(struct set *set, const void *key) +{ + assert(set->key_hash_function); + return set_add(set, set->key_hash_function(key), key); +} + +struct set_entry * +_mesa_set_add_pre_hashed(struct set *set, uint32_t hash, const void *key) +{ + assert(set->key_hash_function == NULL || + hash == set->key_hash_function(key)); + return set_add(set, hash, key); +} + +struct set_entry * +_mesa_set_search_and_add(struct set *set, const void *key, bool *replaced) +{ + assert(set->key_hash_function); + return _mesa_set_search_and_add_pre_hashed(set, + set->key_hash_function(key), + key, replaced); +} + +struct set_entry * +_mesa_set_search_and_add_pre_hashed(struct set *set, uint32_t hash, + const void *key, bool *replaced) +{ + assert(set->key_hash_function == NULL || + hash == set->key_hash_function(key)); + struct set_entry *entry = set_search_or_add(set, hash, key, replaced); + + if (unlikely(!entry)) + return NULL; + + /* This implements the replacement, same as _mesa_set_add(). The user will + * be notified if we're overwriting a found entry. + */ + entry->key = key; + return entry; +} + +struct set_entry * +_mesa_set_search_or_add(struct set *set, const void *key) +{ + assert(set->key_hash_function); + return set_search_or_add(set, set->key_hash_function(key), key, NULL); +} + +struct set_entry * +_mesa_set_search_or_add_pre_hashed(struct set *set, uint32_t hash, + const void *key) +{ + assert(set->key_hash_function == NULL || + hash == set->key_hash_function(key)); + return set_search_or_add(set, hash, key, NULL); +} + +/** + * This function deletes the given hash table entry. + * + * Note that deletion doesn't otherwise modify the table, so an iteration over + * the table deleting entries is safe. + */ +void +_mesa_set_remove(struct set *ht, struct set_entry *entry) +{ + if (!entry) + return; + + entry->key = deleted_key; + ht->entries--; + ht->deleted_entries++; +} + +/** + * Removes the entry with the corresponding key, if exists. + */ +void +_mesa_set_remove_key(struct set *set, const void *key) +{ + _mesa_set_remove(set, _mesa_set_search(set, key)); +} + +/** + * This function is an iterator over the hash table. + * + * Pass in NULL for the first entry, as in the start of a for loop. Note that + * an iteration over the table is O(table_size) not O(entries). + */ +struct set_entry * +_mesa_set_next_entry(const struct set *ht, struct set_entry *entry) +{ + if (entry == NULL) + entry = ht->table; + else + entry = entry + 1; + + for (; entry != ht->table + ht->size; entry++) { + if (entry_is_present(entry)) { + return entry; + } + } + + return NULL; +} + +struct set_entry * +_mesa_set_random_entry(struct set *ht, + int (*predicate)(struct set_entry *entry)) +{ + struct set_entry *entry; + uint32_t i = rand() % ht->size; + + if (ht->entries == 0) + return NULL; + + for (entry = ht->table + i; entry != ht->table + ht->size; entry++) { + if (entry_is_present(entry) && + (!predicate || predicate(entry))) { + return entry; + } + } + + for (entry = ht->table; entry != ht->table + i; entry++) { + if (entry_is_present(entry) && + (!predicate || predicate(entry))) { + return entry; + } + } + + return NULL; +} + +/** + * Helper to create a set with pointer keys. 
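+ *
+ * Keys are hashed and compared by pointer value (_mesa_hash_pointer /
+ * _mesa_key_pointer_equal).  A minimal usage sketch (hypothetical caller,
+ * not part of this file):
+ *
+ *    struct set *s = _mesa_pointer_set_create(NULL);
+ *    _mesa_set_add(s, some_ptr);
+ *    if (_mesa_set_search(s, some_ptr))
+ *       ...the pointer is a member...
+ *    _mesa_set_destroy(s, NULL);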
+ */ +struct set * +_mesa_pointer_set_create(void *mem_ctx) +{ + return _mesa_set_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/set.h b/third_party/rust/glslopt/glsl-optimizer/src/util/set.h new file mode 100644 index 0000000000..55857aca7a --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/set.h @@ -0,0 +1,127 @@ +/* + * Copyright © 2009-2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#ifndef _SET_H +#define _SET_H + +#include <inttypes.h> +#include <stdbool.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct set_entry { + uint32_t hash; + const void *key; +}; + +struct set { + void *mem_ctx; + struct set_entry *table; + uint32_t (*key_hash_function)(const void *key); + bool (*key_equals_function)(const void *a, const void *b); + uint32_t size; + uint32_t rehash; + uint64_t size_magic; + uint64_t rehash_magic; + uint32_t max_entries; + uint32_t size_index; + uint32_t entries; + uint32_t deleted_entries; +}; + +struct set * +_mesa_set_create(void *mem_ctx, + uint32_t (*key_hash_function)(const void *key), + bool (*key_equals_function)(const void *a, + const void *b)); +struct set * +_mesa_set_clone(struct set *set, void *dst_mem_ctx); + +void +_mesa_set_destroy(struct set *set, + void (*delete_function)(struct set_entry *entry)); +void +_mesa_set_resize(struct set *set, uint32_t entries); +void +_mesa_set_clear(struct set *set, + void (*delete_function)(struct set_entry *entry)); + +struct set_entry * +_mesa_set_add(struct set *set, const void *key); +struct set_entry * +_mesa_set_add_pre_hashed(struct set *set, uint32_t hash, const void *key); + +struct set_entry * +_mesa_set_search_or_add(struct set *set, const void *key); +struct set_entry * +_mesa_set_search_or_add_pre_hashed(struct set *set, uint32_t hash, + const void *key); + +struct set_entry * +_mesa_set_search(const struct set *set, const void *key); +struct set_entry * +_mesa_set_search_pre_hashed(const struct set *set, uint32_t hash, + const void *key); + +struct set_entry * +_mesa_set_search_and_add(struct set *set, const void *key, bool *replaced); +struct set_entry * +_mesa_set_search_and_add_pre_hashed(struct set *set, uint32_t hash, + const void *key, bool *replaced); + +void +_mesa_set_remove(struct set *set, struct set_entry *entry); +void +_mesa_set_remove_key(struct set 
*set, const void *key);
+
+struct set_entry *
+_mesa_set_next_entry(const struct set *set, struct set_entry *entry);
+
+struct set_entry *
+_mesa_set_random_entry(struct set *set,
+                       int (*predicate)(struct set_entry *entry));
+
+struct set *
+_mesa_pointer_set_create(void *mem_ctx);
+
+/**
+ * This foreach function is safe against deletion, but not against
+ * insertion (which may rehash the set, making entry a dangling
+ * pointer).
+ */
+#define set_foreach(set, entry)                                     \
+   for (struct set_entry *entry = _mesa_set_next_entry(set, NULL); \
+        entry != NULL;                                             \
+        entry = _mesa_set_next_entry(set, entry))
+
+#ifdef __cplusplus
+} /* extern C */
+#endif
+
+#endif /* _SET_H */
diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/sha1/README b/third_party/rust/glslopt/glsl-optimizer/src/util/sha1/README
new file mode 100644
index 0000000000..f30acf984e
--- /dev/null
+++ b/third_party/rust/glslopt/glsl-optimizer/src/util/sha1/README
@@ -0,0 +1,62 @@
+This is a local copy of a SHA1 implementation based on the sources below.
+
+Why:
+ - Some libraries suffer from race conditions and other issues. For example see
+commit ade3108bb5b0 ("util: Fix race condition on libgcrypt initialization").
+
+ - Fold away the configure-stage handling and detection of _eight_
+implementations and _seven_ different codepaths.
+
+ - Have a single, uniform codebase used by developers, testers and users.
+
+ - Avoid conflicts when using software which ships with its own SHA1 library,
+as that library may conflict with the one Mesa is built against.
+
+
+
+Source:
+The SHA1 implementation is copied verbatim from the following links.
+At the time of checkout, HEAD was 1.25 and 1.24 respectively.
+
+http://cvsweb.openbsd.org/cgi-bin/cvsweb/~checkout~/src/lib/libc/hash/sha1.c?rev=HEAD
+http://cvsweb.openbsd.org/cgi-bin/cvsweb/~checkout~/src/include/sha1.h?rev=HEAD
+
+
+Notes:
+ - The files should not have any local changes. If there are any, they should be
+clearly documented below and one should aim to upstream them where possible.
+
+ - Files will be periodically synchronised with the respective upstream sources.
+Updates will be made regularly, but since the code is _not_ intended as a
+cryptography solution, any issues found should not be considered security ones.
+
+
+Local changes:
+ - Removed __bounded__ attribute qualifiers. Unavailable on platforms targeted
+by Mesa. Upstream status: TBD (N/A ?)
+
+ - Pick the sha1.h header from the current folder, by using "" over <> in the
+include directive. Upstream status: TBD
+
+ - Remove unused function prototypes - SHA1End, SHA1File, SHA1FileChunk and
+SHA1Data. Upstream status: TBD
+
+ - Use stdint.h integer types - u_int{8,16,32}_t -> uint{8,16,32}_t and
+u_int -> uint32_t, change header include. Upstream status: TBD
+
+ - Revert sha1.c rev 1.26 change (introduce DEF_WEAK).
+Upstream status: TBD (N/A ?)
+
+ - Add stdint.h include in sha1.h for uint*_t types. Upstream status: TBD
+
+ - Add stddef.h include in sha1.h for size_t type. Upstream status: TBD
+
+ - Use memset over explicit_bzero, since memset_s isn't yet widely available.
+Upstream status: TBD (N/A ?)
+
+ - Manually expand __BEGIN_DECLS/__END_DECLS and make sure that they include
+the struct declaration.
+Upstream status: TBD
+
+ - Add non-typedef struct name.
+Upstream status: TBD diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/sha1/sha1.c b/third_party/rust/glslopt/glsl-optimizer/src/util/sha1/sha1.c new file mode 100644 index 0000000000..4fe2aa723c --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/sha1/sha1.c @@ -0,0 +1,174 @@ +/* $OpenBSD: sha1.c,v 1.26 2015/09/11 09:18:27 guenther Exp $ */ + +/* + * SHA-1 in C + * By Steve Reid <steve@edmweb.com> + * 100% Public Domain + * + * Test Vectors (from FIPS PUB 180-1) + * "abc" + * A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D + * "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" + * 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 + * A million repetitions of "a" + * 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F + */ + +#include <stdint.h> +#include <string.h> +#include "u_endian.h" +#include "sha1.h" + +#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits)))) + +/* + * blk0() and blk() perform the initial expand. + * I got the idea of expanding during the round function from SSLeay + */ +#if UTIL_ARCH_LITTLE_ENDIAN +# define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \ + |(rol(block->l[i],8)&0x00FF00FF)) +#else +# define blk0(i) block->l[i] +#endif +#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \ + ^block->l[(i+2)&15]^block->l[i&15],1)) + +/* + * (R0+R1), R2, R3, R4 are the different operations (rounds) used in SHA1 + */ +#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30); +#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30); +#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30); +#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30); +#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30); + +typedef union { + uint8_t c[64]; + uint32_t l[16]; +} CHAR64LONG16; + +/* + * Hash a single 512-bit block. This is the core of the algorithm. + */ +void +SHA1Transform(uint32_t state[5], const uint8_t buffer[SHA1_BLOCK_LENGTH]) +{ + uint32_t a, b, c, d, e; + uint8_t workspace[SHA1_BLOCK_LENGTH]; + CHAR64LONG16 *block = (CHAR64LONG16 *)workspace; + + (void)memcpy(block, buffer, SHA1_BLOCK_LENGTH); + + /* Copy context->state[] to working vars */ + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + + /* 4 rounds of 20 operations each. Loop unrolled. 
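+      Per FIPS PUB 180-1: rounds 0-19 use the "choose" function (R0/R1),
+      rounds 20-39 and 60-79 use parity (R2/R4), and rounds 40-59 use
+      majority (R3), each round group with its own additive constant.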
*/ + R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); + R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); + R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11); + R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15); + R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); + R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); + R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27); + R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); + R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); + R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39); + R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); + R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); + R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51); + R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); + R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); + R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63); + R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); + R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); + R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); + R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); + + /* Add the working vars back into context.state[] */ + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + + /* Wipe variables */ + a = b = c = d = e = 0; +} + + +/* + * SHA1Init - Initialize new context + */ +void +SHA1Init(SHA1_CTX *context) +{ + + /* SHA1 initialization constants */ + context->count = 0; + context->state[0] = 0x67452301; + context->state[1] = 0xEFCDAB89; + context->state[2] = 0x98BADCFE; + context->state[3] = 0x10325476; + context->state[4] = 0xC3D2E1F0; +} + + +/* + * Run your data through this. + */ +void +SHA1Update(SHA1_CTX *context, const uint8_t *data, size_t len) +{ + size_t i, j; + + j = (size_t)((context->count >> 3) & 63); + context->count += (len << 3); + if ((j + len) > 63) { + (void)memcpy(&context->buffer[j], data, (i = 64-j)); + SHA1Transform(context->state, context->buffer); + for ( ; i + 63 < len; i += 64) + SHA1Transform(context->state, (uint8_t *)&data[i]); + j = 0; + } else { + i = 0; + } + (void)memcpy(&context->buffer[j], &data[i], len - i); +} + + +/* + * Add padding and return the message digest. 
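+ *
+ * SHA1Final() calls this and then extracts the digest.  A typical caller
+ * (illustrative sketch, not part of this file) looks like:
+ *
+ *    SHA1_CTX ctx;
+ *    uint8_t digest[SHA1_DIGEST_LENGTH];
+ *    SHA1Init(&ctx);
+ *    SHA1Update(&ctx, data, len);
+ *    SHA1Final(digest, &ctx);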
+ */ +void +SHA1Pad(SHA1_CTX *context) +{ + uint8_t finalcount[8]; + uint32_t i; + + for (i = 0; i < 8; i++) { + finalcount[i] = (uint8_t)((context->count >> + ((7 - (i & 7)) * 8)) & 255); /* Endian independent */ + } + SHA1Update(context, (uint8_t *)"\200", 1); + while ((context->count & 504) != 448) + SHA1Update(context, (uint8_t *)"\0", 1); + SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */ +} + +void +SHA1Final(uint8_t digest[SHA1_DIGEST_LENGTH], SHA1_CTX *context) +{ + uint32_t i; + + SHA1Pad(context); + for (i = 0; i < SHA1_DIGEST_LENGTH; i++) { + digest[i] = (uint8_t) + ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255); + } + memset(context, 0, sizeof(*context)); +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/sha1/sha1.h b/third_party/rust/glslopt/glsl-optimizer/src/util/sha1/sha1.h new file mode 100644 index 0000000000..029a0ae87f --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/sha1/sha1.h @@ -0,0 +1,53 @@ +/* $OpenBSD: sha1.h,v 1.24 2012/12/05 23:19:57 deraadt Exp $ */ + +/* + * SHA-1 in C + * By Steve Reid <steve@edmweb.com> + * 100% Public Domain + */ + +#ifndef _SHA1_H +#define _SHA1_H + +#include <stddef.h> +#include <stdint.h> + +#define SHA1_BLOCK_LENGTH 64 +#define SHA1_DIGEST_LENGTH 20 +#define SHA1_DIGEST_STRING_LENGTH (SHA1_DIGEST_LENGTH * 2 + 1) + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct _SHA1_CTX { + uint32_t state[5]; + uint64_t count; + uint8_t buffer[SHA1_BLOCK_LENGTH]; +} SHA1_CTX; + +void SHA1Init(SHA1_CTX *); +void SHA1Pad(SHA1_CTX *); +void SHA1Transform(uint32_t [5], const uint8_t [SHA1_BLOCK_LENGTH]); +void SHA1Update(SHA1_CTX *, const uint8_t *, size_t); +void SHA1Final(uint8_t [SHA1_DIGEST_LENGTH], SHA1_CTX *); + +#define HTONDIGEST(x) do { \ + x[0] = htonl(x[0]); \ + x[1] = htonl(x[1]); \ + x[2] = htonl(x[2]); \ + x[3] = htonl(x[3]); \ + x[4] = htonl(x[4]); } while (0) + +#define NTOHDIGEST(x) do { \ + x[0] = ntohl(x[0]); \ + x[1] = ntohl(x[1]); \ + x[2] = ntohl(x[2]); \ + x[3] = ntohl(x[3]); \ + x[4] = ntohl(x[4]); } while (0) + +#ifdef __cplusplus +} +#endif + +#endif /* _SHA1_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/simple_mtx.h b/third_party/rust/glslopt/glsl-optimizer/src/util/simple_mtx.h new file mode 100644 index 0000000000..e332816b98 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/simple_mtx.h @@ -0,0 +1,148 @@ +/* + * Copyright © 2015 Intel + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _SIMPLE_MTX_H
+#define _SIMPLE_MTX_H
+
+#include "util/futex.h"
+#include "util/macros.h"
+
+#include "c11/threads.h"
+
+#if UTIL_FUTEX_SUPPORTED
+
+/* simple_mtx_t - Fast, simple mutex
+ *
+ * While modern pthread mutexes are very fast (implemented using futex), they
+ * still incur a call to an external DSO and the overhead of the generality
+ * and features of pthread mutexes.  Most mutexes in mesa only need
+ * lock/unlock, and the idea here is that we can inline the atomic operation
+ * and make the fast case just two instructions.  Mutexes are subtle and
+ * finicky to implement, so we carefully copy the implementation from Ulrich
+ * Drepper's well-written and well-reviewed paper:
+ *
+ *   "Futexes Are Tricky"
+ *   http://www.akkadia.org/drepper/futex.pdf
+ *
+ * We implement "mutex3", which gives us a mutex that has no syscalls on
+ * uncontended lock or unlock.  Further, the uncontended lock boils down to a
+ * locked cmpxchg and an untaken branch, and the uncontended unlock is just a
+ * locked decr and an untaken branch.  We use __builtin_expect() to indicate
+ * that contention is unlikely so that gcc will put the contention code out of
+ * the main code flow.
+ *
+ * A fast mutex only supports lock/unlock; it can't be recursive or used with
+ * condition variables.
+ */
+
+typedef struct {
+   uint32_t val;
+} simple_mtx_t;
+
+#define _SIMPLE_MTX_INITIALIZER_NP { 0 }
+
+#define _SIMPLE_MTX_INVALID_VALUE 0xd0d0d0d0
+
+static inline void
+simple_mtx_init(simple_mtx_t *mtx, ASSERTED int type)
+{
+   assert(type == mtx_plain);
+
+   mtx->val = 0;
+}
+
+static inline void
+simple_mtx_destroy(ASSERTED simple_mtx_t *mtx)
+{
+#ifndef NDEBUG
+   mtx->val = _SIMPLE_MTX_INVALID_VALUE;
+#endif
+}
+
+static inline void
+simple_mtx_lock(simple_mtx_t *mtx)
+{
+   uint32_t c;
+
+   c = __sync_val_compare_and_swap(&mtx->val, 0, 1);
+
+   assert(c != _SIMPLE_MTX_INVALID_VALUE);
+
+   if (__builtin_expect(c != 0, 0)) {
+      if (c != 2)
+         c = __sync_lock_test_and_set(&mtx->val, 2);
+      while (c != 0) {
+         futex_wait(&mtx->val, 2, NULL);
+         c = __sync_lock_test_and_set(&mtx->val, 2);
+      }
+   }
+}
+
+static inline void
+simple_mtx_unlock(simple_mtx_t *mtx)
+{
+   uint32_t c;
+
+   c = __sync_fetch_and_sub(&mtx->val, 1);
+
+   assert(c != _SIMPLE_MTX_INVALID_VALUE);
+
+   if (__builtin_expect(c != 1, 0)) {
+      mtx->val = 0;
+      futex_wake(&mtx->val, 1);
+   }
+}
+
+#else
+
+typedef mtx_t simple_mtx_t;
+
+#define _SIMPLE_MTX_INITIALIZER_NP _MTX_INITIALIZER_NP
+
+static inline void
+simple_mtx_init(simple_mtx_t *mtx, int type)
+{
+   mtx_init(mtx, type);
+}
+
+static inline void
+simple_mtx_destroy(simple_mtx_t *mtx)
+{
+   mtx_destroy(mtx);
+}
+
+static inline void
+simple_mtx_lock(simple_mtx_t *mtx)
+{
+   mtx_lock(mtx);
+}
+
+static inline void
+simple_mtx_unlock(simple_mtx_t *mtx)
+{
+   mtx_unlock(mtx);
+}
+
+#endif
+
+#endif
diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/softfloat.c b/third_party/rust/glslopt/glsl-optimizer/src/util/softfloat.c
new file mode 100644
index 0000000000..591128efd4
--- /dev/null
+++ b/third_party/rust/glslopt/glsl-optimizer/src/util/softfloat.c
@@ -0,0 +1,1475 @@
+/*
+ * License for Berkeley SoftFloat Release 3e
+ *
+ * John R. 
Hauser + * 2018 January 20 + * + * The following applies to the whole of SoftFloat Release 3e as well as to + * each source file individually. + * + * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the + * University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions, and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * The functions listed in this file are modified versions of the ones + * from the Berkeley SoftFloat 3e Library. + * + * Their implementation correctness has been checked with the Berkeley + * TestFloat Release 3e tool for x86_64. 
+ */ + +#include "rounding.h" +#include "bitscan.h" +#include "softfloat.h" + +#if defined(BIG_ENDIAN) +#define word_incr -1 +#define index_word(total, n) ((total) - 1 - (n)) +#define index_word_hi(total) 0 +#define index_word_lo(total) ((total) - 1) +#define index_multiword_hi(total, n) 0 +#define index_multiword_lo(total, n) ((total) - (n)) +#define index_multiword_hi_but(total, n) 0 +#define index_multiword_lo_but(total, n) (n) +#else +#define word_incr 1 +#define index_word(total, n) (n) +#define index_word_hi(total) ((total) - 1) +#define index_word_lo(total) 0 +#define index_multiword_hi(total, n) ((total) - (n)) +#define index_multiword_lo(total, n) 0 +#define index_multiword_hi_but(total, n) (n) +#define index_multiword_lo_but(total, n) 0 +#endif + +typedef union { double f; int64_t i; uint64_t u; } di_type; +typedef union { float f; int32_t i; uint32_t u; } fi_type; + +const uint8_t count_leading_zeros8[256] = { + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/** + * \brief Shifts 'a' right by the number of bits given in 'dist', which must be in + * the range 1 to 63. If any nonzero bits are shifted off, they are "jammed" + * into the least-significant bit of the shifted value by setting the + * least-significant bit to 1. This shifted-and-jammed value is returned. + * + * From softfloat_shortShiftRightJam64() + */ +static inline +uint64_t _mesa_short_shift_right_jam64(uint64_t a, uint8_t dist) +{ + return a >> dist | ((a & (((uint64_t) 1 << dist) - 1)) != 0); +} + +/** + * \brief Shifts 'a' right by the number of bits given in 'dist', which must not + * be zero. If any nonzero bits are shifted off, they are "jammed" into the + * least-significant bit of the shifted value by setting the least-significant + * bit to 1. This shifted-and-jammed value is returned. + * The value of 'dist' can be arbitrarily large. In particular, if 'dist' is + * greater than 64, the result will be either 0 or 1, depending on whether 'a' + * is zero or nonzero. + * + * From softfloat_shiftRightJam64() + */ +static inline +uint64_t _mesa_shift_right_jam64(uint64_t a, uint32_t dist) +{ + return + (dist < 63) ? a >> dist | ((uint64_t) (a << (-dist & 63)) != 0) : (a != 0); +} + +/** + * \brief Shifts 'a' right by the number of bits given in 'dist', which must not be + * zero. If any nonzero bits are shifted off, they are "jammed" into the + * least-significant bit of the shifted value by setting the least-significant + * bit to 1. This shifted-and-jammed value is returned. + * The value of 'dist' can be arbitrarily large. In particular, if 'dist' is + * greater than 32, the result will be either 0 or 1, depending on whether 'a' + * is zero or nonzero. 
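+ *
+ * For example (illustrative only), shifting 0b1011 right by 2 would give
+ * 0b10, but because nonzero bits (0b11) were shifted off, the low bit is
+ * "jammed" and the returned value is 0b11.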
+ * + * From softfloat_shiftRightJam32() + */ +static inline +uint32_t _mesa_shift_right_jam32(uint32_t a, uint16_t dist) +{ + return + (dist < 31) ? a >> dist | ((uint32_t) (a << (-dist & 31)) != 0) : (a != 0); +} + +/** + * \brief Extracted from softfloat_roundPackToF64() + */ +static inline +double _mesa_roundtozero_f64(int64_t s, int64_t e, int64_t m) +{ + di_type result; + + if ((uint64_t) e >= 0x7fd) { + if (e < 0) { + m = _mesa_shift_right_jam64(m, -e); + e = 0; + } else if ((e > 0x7fd) || (0x8000000000000000 <= m)) { + e = 0x7ff; + m = 0; + result.u = (s << 63) + (e << 52) + m; + result.u -= 1; + return result.f; + } + } + + m >>= 10; + if (m == 0) + e = 0; + + result.u = (s << 63) + (e << 52) + m; + return result.f; +} + +/** + * \brief Extracted from softfloat_roundPackToF32() + */ +static inline +float _mesa_round_f32(int32_t s, int32_t e, int32_t m, bool rtz) +{ + fi_type result; + uint8_t round_increment = rtz ? 0 : 0x40; + + if ((uint32_t) e >= 0xfd) { + if (e < 0) { + m = _mesa_shift_right_jam32(m, -e); + e = 0; + } else if ((e > 0xfd) || (0x80000000 <= m + round_increment)) { + e = 0xff; + m = 0; + result.u = (s << 31) + (e << 23) + m; + result.u -= !round_increment; + return result.f; + } + } + + uint8_t round_bits; + round_bits = m & 0x7f; + m = ((uint32_t) m + round_increment) >> 7; + m &= ~(uint32_t) (! (round_bits ^ 0x40) & !rtz); + if (m == 0) + e = 0; + + result.u = (s << 31) + (e << 23) + m; + return result.f; +} + +/** + * \brief Extracted from softfloat_roundPackToF16() + */ +static inline +uint16_t _mesa_roundtozero_f16(int16_t s, int16_t e, int16_t m) +{ + if ((uint16_t) e >= 0x1d) { + if (e < 0) { + m = _mesa_shift_right_jam32(m, -e); + e = 0; + } else if ((e > 0x1d) || (0x8000 <= m)) { + e = 0x1f; + m = 0; + return (s << 15) + (e << 10) + m - 1; + } + } + + m >>= 4; + if (m == 0) + e = 0; + + return (s << 15) + (e << 10) + m; +} + +/** + * \brief Shifts the N-bit unsigned integer pointed to by 'a' left by the number of + * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' + * must be in the range 1 to 31. Any nonzero bits shifted off are lost. The + * shifted N-bit result is stored at the location pointed to by 'm_out'. Each + * of 'a' and 'm_out' points to a 'size_words'-long array of 32-bit elements + * that concatenate in the platform's normal endian order to form an N-bit + * integer. + * + * From softfloat_shortShiftLeftM() + */ +static inline void +_mesa_short_shift_left_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out) +{ + uint8_t neg_dist; + unsigned index, last_index; + uint32_t part_word, a_word; + + neg_dist = -dist; + index = index_word_hi(size_words); + last_index = index_word_lo(size_words); + part_word = a[index] << dist; + while (index != last_index) { + a_word = a[index - word_incr]; + m_out[index] = part_word | a_word >> (neg_dist & 31); + index -= word_incr; + part_word = a_word << dist; + } + m_out[index] = part_word; +} + +/** + * \brief Shifts the N-bit unsigned integer pointed to by 'a' left by the number of + * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' + * must not be zero. Any nonzero bits shifted off are lost. The shifted + * N-bit result is stored at the location pointed to by 'm_out'. Each of 'a' + * and 'm_out' points to a 'size_words'-long array of 32-bit elements that + * concatenate in the platform's normal endian order to form an N-bit + * integer. The value of 'dist' can be arbitrarily large. 
In particular, if + * 'dist' is greater than N, the stored result will be 0. + * + * From softfloat_shiftLeftM() + */ +static inline void +_mesa_shift_left_m(uint8_t size_words, const uint32_t *a, uint32_t dist, uint32_t *m_out) +{ + uint32_t word_dist; + uint8_t inner_dist; + uint8_t i; + + word_dist = dist >> 5; + if (word_dist < size_words) { + a += index_multiword_lo_but(size_words, word_dist); + inner_dist = dist & 31; + if (inner_dist) { + _mesa_short_shift_left_m(size_words - word_dist, a, inner_dist, + m_out + index_multiword_hi_but(size_words, word_dist)); + if (!word_dist) + return; + } else { + uint32_t *dest = m_out + index_word_hi(size_words); + a += index_word_hi(size_words - word_dist); + for (i = size_words - word_dist; i; --i) { + *dest = *a; + a -= word_incr; + dest -= word_incr; + } + } + m_out += index_multiword_lo(size_words, word_dist); + } else { + word_dist = size_words; + } + do { + *m_out++ = 0; + --word_dist; + } while (word_dist); +} + +/** + * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of + * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' + * must be in the range 1 to 31. Any nonzero bits shifted off are lost. The + * shifted N-bit result is stored at the location pointed to by 'm_out'. Each + * of 'a' and 'm_out' points to a 'size_words'-long array of 32-bit elements + * that concatenate in the platform's normal endian order to form an N-bit + * integer. + * + * From softfloat_shortShiftRightM() + */ +static inline void +_mesa_short_shift_right_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out) +{ + uint8_t neg_dist; + unsigned index, last_index; + uint32_t part_word, a_word; + + neg_dist = -dist; + index = index_word_lo(size_words); + last_index = index_word_hi(size_words); + part_word = a[index] >> dist; + while (index != last_index) { + a_word = a[index + word_incr]; + m_out[index] = a_word << (neg_dist & 31) | part_word; + index += word_incr; + part_word = a_word >> dist; + } + m_out[index] = part_word; +} + +/** + * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of + * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' + * must be in the range 1 to 31. If any nonzero bits are shifted off, they + * are "jammed" into the least-significant bit of the shifted value by setting + * the least-significant bit to 1. This shifted-and-jammed N-bit result is + * stored at the location pointed to by 'm_out'. Each of 'a' and 'm_out' + * points to a 'size_words'-long array of 32-bit elements that concatenate in + * the platform's normal endian order to form an N-bit integer. + * + * + * From softfloat_shortShiftRightJamM() + */ +static inline void +_mesa_short_shift_right_jam_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out) +{ + uint8_t neg_dist; + unsigned index, last_index; + uint64_t part_word, a_word; + + neg_dist = -dist; + index = index_word_lo(size_words); + last_index = index_word_hi(size_words); + a_word = a[index]; + part_word = a_word >> dist; + if (part_word << dist != a_word ) + part_word |= 1; + while (index != last_index) { + a_word = a[index + word_incr]; + m_out[index] = a_word << (neg_dist & 31) | part_word; + index += word_incr; + part_word = a_word >> dist; + } + m_out[index] = part_word; +} + +/** + * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of + * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' + * must not be zero. 
If any nonzero bits are shifted off, they are "jammed" + * into the least-significant bit of the shifted value by setting the + * least-significant bit to 1. This shifted-and-jammed N-bit result is stored + * at the location pointed to by 'm_out'. Each of 'a' and 'm_out' points to a + * 'size_words'-long array of 32-bit elements that concatenate in the + * platform's normal endian order to form an N-bit integer. The value of + * 'dist' can be arbitrarily large. In particular, if 'dist' is greater than + * N, the stored result will be either 0 or 1, depending on whether the + * original N bits are all zeros. + * + * From softfloat_shiftRightJamM() + */ +static inline void +_mesa_shift_right_jam_m(uint8_t size_words, const uint32_t *a, uint32_t dist, uint32_t *m_out) +{ + uint32_t word_jam, word_dist, *tmp; + uint8_t i, inner_dist; + + word_jam = 0; + word_dist = dist >> 5; + if (word_dist) { + if (size_words < word_dist) + word_dist = size_words; + tmp = (uint32_t *) (a + index_multiword_lo(size_words, word_dist)); + i = word_dist; + do { + word_jam = *tmp++; + if (word_jam) + break; + --i; + } while (i); + tmp = m_out; + } + if (word_dist < size_words) { + a += index_multiword_hi_but(size_words, word_dist); + inner_dist = dist & 31; + if (inner_dist) { + _mesa_short_shift_right_jam_m(size_words - word_dist, a, inner_dist, + m_out + index_multiword_lo_but(size_words, word_dist)); + if (!word_dist) { + if (word_jam) + m_out[index_word_lo(size_words)] |= 1; + return; + } + } else { + a += index_word_lo(size_words - word_dist); + tmp = m_out + index_word_lo(size_words); + for (i = size_words - word_dist; i; --i) { + *tmp = *a; + a += word_incr; + tmp += word_incr; + } + } + tmp = m_out + index_multiword_hi(size_words, word_dist); + } + do { + *tmp++ = 0; + --word_dist; + } while (word_dist); + if (word_jam) + m_out[index_word_lo(size_words)] |= 1; +} + +/** + * \brief Calculate a + b but rounding to zero. + * + * Notice that this mainly differs from the original Berkeley SoftFloat 3e + * implementation in that we don't really treat NaNs, Zeroes nor the + * signalling flags. Any NaN is good for us and the sign of the Zero is not + * important. 
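+ *
+ * Rounding to zero means any bits of the exact sum that do not fit in the
+ * 53-bit significand are simply discarded, so the magnitude of the result
+ * is never rounded up.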
+ * + * From f64_add() + */ +double +_mesa_double_add_rtz(double a, double b) +{ + const di_type a_di = {a}; + uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; + uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; + uint64_t a_flt_s = (a_di.u >> 63) & 0x1; + const di_type b_di = {b}; + uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; + uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; + uint64_t b_flt_s = (b_di.u >> 63) & 0x1; + int64_t s, e, m = 0; + + s = a_flt_s; + + const int64_t exp_diff = a_flt_e - b_flt_e; + + /* Handle special cases */ + + if (a_flt_s != b_flt_s) { + return _mesa_double_sub_rtz(a, -b); + } else if ((a_flt_e == 0) && (a_flt_m == 0)) { + /* 'a' is zero, return 'b' */ + return b; + } else if ((b_flt_e == 0) && (b_flt_m == 0)) { + /* 'b' is zero, return 'a' */ + return a; + } else if (a_flt_e == 0x7ff && a_flt_m != 0) { + /* 'a' is a NaN, return NaN */ + return a; + } else if (b_flt_e == 0x7ff && b_flt_m != 0) { + /* 'b' is a NaN, return NaN */ + return b; + } else if (a_flt_e == 0x7ff && a_flt_m == 0) { + /* Inf + x = Inf */ + return a; + } else if (b_flt_e == 0x7ff && b_flt_m == 0) { + /* x + Inf = Inf */ + return b; + } else if (exp_diff == 0 && a_flt_e == 0) { + di_type result_di; + result_di.u = a_di.u + b_flt_m; + return result_di.f; + } else if (exp_diff == 0) { + e = a_flt_e; + m = 0x0020000000000000 + a_flt_m + b_flt_m; + m <<= 9; + } else if (exp_diff < 0) { + a_flt_m <<= 9; + b_flt_m <<= 9; + e = b_flt_e; + + if (a_flt_e != 0) + a_flt_m += 0x2000000000000000; + else + a_flt_m <<= 1; + + a_flt_m = _mesa_shift_right_jam64(a_flt_m, -exp_diff); + m = 0x2000000000000000 + a_flt_m + b_flt_m; + if (m < 0x4000000000000000) { + --e; + m <<= 1; + } + } else { + a_flt_m <<= 9; + b_flt_m <<= 9; + e = a_flt_e; + + if (b_flt_e != 0) + b_flt_m += 0x2000000000000000; + else + b_flt_m <<= 1; + + b_flt_m = _mesa_shift_right_jam64(b_flt_m, exp_diff); + m = 0x2000000000000000 + a_flt_m + b_flt_m; + if (m < 0x4000000000000000) { + --e; + m <<= 1; + } + } + + return _mesa_roundtozero_f64(s, e, m); +} + +/** + * \brief Returns the number of leading 0 bits before the most-significant 1 bit of + * 'a'. If 'a' is zero, 64 is returned. + */ +static inline unsigned +_mesa_count_leading_zeros64(uint64_t a) +{ + return 64 - util_last_bit64(a); +} + +/** + * \brief Returns the number of leading 0 bits before the most-significant 1 bit of + * 'a'. If 'a' is zero, 32 is returned. + */ +static inline unsigned +_mesa_count_leading_zeros32(uint32_t a) +{ + return 32 - util_last_bit(a); +} + +static inline double +_mesa_norm_round_pack_f64(int64_t s, int64_t e, int64_t m) +{ + int8_t shift_dist; + + shift_dist = _mesa_count_leading_zeros64(m) - 1; + e -= shift_dist; + if ((10 <= shift_dist) && ((unsigned) e < 0x7fd)) { + di_type result; + result.u = (s << 63) + ((m ? e : 0) << 52) + (m << (shift_dist - 10)); + return result.f; + } else { + return _mesa_roundtozero_f64(s, e, m << shift_dist); + } +} + +/** + * \brief Replaces the N-bit unsigned integer pointed to by 'm_out' by the + * 2s-complement of itself, where N = 'size_words' * 32. Argument 'm_out' + * points to a 'size_words'-long array of 32-bit elements that concatenate in + * the platform's normal endian order to form an N-bit integer. 
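+ *
+ * Illustrative example: with 'size_words' == 2, negating the 64-bit value 1
+ * (stored as the words {1, 0} on a little-endian platform) yields
+ * {0xffffffff, 0xffffffff}, i.e. the two's complement representation of -1.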
+ * + * From softfloat_negXM() + */ +static inline void +_mesa_neg_x_m(uint8_t size_words, uint32_t *m_out) +{ + unsigned index, last_index; + uint8_t carry; + uint32_t word; + + index = index_word_lo(size_words); + last_index = index_word_hi(size_words); + carry = 1; + for (;;) { + word = ~m_out[index] + carry; + m_out[index] = word; + if (index == last_index) + break; + index += word_incr; + if (word) + carry = 0; + } +} + +/** + * \brief Adds the two N-bit integers pointed to by 'a' and 'b', where N = + * 'size_words' * 32. The addition is modulo 2^N, so any carry out is + * lost. The N-bit sum is stored at the location pointed to by 'm_out'. Each + * of 'a', 'b', and 'm_out' points to a 'size_words'-long array of 32-bit + * elements that concatenate in the platform's normal endian order to form an + * N-bit integer. + * + * From softfloat_addM() + */ +static inline void +_mesa_add_m(uint8_t size_words, const uint32_t *a, const uint32_t *b, uint32_t *m_out) +{ + unsigned index, last_index; + uint8_t carry; + uint32_t a_word, word; + + index = index_word_lo(size_words); + last_index = index_word_hi(size_words); + carry = 0; + for (;;) { + a_word = a[index]; + word = a_word + b[index] + carry; + m_out[index] = word; + if (index == last_index) + break; + if (word != a_word) + carry = (word < a_word); + index += word_incr; + } +} + +/** + * \brief Subtracts the two N-bit integers pointed to by 'a' and 'b', where N = + * 'size_words' * 32. The subtraction is modulo 2^N, so any borrow out (carry + * out) is lost. The N-bit difference is stored at the location pointed to by + * 'm_out'. Each of 'a', 'b', and 'm_out' points to a 'size_words'-long array + * of 32-bit elements that concatenate in the platform's normal endian order + * to form an N-bit integer. + * + * From softfloat_subM() + */ +static inline void +_mesa_sub_m(uint8_t size_words, const uint32_t *a, const uint32_t *b, uint32_t *m_out) +{ + unsigned index, last_index; + uint8_t borrow; + uint32_t a_word, b_word; + + index = index_word_lo(size_words); + last_index = index_word_hi(size_words); + borrow = 0; + for (;;) { + a_word = a[index]; + b_word = b[index]; + m_out[index] = a_word - b_word - borrow; + if (index == last_index) + break; + borrow = borrow ? (a_word <= b_word) : (a_word < b_word); + index += word_incr; + } +} + +/* Calculate a - b but rounding to zero. + * + * Notice that this mainly differs from the original Berkeley SoftFloat 3e + * implementation in that we don't really treat NaNs, Zeroes nor the + * signalling flags. Any NaN is good for us and the sign of the Zero is not + * important. 
+ * + * From f64_sub() + */ +double +_mesa_double_sub_rtz(double a, double b) +{ + const di_type a_di = {a}; + uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; + uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; + uint64_t a_flt_s = (a_di.u >> 63) & 0x1; + const di_type b_di = {b}; + uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; + uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; + uint64_t b_flt_s = (b_di.u >> 63) & 0x1; + int64_t s, e, m = 0; + int64_t m_diff = 0; + unsigned shift_dist = 0; + + s = a_flt_s; + + const int64_t exp_diff = a_flt_e - b_flt_e; + + /* Handle special cases */ + + if (a_flt_s != b_flt_s) { + return _mesa_double_add_rtz(a, -b); + } else if ((a_flt_e == 0) && (a_flt_m == 0)) { + /* 'a' is zero, return '-b' */ + return -b; + } else if ((b_flt_e == 0) && (b_flt_m == 0)) { + /* 'b' is zero, return 'a' */ + return a; + } else if (a_flt_e == 0x7ff && a_flt_m != 0) { + /* 'a' is a NaN, return NaN */ + return a; + } else if (b_flt_e == 0x7ff && b_flt_m != 0) { + /* 'b' is a NaN, return NaN */ + return b; + } else if (a_flt_e == 0x7ff && a_flt_m == 0) { + if (b_flt_e == 0x7ff && b_flt_m == 0) { + /* Inf - Inf = NaN */ + di_type result; + e = 0x7ff; + result.u = (s << 63) + (e << 52) + 0x1; + return result.f; + } + /* Inf - x = Inf */ + return a; + } else if (b_flt_e == 0x7ff && b_flt_m == 0) { + /* x - Inf = -Inf */ + return -b; + } else if (exp_diff == 0) { + m_diff = a_flt_m - b_flt_m; + + if (m_diff == 0) + return 0; + if (a_flt_e) + --a_flt_e; + if (m_diff < 0) { + s = !s; + m_diff = -m_diff; + } + + shift_dist = _mesa_count_leading_zeros64(m_diff) - 11; + e = a_flt_e - shift_dist; + if (e < 0) { + shift_dist = a_flt_e; + e = 0; + } + + di_type result; + result.u = (s << 63) + (e << 52) + (m_diff << shift_dist); + return result.f; + } else if (exp_diff < 0) { + a_flt_m <<= 10; + b_flt_m <<= 10; + s = !s; + + a_flt_m += (a_flt_e) ? 0x4000000000000000 : a_flt_m; + a_flt_m = _mesa_shift_right_jam64(a_flt_m, -exp_diff); + b_flt_m |= 0x4000000000000000; + e = b_flt_e; + m = b_flt_m - a_flt_m; + } else { + a_flt_m <<= 10; + b_flt_m <<= 10; + + b_flt_m += (b_flt_e) ? 0x4000000000000000 : b_flt_m; + b_flt_m = _mesa_shift_right_jam64(b_flt_m, exp_diff); + a_flt_m |= 0x4000000000000000; + e = a_flt_e; + m = a_flt_m - b_flt_m; + } + + return _mesa_norm_round_pack_f64(s, e - 1, m); +} + +static inline void +_mesa_norm_subnormal_mantissa_f64(uint64_t m, uint64_t *exp, uint64_t *m_out) +{ + int shift_dist; + + shift_dist = _mesa_count_leading_zeros64(m) - 11; + *exp = 1 - shift_dist; + *m_out = m << shift_dist; +} + +static inline void +_mesa_norm_subnormal_mantissa_f32(uint32_t m, uint32_t *exp, uint32_t *m_out) +{ + int shift_dist; + + shift_dist = _mesa_count_leading_zeros32(m) - 8; + *exp = 1 - shift_dist; + *m_out = m << shift_dist; +} + +/** + * \brief Multiplies 'a' and 'b' and stores the 128-bit product at the location + * pointed to by 'zPtr'. Argument 'zPtr' points to an array of four 32-bit + * elements that concatenate in the platform's normal endian order to form a + * 128-bit integer. 
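+ *
+ * The product is assembled from 32-bit halves (a = a32 * 2^32 + a0, and
+ * likewise for b):
+ *
+ *    a * b = (a32*b32) * 2^64 + (a32*b0 + a0*b32) * 2^32 + a0*b0
+ *
+ * with the carries between the partial products handled explicitly below.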
+ * + * From softfloat_mul64To128M() + */ +static inline void +_mesa_softfloat_mul_f64_to_f128_m(uint64_t a, uint64_t b, uint32_t *m_out) +{ + uint32_t a32, a0, b32, b0; + uint64_t z0, mid1, z64, mid; + + a32 = a >> 32; + a0 = a; + b32 = b >> 32; + b0 = b; + z0 = (uint64_t) a0 * b0; + mid1 = (uint64_t) a32 * b0; + mid = mid1 + (uint64_t) a0 * b32; + z64 = (uint64_t) a32 * b32; + z64 += (uint64_t) (mid < mid1) << 32 | mid >> 32; + mid <<= 32; + z0 += mid; + m_out[index_word(4, 1)] = z0 >> 32; + m_out[index_word(4, 0)] = z0; + z64 += (z0 < mid); + m_out[index_word(4, 3)] = z64 >> 32; + m_out[index_word(4, 2)] = z64; +} + +/* Calculate a * b but rounding to zero. + * + * Notice that this mainly differs from the original Berkeley SoftFloat 3e + * implementation in that we don't really treat NaNs, Zeroes nor the + * signalling flags. Any NaN is good for us and the sign of the Zero is not + * important. + * + * From f64_mul() + */ +double +_mesa_double_mul_rtz(double a, double b) +{ + const di_type a_di = {a}; + uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; + uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; + uint64_t a_flt_s = (a_di.u >> 63) & 0x1; + const di_type b_di = {b}; + uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; + uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; + uint64_t b_flt_s = (b_di.u >> 63) & 0x1; + int64_t s, e, m = 0; + + s = a_flt_s ^ b_flt_s; + + if (a_flt_e == 0x7ff) { + if (a_flt_m != 0) { + /* 'a' is a NaN, return NaN */ + return a; + } else if (b_flt_e == 0x7ff && b_flt_m != 0) { + /* 'b' is a NaN, return NaN */ + return b; + } + + if (!(b_flt_e | b_flt_m)) { + /* Inf * 0 = NaN */ + di_type result; + e = 0x7ff; + result.u = (s << 63) + (e << 52) + 0x1; + return result.f; + } + /* Inf * x = Inf */ + di_type result; + e = 0x7ff; + result.u = (s << 63) + (e << 52) + 0; + return result.f; + } + + if (b_flt_e == 0x7ff) { + if (b_flt_m != 0) { + /* 'b' is a NaN, return NaN */ + return b; + } + if (!(a_flt_e | a_flt_m)) { + /* 0 * Inf = NaN */ + di_type result; + e = 0x7ff; + result.u = (s << 63) + (e << 52) + 0x1; + return result.f; + } + /* x * Inf = Inf */ + di_type result; + e = 0x7ff; + result.u = (s << 63) + (e << 52) + 0; + return result.f; + } + + if (a_flt_e == 0) { + if (a_flt_m == 0) { + /* 'a' is zero. Return zero */ + di_type result; + result.u = (s << 63) + 0; + return result.f; + } + _mesa_norm_subnormal_mantissa_f64(a_flt_m , &a_flt_e, &a_flt_m); + } + if (b_flt_e == 0) { + if (b_flt_m == 0) { + /* 'b' is zero. Return zero */ + di_type result; + result.u = (s << 63) + 0; + return result.f; + } + _mesa_norm_subnormal_mantissa_f64(b_flt_m , &b_flt_e, &b_flt_m); + } + + e = a_flt_e + b_flt_e - 0x3ff; + a_flt_m = (a_flt_m | 0x0010000000000000) << 10; + b_flt_m = (b_flt_m | 0x0010000000000000) << 11; + + uint32_t m_128[4]; + _mesa_softfloat_mul_f64_to_f128_m(a_flt_m, b_flt_m, m_128); + + m = (uint64_t) m_128[index_word(4, 3)] << 32 | m_128[index_word(4, 2)]; + if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) + m |= 1; + + if (m < 0x4000000000000000) { + --e; + m <<= 1; + } + + return _mesa_roundtozero_f64(s, e, m); +} + + +/** + * \brief Calculate a * b + c but rounding to zero. + * + * Notice that this mainly differs from the original Berkeley SoftFloat 3e + * implementation in that we don't really treat NaNs, Zeroes nor the + * signalling flags. Any NaN is good for us and the sign of the Zero is not + * important. 
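+ *
+ * As a fused operation, the product a*b is kept at full (128-bit) precision
+ * and only the final sum with c is rounded (toward zero), rather than
+ * rounding once after the multiply and again after the add.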
+ * + * From f64_mulAdd() + */ +double +_mesa_double_fma_rtz(double a, double b, double c) +{ + const di_type a_di = {a}; + uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; + uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; + uint64_t a_flt_s = (a_di.u >> 63) & 0x1; + const di_type b_di = {b}; + uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; + uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; + uint64_t b_flt_s = (b_di.u >> 63) & 0x1; + const di_type c_di = {c}; + uint64_t c_flt_m = c_di.u & 0x0fffffffffffff; + uint64_t c_flt_e = (c_di.u >> 52) & 0x7ff; + uint64_t c_flt_s = (c_di.u >> 63) & 0x1; + int64_t s, e, m = 0; + + c_flt_s ^= 0; + s = a_flt_s ^ b_flt_s ^ 0; + + if (a_flt_e == 0x7ff) { + if (a_flt_m != 0) { + /* 'a' is a NaN, return NaN */ + return a; + } else if (b_flt_e == 0x7ff && b_flt_m != 0) { + /* 'b' is a NaN, return NaN */ + return b; + } else if (c_flt_e == 0x7ff && c_flt_m != 0) { + /* 'c' is a NaN, return NaN */ + return c; + } + + if (!(b_flt_e | b_flt_m)) { + /* Inf * 0 + y = NaN */ + di_type result; + e = 0x7ff; + result.u = (s << 63) + (e << 52) + 0x1; + return result.f; + } + + if ((c_flt_e == 0x7ff && c_flt_m == 0) && (s != c_flt_s)) { + /* Inf * x - Inf = NaN */ + di_type result; + e = 0x7ff; + result.u = (s << 63) + (e << 52) + 0x1; + return result.f; + } + + /* Inf * x + y = Inf */ + di_type result; + e = 0x7ff; + result.u = (s << 63) + (e << 52) + 0; + return result.f; + } + + if (b_flt_e == 0x7ff) { + if (b_flt_m != 0) { + /* 'b' is a NaN, return NaN */ + return b; + } else if (c_flt_e == 0x7ff && c_flt_m != 0) { + /* 'c' is a NaN, return NaN */ + return c; + } + + if (!(a_flt_e | a_flt_m)) { + /* 0 * Inf + y = NaN */ + di_type result; + e = 0x7ff; + result.u = (s << 63) + (e << 52) + 0x1; + return result.f; + } + + if ((c_flt_e == 0x7ff && c_flt_m == 0) && (s != c_flt_s)) { + /* x * Inf - Inf = NaN */ + di_type result; + e = 0x7ff; + result.u = (s << 63) + (e << 52) + 0x1; + return result.f; + } + + /* x * Inf + y = Inf */ + di_type result; + e = 0x7ff; + result.u = (s << 63) + (e << 52) + 0; + return result.f; + } + + if (c_flt_e == 0x7ff) { + if (c_flt_m != 0) { + /* 'c' is a NaN, return NaN */ + return c; + } + + /* x * y + Inf = Inf */ + return c; + } + + if (a_flt_e == 0) { + if (a_flt_m == 0) { + /* 'a' is zero, return 'c' */ + return c; + } + _mesa_norm_subnormal_mantissa_f64(a_flt_m , &a_flt_e, &a_flt_m); + } + + if (b_flt_e == 0) { + if (b_flt_m == 0) { + /* 'b' is zero, return 'c' */ + return c; + } + _mesa_norm_subnormal_mantissa_f64(b_flt_m , &b_flt_e, &b_flt_m); + } + + e = a_flt_e + b_flt_e - 0x3fe; + a_flt_m = (a_flt_m | 0x0010000000000000) << 10; + b_flt_m = (b_flt_m | 0x0010000000000000) << 11; + + uint32_t m_128[4]; + _mesa_softfloat_mul_f64_to_f128_m(a_flt_m, b_flt_m, m_128); + + m = (uint64_t) m_128[index_word(4, 3)] << 32 | m_128[index_word(4, 2)]; + + int64_t shift_dist = 0; + if (!(m & 0x4000000000000000)) { + --e; + shift_dist = -1; + } + + if (c_flt_e == 0) { + if (c_flt_m == 0) { + /* 'c' is zero, return 'a * b' */ + if (shift_dist) + m <<= 1; + + if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) + m |= 1; + return _mesa_roundtozero_f64(s, e - 1, m); + } + _mesa_norm_subnormal_mantissa_f64(c_flt_m , &c_flt_e, &c_flt_m); + } + c_flt_m = (c_flt_m | 0x0010000000000000) << 10; + + uint32_t c_flt_m_128[4]; + int64_t exp_diff = e - c_flt_e; + if (exp_diff < 0) { + e = c_flt_e; + if ((s == c_flt_s) || (exp_diff < -1)) { + shift_dist -= exp_diff; + if (shift_dist) { + m = _mesa_shift_right_jam64(m, shift_dist); + } + } else { + if (!shift_dist) { + 
_mesa_short_shift_right_m(4, m_128, 1, m_128); + } + } + } else { + if (shift_dist) + _mesa_add_m(4, m_128, m_128, m_128); + if (!exp_diff) { + m = (uint64_t) m_128[index_word(4, 3)] << 32 + | m_128[index_word(4, 2)]; + } else { + c_flt_m_128[index_word(4, 3)] = c_flt_m >> 32; + c_flt_m_128[index_word(4, 2)] = c_flt_m; + c_flt_m_128[index_word(4, 1)] = 0; + c_flt_m_128[index_word(4, 0)] = 0; + _mesa_shift_right_jam_m(4, c_flt_m_128, exp_diff, c_flt_m_128); + } + } + + if (s == c_flt_s) { + if (exp_diff <= 0) { + m += c_flt_m; + } else { + _mesa_add_m(4, m_128, c_flt_m_128, m_128); + m = (uint64_t) m_128[index_word(4, 3)] << 32 + | m_128[index_word(4, 2)]; + } + if (m & 0x8000000000000000) { + e++; + m = _mesa_short_shift_right_jam64(m, 1); + } + } else { + if (exp_diff < 0) { + s = c_flt_s; + if (exp_diff < -1) { + m = c_flt_m - m; + if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) { + m = (m - 1) | 1; + } + if (!(m & 0x4000000000000000)) { + --e; + m <<= 1; + } + return _mesa_roundtozero_f64(s, e - 1, m); + } else { + c_flt_m_128[index_word(4, 3)] = c_flt_m >> 32; + c_flt_m_128[index_word(4, 2)] = c_flt_m; + c_flt_m_128[index_word(4, 1)] = 0; + c_flt_m_128[index_word(4, 0)] = 0; + _mesa_sub_m(4, c_flt_m_128, m_128, m_128); + } + } else if (!exp_diff) { + m -= c_flt_m; + if (!m && !m_128[index_word(4, 1)] && !m_128[index_word(4, 0)]) { + /* Return zero */ + di_type result; + result.u = (s << 63) + 0; + return result.f; + } + m_128[index_word(4, 3)] = m >> 32; + m_128[index_word(4, 2)] = m; + if (m & 0x8000000000000000) { + s = !s; + _mesa_neg_x_m(4, m_128); + } + } else { + _mesa_sub_m(4, m_128, c_flt_m_128, m_128); + if (1 < exp_diff) { + m = (uint64_t) m_128[index_word(4, 3)] << 32 + | m_128[index_word(4, 2)]; + if (!(m & 0x4000000000000000)) { + --e; + m <<= 1; + } + if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) + m |= 1; + return _mesa_roundtozero_f64(s, e - 1, m); + } + } + + shift_dist = 0; + m = (uint64_t) m_128[index_word(4, 3)] << 32 + | m_128[index_word(4, 2)]; + if (!m) { + shift_dist = 64; + m = (uint64_t) m_128[index_word(4, 1)] << 32 + | m_128[index_word(4, 0)]; + } + shift_dist += _mesa_count_leading_zeros64(m) - 1; + if (shift_dist) { + e -= shift_dist; + _mesa_shift_left_m(4, m_128, shift_dist, m_128); + m = (uint64_t) m_128[index_word(4, 3)] << 32 + | m_128[index_word(4, 2)]; + } + } + + if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) + m |= 1; + return _mesa_roundtozero_f64(s, e - 1, m); +} + + +/** + * \brief Calculate a * b + c but rounding to zero. + * + * Notice that this mainly differs from the original Berkeley SoftFloat 3e + * implementation in that we don't really treat NaNs, Zeroes nor the + * signalling flags. Any NaN is good for us and the sign of the Zero is not + * important. 
+ * + * From f32_mulAdd() + */ +float +_mesa_float_fma_rtz(float a, float b, float c) +{ + const fi_type a_fi = {a}; + uint32_t a_flt_m = a_fi.u & 0x07fffff; + uint32_t a_flt_e = (a_fi.u >> 23) & 0xff; + uint32_t a_flt_s = (a_fi.u >> 31) & 0x1; + const fi_type b_fi = {b}; + uint32_t b_flt_m = b_fi.u & 0x07fffff; + uint32_t b_flt_e = (b_fi.u >> 23) & 0xff; + uint32_t b_flt_s = (b_fi.u >> 31) & 0x1; + const fi_type c_fi = {c}; + uint32_t c_flt_m = c_fi.u & 0x07fffff; + uint32_t c_flt_e = (c_fi.u >> 23) & 0xff; + uint32_t c_flt_s = (c_fi.u >> 31) & 0x1; + int32_t s, e, m = 0; + + c_flt_s ^= 0; + s = a_flt_s ^ b_flt_s ^ 0; + + if (a_flt_e == 0xff) { + if (a_flt_m != 0) { + /* 'a' is a NaN, return NaN */ + return a; + } else if (b_flt_e == 0xff && b_flt_m != 0) { + /* 'b' is a NaN, return NaN */ + return b; + } else if (c_flt_e == 0xff && c_flt_m != 0) { + /* 'c' is a NaN, return NaN */ + return c; + } + + if (!(b_flt_e | b_flt_m)) { + /* Inf * 0 + y = NaN */ + fi_type result; + e = 0xff; + result.u = (s << 31) + (e << 23) + 0x1; + return result.f; + } + + if ((c_flt_e == 0xff && c_flt_m == 0) && (s != c_flt_s)) { + /* Inf * x - Inf = NaN */ + fi_type result; + e = 0xff; + result.u = (s << 31) + (e << 23) + 0x1; + return result.f; + } + + /* Inf * x + y = Inf */ + fi_type result; + e = 0xff; + result.u = (s << 31) + (e << 23) + 0; + return result.f; + } + + if (b_flt_e == 0xff) { + if (b_flt_m != 0) { + /* 'b' is a NaN, return NaN */ + return b; + } else if (c_flt_e == 0xff && c_flt_m != 0) { + /* 'c' is a NaN, return NaN */ + return c; + } + + if (!(a_flt_e | a_flt_m)) { + /* 0 * Inf + y = NaN */ + fi_type result; + e = 0xff; + result.u = (s << 31) + (e << 23) + 0x1; + return result.f; + } + + if ((c_flt_e == 0xff && c_flt_m == 0) && (s != c_flt_s)) { + /* x * Inf - Inf = NaN */ + fi_type result; + e = 0xff; + result.u = (s << 31) + (e << 23) + 0x1; + return result.f; + } + + /* x * Inf + y = Inf */ + fi_type result; + e = 0xff; + result.u = (s << 31) + (e << 23) + 0; + return result.f; + } + + if (c_flt_e == 0xff) { + if (c_flt_m != 0) { + /* 'c' is a NaN, return NaN */ + return c; + } + + /* x * y + Inf = Inf */ + return c; + } + + if (a_flt_e == 0) { + if (a_flt_m == 0) { + /* 'a' is zero, return 'c' */ + return c; + } + _mesa_norm_subnormal_mantissa_f32(a_flt_m , &a_flt_e, &a_flt_m); + } + + if (b_flt_e == 0) { + if (b_flt_m == 0) { + /* 'b' is zero, return 'c' */ + return c; + } + _mesa_norm_subnormal_mantissa_f32(b_flt_m , &b_flt_e, &b_flt_m); + } + + e = a_flt_e + b_flt_e - 0x7e; + a_flt_m = (a_flt_m | 0x00800000) << 7; + b_flt_m = (b_flt_m | 0x00800000) << 7; + + uint64_t m_64 = (uint64_t) a_flt_m * b_flt_m; + if (m_64 < 0x2000000000000000) { + --e; + m_64 <<= 1; + } + + if (c_flt_e == 0) { + if (c_flt_m == 0) { + /* 'c' is zero, return 'a * b' */ + m = _mesa_short_shift_right_jam64(m_64, 31); + return _mesa_round_f32(s, e - 1, m, true); + } + _mesa_norm_subnormal_mantissa_f32(c_flt_m , &c_flt_e, &c_flt_m); + } + c_flt_m = (c_flt_m | 0x00800000) << 6; + + int16_t exp_diff = e - c_flt_e; + if (s == c_flt_s) { + if (exp_diff <= 0) { + e = c_flt_e; + m = c_flt_m + _mesa_shift_right_jam64(m_64, 32 - exp_diff); + } else { + m_64 += _mesa_shift_right_jam64((uint64_t) c_flt_m << 32, exp_diff); + m = _mesa_short_shift_right_jam64(m_64, 32); + } + if (m < 0x40000000) { + --e; + m <<= 1; + } + } else { + uint64_t c_flt_m_64 = (uint64_t) c_flt_m << 32; + if (exp_diff < 0) { + s = c_flt_s; + e = c_flt_e; + m_64 = c_flt_m_64 - _mesa_shift_right_jam64(m_64, -exp_diff); + } else if (!exp_diff) { + 
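/* Same exponent, opposite signs: subtract c's mantissa from the product directly. */ + 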
m_64 -= c_flt_m_64; + if (!m_64) { + /* Return zero */ + fi_type result; + result.u = (s << 31) + 0; + return result.f; + } + if (m_64 & 0x8000000000000000) { + s = !s; + m_64 = -m_64; + } + } else { + m_64 -= _mesa_shift_right_jam64(c_flt_m_64, exp_diff); + } + int8_t shift_dist = _mesa_count_leading_zeros64(m_64) - 1; + e -= shift_dist; + shift_dist -= 32; + if (shift_dist < 0) { + m = _mesa_short_shift_right_jam64(m_64, -shift_dist); + } else { + m = (uint32_t) m_64 << shift_dist; + } + } + + return _mesa_round_f32(s, e, m, true); +} + + +/** + * \brief Converts from 64bits to 32bits float and rounds according to + * instructed. + * + * From f64_to_f32() + */ +float +_mesa_double_to_f32(double val, bool rtz) +{ + const di_type di = {val}; + uint64_t flt_m = di.u & 0x0fffffffffffff; + uint64_t flt_e = (di.u >> 52) & 0x7ff; + uint64_t flt_s = (di.u >> 63) & 0x1; + int32_t s, e, m = 0; + + s = flt_s; + + if (flt_e == 0x7ff) { + if (flt_m != 0) { + /* 'val' is a NaN, return NaN */ + fi_type result; + e = 0xff; + m = 0x1; + result.u = (s << 31) + (e << 23) + m; + return result.f; + } + + /* 'val' is Inf, return Inf */ + fi_type result; + e = 0xff; + result.u = (s << 31) + (e << 23) + m; + return result.f; + } + + if (!(flt_e | flt_m)) { + /* 'val' is zero, return zero */ + fi_type result; + e = 0; + result.u = (s << 31) + (e << 23) + m; + return result.f; + } + + m = _mesa_short_shift_right_jam64(flt_m, 22); + if ( ! (flt_e | m) ) { + /* 'val' is denorm, return zero */ + fi_type result; + e = 0; + result.u = (s << 31) + (e << 23) + m; + return result.f; + } + + return _mesa_round_f32(s, flt_e - 0x381, m | 0x40000000, rtz); +} + + +/** + * \brief Converts from 32bits to 16bits float and rounds the result to zero. + * + * From f32_to_f16() + */ +uint16_t +_mesa_float_to_half_rtz(float val) +{ + const fi_type fi = {val}; + const uint32_t flt_m = fi.u & 0x7fffff; + const uint32_t flt_e = (fi.u >> 23) & 0xff; + const uint32_t flt_s = (fi.u >> 31) & 0x1; + int16_t s, e, m = 0; + + s = flt_s; + + if (flt_e == 0xff) { + if (flt_m != 0) { + /* 'val' is a NaN, return NaN */ + e = 0x1f; + m = 0x1; + return (s << 15) + (e << 10) + m; + } + + /* 'val' is Inf, return Inf */ + e = 0x1f; + return (s << 15) + (e << 10) + m; + } + + if (!(flt_e | flt_m)) { + /* 'val' is zero, return zero */ + e = 0; + return (s << 15) + (e << 10) + m; + } + + m = flt_m >> 9 | ((flt_m & 0x1ff) != 0); + if ( ! (flt_e | m) ) { + /* 'val' is denorm, return zero */ + e = 0; + return (s << 15) + (e << 10) + m; + } + + return _mesa_roundtozero_f16(s, flt_e - 0x71, m | 0x4000); +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/softfloat.h b/third_party/rust/glslopt/glsl-optimizer/src/util/softfloat.h new file mode 100644 index 0000000000..4e48c6548b --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/softfloat.h @@ -0,0 +1,65 @@ +/* + * License for Berkeley SoftFloat Release 3e + * + * John R. Hauser + * 2018 January 20 + * + * The following applies to the whole of SoftFloat Release 3e as well as to + * each source file individually. + * + * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the + * University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions, and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * The functions listed in this file are modified versions of the ones + * from the Berkeley SoftFloat 3e Library. + */ + +#ifndef _SOFTFLOAT_H_ +#define _SOFTFLOAT_H_ + +#include <stdbool.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +double _mesa_double_add_rtz(double a, double b); +double _mesa_double_sub_rtz(double a, double b); +double _mesa_double_mul_rtz(double a, double b); +double _mesa_double_fma_rtz(double a, double b, double c); +float _mesa_float_fma_rtz(float a, float b, float c); +float _mesa_double_to_f32(double x, bool rtz); +uint16_t _mesa_float_to_half_rtz(float x); + +#ifdef __cplusplus +} /* extern C */ +#endif + +#endif /* _SOFTFLOAT_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/string_buffer.c b/third_party/rust/glslopt/glsl-optimizer/src/util/string_buffer.c new file mode 100644 index 0000000000..31ebe3cb7e --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/string_buffer.c @@ -0,0 +1,148 @@ +/* + * Copyright © 2017 Thomas Helland + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "string_buffer.h" + +static bool +ensure_capacity(struct _mesa_string_buffer *str, uint32_t needed_capacity) +{ + if (needed_capacity <= str->capacity) + return true; + + /* Too small, double until we can fit the new string */ + uint32_t new_capacity = str->capacity * 2; + while (needed_capacity > new_capacity) + new_capacity *= 2; + + str->buf = reralloc_array_size(str, str->buf, sizeof(char), new_capacity); + if (str->buf == NULL) + return false; + + str->capacity = new_capacity; + return true; +} + +struct _mesa_string_buffer * +_mesa_string_buffer_create(void *mem_ctx, uint32_t initial_capacity) +{ + struct _mesa_string_buffer *str; + str = ralloc(mem_ctx, struct _mesa_string_buffer); + + if (str == NULL) + return NULL; + + /* If no initial capacity is set then set it to something */ + str->capacity = initial_capacity ? initial_capacity : 32; + str->buf = ralloc_array(str, char, str->capacity); + + if (!str->buf) { + ralloc_free(str); + return NULL; + } + + str->length = 0; + str->buf[str->length] = '\0'; + return str; +} + +bool +_mesa_string_buffer_append_all(struct _mesa_string_buffer *str, + uint32_t num_args, ...) +{ + int i; + char* s; + va_list args; + va_start(args, num_args); + for (i = 0; i < num_args; i++) { + s = va_arg(args, char*); + if (!_mesa_string_buffer_append_len(str, s, strlen(s))) { + va_end(args); + return false; + } + } + va_end(args); + return true; +} + +bool +_mesa_string_buffer_append_len(struct _mesa_string_buffer *str, + const char *c, uint32_t len) +{ + uint32_t needed_length = str->length + len + 1; + + /* Check if we're overflowing uint32_t */ + if (needed_length < str->length) + return false; + + if (!ensure_capacity(str, needed_length)) + return false; + + memcpy(str->buf + str->length, c, len); + str->length += len; + str->buf[str->length] = '\0'; + return true; +} + +bool +_mesa_string_buffer_vprintf(struct _mesa_string_buffer *str, + const char *format, va_list args) +{ + /* We're looping two times to avoid duplicating code */ + for (uint32_t i = 0; i < 2; i++) { + va_list arg_copy; + va_copy(arg_copy, args); + uint32_t space_left = str->capacity - str->length; + + int32_t len = vsnprintf(str->buf + str->length, + space_left, format, arg_copy); + va_end(arg_copy); + + /* Error in vsnprintf() or measured len overflows size_t */ + if (unlikely(len < 0 || str->length + len + 1 < str->length)) + return false; + + /* There was enough space for the string; we're done */ + if (len < space_left) { + str->length += len; + return true; + } + + /* Not enough space, resize and retry */ + ensure_capacity(str, str->length + len + 1); + } + + return false; +} + +bool +_mesa_string_buffer_printf(struct _mesa_string_buffer *str, + const char *format, ...) 
+{ + bool res; + va_list args; + va_start(args, format); + res = _mesa_string_buffer_vprintf(str, format, args); + va_end(args); + return res; +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/string_buffer.h b/third_party/rust/glslopt/glsl-optimizer/src/util/string_buffer.h new file mode 100644 index 0000000000..eaaf5f33df --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/string_buffer.h @@ -0,0 +1,104 @@ +/* + * Copyright © 2017 Thomas Helland + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ +#ifndef _STRING_BUFFER_H +#define _STRING_BUFFER_H + +#include "ralloc.h" +#include "u_string.h" +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct _mesa_string_buffer { + char *buf; + uint32_t length; + uint32_t capacity; +}; + +struct _mesa_string_buffer * +_mesa_string_buffer_create(void *mem_ctx, uint32_t initial_capacity); + +static inline void +_mesa_string_buffer_destroy(struct _mesa_string_buffer *str) +{ + ralloc_free(str); +} + +bool +_mesa_string_buffer_append_all(struct _mesa_string_buffer *str, + uint32_t num_args, ...); +bool +_mesa_string_buffer_append_len(struct _mesa_string_buffer *str, + const char *c, uint32_t len); + +static inline bool +_mesa_string_buffer_append_char(struct _mesa_string_buffer *str, char c) +{ + return _mesa_string_buffer_append_len(str, &c, 1); +} + +static inline bool +_mesa_string_buffer_append(struct _mesa_string_buffer *str, const char *c) +{ + return _mesa_string_buffer_append_len(str, c, strlen(c)); +} + +static inline void +_mesa_string_buffer_clear(struct _mesa_string_buffer *str) +{ + str->length = 0; + str->buf[str->length] = '\0'; +} + +static inline void +_mesa_string_buffer_crimp_to_fit(struct _mesa_string_buffer *str) +{ + char *crimped = + (char *) reralloc_array_size(str, str->buf, sizeof(char), + str->capacity); + if (!crimped) + return; + + str->capacity = str->length + 1; + str->buf = crimped; +} + +bool +_mesa_string_buffer_vprintf(struct _mesa_string_buffer *str, + const char *format, va_list args); + +bool +_mesa_string_buffer_printf(struct _mesa_string_buffer *str, + const char *format, ...); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* _STRING_BUFFER_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/strndup.h b/third_party/rust/glslopt/glsl-optimizer/src/util/strndup.h new file mode 100644 index 
0000000000..dcaa429dbe --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/strndup.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef STRNDUP_H +#define STRNDUP_H + +#if defined(_WIN32) + +#include <stdlib.h> // size_t +#include <string.h> + +#ifdef __cplusplus +extern "C" { +#endif + +static inline char * +strndup(const char *str, size_t max) +{ + size_t n; + char *ptr; + + if (!str) + return NULL; + + n = strnlen(str, max); + ptr = (char *) calloc(n + 1, sizeof(char)); + if (!ptr) + return NULL; + + memcpy(ptr, str, n); + return ptr; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _WIN32 */ + +#endif /* STRNDUP_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/strtod.c b/third_party/rust/glslopt/glsl-optimizer/src/util/strtod.c new file mode 100644 index 0000000000..de695d64b4 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/strtod.c @@ -0,0 +1,85 @@ +/* + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + + +#include <stdlib.h> + +#if defined(_GNU_SOURCE) && defined(HAVE_STRTOD_L) +#include <locale.h> +#ifdef HAVE_XLOCALE_H +#include <xlocale.h> +#endif +static locale_t loc; +#endif + +#include "strtod.h" + + +void +_mesa_locale_init(void) +{ +#if defined(_GNU_SOURCE) && defined(HAVE_STRTOD_L) + loc = newlocale(LC_CTYPE_MASK, "C", NULL); +#endif +} + +void +_mesa_locale_fini(void) +{ +#if defined(_GNU_SOURCE) && defined(HAVE_STRTOD_L) + freelocale(loc); +#endif +} + +/** + * Wrapper around strtod which uses the "C" locale so the decimal + * point is always '.' + */ +double +_mesa_strtod(const char *s, char **end) +{ +#if defined(_GNU_SOURCE) && defined(HAVE_STRTOD_L) + return strtod_l(s, end, loc); +#else + return strtod(s, end); +#endif +} + + +/** + * Wrapper around strtof which uses the "C" locale so the decimal + * point is always '.' + */ +float +_mesa_strtof(const char *s, char **end) +{ +#if defined(_GNU_SOURCE) && defined(HAVE_STRTOD_L) + return strtof_l(s, end, loc); +#elif defined(HAVE_STRTOF) + return strtof(s, end); +#else + return (float) strtod(s, end); +#endif +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/strtod.h b/third_party/rust/glslopt/glsl-optimizer/src/util/strtod.h new file mode 100644 index 0000000000..60e15cfa0e --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/strtod.h @@ -0,0 +1,52 @@ +/* + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef STRTOD_H +#define STRTOD_H + +#ifdef __cplusplus +extern "C" { +#endif + +extern void +_mesa_locale_init(void); + +extern void +_mesa_locale_fini(void); + +extern double +_mesa_strtod(const char *s, char **end); + +extern float +_mesa_strtof(const char *s, char **end); + + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/u_atomic.h b/third_party/rust/glslopt/glsl-optimizer/src/util/u_atomic.h new file mode 100644 index 0000000000..db56835e9d --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/u_atomic.h @@ -0,0 +1,268 @@ +/** + * Many similar implementations exist. See for example libwsbm + * or the linux kernel include/atomic.h + * + * No copyright claimed on this file. + * + */ + +#include "no_extern_c.h" + +#ifndef U_ATOMIC_H +#define U_ATOMIC_H + +#include <stdbool.h> +#include <stdint.h> + +/* Favor OS-provided implementations. 
+ * + * Where no OS-provided implementation is available, fall back to + * locally coded assembly, compiler intrinsic or ultimately a + * mutex-based implementation. + */ +#if defined(__sun) +#define PIPE_ATOMIC_OS_SOLARIS +#elif defined(_MSC_VER) +#define PIPE_ATOMIC_MSVC_INTRINSIC +#elif defined(__GNUC__) +#define PIPE_ATOMIC_GCC_INTRINSIC +#else +#error "Unsupported platform" +#endif + + +/* Implementation using GCC-provided synchronization intrinsics + */ +#if defined(PIPE_ATOMIC_GCC_INTRINSIC) + +#define PIPE_ATOMIC "GCC Sync Intrinsics" + +#if defined(USE_GCC_ATOMIC_BUILTINS) + +/* The builtins with explicit memory model are available since GCC 4.7. */ +#define p_atomic_set(_v, _i) __atomic_store_n((_v), (_i), __ATOMIC_RELEASE) +#define p_atomic_read(_v) __atomic_load_n((_v), __ATOMIC_ACQUIRE) +#define p_atomic_dec_zero(v) (__atomic_sub_fetch((v), 1, __ATOMIC_ACQ_REL) == 0) +#define p_atomic_inc(v) (void) __atomic_add_fetch((v), 1, __ATOMIC_ACQ_REL) +#define p_atomic_dec(v) (void) __atomic_sub_fetch((v), 1, __ATOMIC_ACQ_REL) +#define p_atomic_add(v, i) (void) __atomic_add_fetch((v), (i), __ATOMIC_ACQ_REL) +#define p_atomic_inc_return(v) __atomic_add_fetch((v), 1, __ATOMIC_ACQ_REL) +#define p_atomic_dec_return(v) __atomic_sub_fetch((v), 1, __ATOMIC_ACQ_REL) +#define p_atomic_add_return(v, i) __atomic_add_fetch((v), (i), __ATOMIC_ACQ_REL) +#define p_atomic_xchg(v, i) __atomic_exchange_n((v), (i), __ATOMIC_ACQ_REL) +#define PIPE_NATIVE_ATOMIC_XCHG + +#else + +#define p_atomic_set(_v, _i) (*(_v) = (_i)) +#define p_atomic_read(_v) (*(_v)) +#define p_atomic_dec_zero(v) (__sync_sub_and_fetch((v), 1) == 0) +#define p_atomic_inc(v) (void) __sync_add_and_fetch((v), 1) +#define p_atomic_dec(v) (void) __sync_sub_and_fetch((v), 1) +#define p_atomic_add(v, i) (void) __sync_add_and_fetch((v), (i)) +#define p_atomic_inc_return(v) __sync_add_and_fetch((v), 1) +#define p_atomic_dec_return(v) __sync_sub_and_fetch((v), 1) +#define p_atomic_add_return(v, i) __sync_add_and_fetch((v), (i)) + +#endif + +/* There is no __atomic_* compare and exchange that returns the current value. + * Also, GCC 5.4 seems unable to optimize a compound statement expression that + * uses an additional stack variable with __atomic_compare_exchange[_n]. + */ +#define p_atomic_cmpxchg(v, old, _new) \ + __sync_val_compare_and_swap((v), (old), (_new)) + +#endif + + + +/* Unlocked version for single threaded environments, such as some + * windows kernel modules. + */ +#if defined(PIPE_ATOMIC_OS_UNLOCKED) + +#define PIPE_ATOMIC "Unlocked" + +#define p_atomic_set(_v, _i) (*(_v) = (_i)) +#define p_atomic_read(_v) (*(_v)) +#define p_atomic_dec_zero(_v) (p_atomic_dec_return(_v) == 0) +#define p_atomic_inc(_v) ((void) p_atomic_inc_return(_v)) +#define p_atomic_dec(_v) ((void) p_atomic_dec_return(_v)) +#define p_atomic_add(_v, _i) ((void) p_atomic_add_return((_v), (_i))) +#define p_atomic_inc_return(_v) (++(*(_v))) +#define p_atomic_dec_return(_v) (--(*(_v))) +#define p_atomic_add_return(_v, _i) (*(_v) = *(_v) + (_i)) +#define p_atomic_cmpxchg(_v, _old, _new) (*(_v) == (_old) ? (*(_v) = (_new), (_old)) : *(_v)) + +#endif + + +#if defined(PIPE_ATOMIC_MSVC_INTRINSIC) + +#define PIPE_ATOMIC "MSVC Intrinsics" + +/* We use the Windows header's Interlocked*64 functions instead of the + * _Interlocked*64 intrinsics wherever we can, as support for the latter varies + * with target CPU, whereas Windows headers take care of all portability + * issues: using intrinsics where available, falling back to library + * implementations where not. 
+ */ +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#include <windows.h> +#include <intrin.h> +#include <assert.h> + +/* MSVC supports decltype keyword, but it's only supported on C++ and doesn't + * quite work here; and if a C++-only solution is worthwhile, then it would be + * better to use templates / function overloading, instead of decltype magic. + * Therefore, we rely on implicit casting to LONGLONG for the functions that return + */ + +#define p_atomic_set(_v, _i) (*(_v) = (_i)) +#define p_atomic_read(_v) (*(_v)) + +#define p_atomic_dec_zero(_v) \ + (p_atomic_dec_return(_v) == 0) + +#define p_atomic_inc(_v) \ + ((void) p_atomic_inc_return(_v)) + +#define p_atomic_inc_return(_v) (\ + sizeof *(_v) == sizeof(short) ? _InterlockedIncrement16((short *) (_v)) : \ + sizeof *(_v) == sizeof(long) ? _InterlockedIncrement ((long *) (_v)) : \ + sizeof *(_v) == sizeof(__int64) ? InterlockedIncrement64 ((__int64 *)(_v)) : \ + (assert(!"should not get here"), 0)) + +#define p_atomic_dec(_v) \ + ((void) p_atomic_dec_return(_v)) + +#define p_atomic_dec_return(_v) (\ + sizeof *(_v) == sizeof(short) ? _InterlockedDecrement16((short *) (_v)) : \ + sizeof *(_v) == sizeof(long) ? _InterlockedDecrement ((long *) (_v)) : \ + sizeof *(_v) == sizeof(__int64) ? InterlockedDecrement64 ((__int64 *)(_v)) : \ + (assert(!"should not get here"), 0)) + +#define p_atomic_add(_v, _i) \ + ((void) p_atomic_add_return((_v), (_i))) + +#define p_atomic_add_return(_v, _i) (\ + sizeof *(_v) == sizeof(char) ? _InterlockedExchangeAdd8 ((char *) (_v), (_i)) : \ + sizeof *(_v) == sizeof(short) ? _InterlockedExchangeAdd16((short *) (_v), (_i)) : \ + sizeof *(_v) == sizeof(long) ? _InterlockedExchangeAdd ((long *) (_v), (_i)) : \ + sizeof *(_v) == sizeof(__int64) ? InterlockedExchangeAdd64((__int64 *)(_v), (_i)) : \ + (assert(!"should not get here"), 0)) + +#define p_atomic_cmpxchg(_v, _old, _new) (\ + sizeof *(_v) == sizeof(char) ? _InterlockedCompareExchange8 ((char *) (_v), (char) (_new), (char) (_old)) : \ + sizeof *(_v) == sizeof(short) ? _InterlockedCompareExchange16((short *) (_v), (short) (_new), (short) (_old)) : \ + sizeof *(_v) == sizeof(long) ? _InterlockedCompareExchange ((long *) (_v), (long) (_new), (long) (_old)) : \ + sizeof *(_v) == sizeof(__int64) ? InterlockedCompareExchange64 ((__int64 *)(_v), (__int64)(_new), (__int64)(_old)) : \ + (assert(!"should not get here"), 0)) + +#endif + +#if defined(PIPE_ATOMIC_OS_SOLARIS) + +#define PIPE_ATOMIC "Solaris OS atomic functions" + +#include <atomic.h> +#include <assert.h> + +#define p_atomic_set(_v, _i) (*(_v) = (_i)) +#define p_atomic_read(_v) (*(_v)) + +#define p_atomic_dec_zero(v) (\ + sizeof(*v) == sizeof(uint8_t) ? atomic_dec_8_nv ((uint8_t *)(v)) == 0 : \ + sizeof(*v) == sizeof(uint16_t) ? atomic_dec_16_nv((uint16_t *)(v)) == 0 : \ + sizeof(*v) == sizeof(uint32_t) ? atomic_dec_32_nv((uint32_t *)(v)) == 0 : \ + sizeof(*v) == sizeof(uint64_t) ? atomic_dec_64_nv((uint64_t *)(v)) == 0 : \ + (assert(!"should not get here"), 0)) + +#define p_atomic_inc(v) (void) (\ + sizeof(*v) == sizeof(uint8_t) ? atomic_inc_8 ((uint8_t *)(v)) : \ + sizeof(*v) == sizeof(uint16_t) ? atomic_inc_16((uint16_t *)(v)) : \ + sizeof(*v) == sizeof(uint32_t) ? atomic_inc_32((uint32_t *)(v)) : \ + sizeof(*v) == sizeof(uint64_t) ? atomic_inc_64((uint64_t *)(v)) : \ + (assert(!"should not get here"), 0)) + +#define p_atomic_inc_return(v) (__typeof(*v))( \ + sizeof(*v) == sizeof(uint8_t) ? atomic_inc_8_nv ((uint8_t *)(v)) : \ + sizeof(*v) == sizeof(uint16_t) ? 
atomic_inc_16_nv((uint16_t *)(v)) : \ + sizeof(*v) == sizeof(uint32_t) ? atomic_inc_32_nv((uint32_t *)(v)) : \ + sizeof(*v) == sizeof(uint64_t) ? atomic_inc_64_nv((uint64_t *)(v)) : \ + (assert(!"should not get here"), 0)) + +#define p_atomic_dec(v) (void) ( \ + sizeof(*v) == sizeof(uint8_t) ? atomic_dec_8 ((uint8_t *)(v)) : \ + sizeof(*v) == sizeof(uint16_t) ? atomic_dec_16((uint16_t *)(v)) : \ + sizeof(*v) == sizeof(uint32_t) ? atomic_dec_32((uint32_t *)(v)) : \ + sizeof(*v) == sizeof(uint64_t) ? atomic_dec_64((uint64_t *)(v)) : \ + (assert(!"should not get here"), 0)) + +#define p_atomic_dec_return(v) (__typeof(*v))( \ + sizeof(*v) == sizeof(uint8_t) ? atomic_dec_8_nv ((uint8_t *)(v)) : \ + sizeof(*v) == sizeof(uint16_t) ? atomic_dec_16_nv((uint16_t *)(v)) : \ + sizeof(*v) == sizeof(uint32_t) ? atomic_dec_32_nv((uint32_t *)(v)) : \ + sizeof(*v) == sizeof(uint64_t) ? atomic_dec_64_nv((uint64_t *)(v)) : \ + (assert(!"should not get here"), 0)) + +#define p_atomic_add(v, i) (void) ( \ + sizeof(*v) == sizeof(uint8_t) ? atomic_add_8 ((uint8_t *)(v), (i)) : \ + sizeof(*v) == sizeof(uint16_t) ? atomic_add_16((uint16_t *)(v), (i)) : \ + sizeof(*v) == sizeof(uint32_t) ? atomic_add_32((uint32_t *)(v), (i)) : \ + sizeof(*v) == sizeof(uint64_t) ? atomic_add_64((uint64_t *)(v), (i)) : \ + (assert(!"should not get here"), 0)) + +#define p_atomic_add_return(v, i) (__typeof(*v)) ( \ + sizeof(*v) == sizeof(uint8_t) ? atomic_add_8_nv ((uint8_t *)(v), (i)) : \ + sizeof(*v) == sizeof(uint16_t) ? atomic_add_16_nv((uint16_t *)(v), (i)) : \ + sizeof(*v) == sizeof(uint32_t) ? atomic_add_32_nv((uint32_t *)(v), (i)) : \ + sizeof(*v) == sizeof(uint64_t) ? atomic_add_64_nv((uint64_t *)(v), (i)) : \ + (assert(!"should not get here"), 0)) + +#define p_atomic_cmpxchg(v, old, _new) (__typeof(*v))( \ + sizeof(*v) == sizeof(uint8_t) ? atomic_cas_8 ((uint8_t *)(v), (uint8_t )(old), (uint8_t )(_new)) : \ + sizeof(*v) == sizeof(uint16_t) ? atomic_cas_16((uint16_t *)(v), (uint16_t)(old), (uint16_t)(_new)) : \ + sizeof(*v) == sizeof(uint32_t) ? atomic_cas_32((uint32_t *)(v), (uint32_t)(old), (uint32_t)(_new)) : \ + sizeof(*v) == sizeof(uint64_t) ? atomic_cas_64((uint64_t *)(v), (uint64_t)(old), (uint64_t)(_new)) : \ + (assert(!"should not get here"), 0)) + +#endif + +#ifndef PIPE_ATOMIC +#error "No pipe_atomic implementation selected" +#endif + +#ifndef PIPE_NATIVE_ATOMIC_XCHG +static inline uint32_t p_atomic_xchg_32(uint32_t *v, uint32_t i) +{ + uint32_t actual = p_atomic_read(v); + uint32_t expected; + do { + expected = actual; + actual = p_atomic_cmpxchg(v, expected, i); + } while (expected != actual); + return actual; +} + +static inline uint64_t p_atomic_xchg_64(uint64_t *v, uint64_t i) +{ + uint64_t actual = p_atomic_read(v); + uint64_t expected; + do { + expected = actual; + actual = p_atomic_cmpxchg(v, expected, i); + } while (expected != actual); + return actual; +} + +#define p_atomic_xchg(v, i) (__typeof(*(v)))( \ + sizeof(*(v)) == sizeof(uint32_t) ? p_atomic_xchg_32((uint32_t *)(v), (uint32_t)(i)) : \ + sizeof(*(v)) == sizeof(uint64_t) ? 
p_atomic_xchg_64((uint64_t *)(v), (uint64_t)(i)) : \ + (assert(!"should not get here"), 0)) +#endif + +#endif /* U_ATOMIC_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/u_debug.c b/third_party/rust/glslopt/glsl-optimizer/src/util/u_debug.c new file mode 100644 index 0000000000..e1c8265965 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/u_debug.c @@ -0,0 +1,440 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * Copyright (c) 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_config.h" + +#include "util/u_debug.h" +#include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_math.h" +#include <inttypes.h> + +#include <stdio.h> +#include <limits.h> /* CHAR_BIT */ +#include <ctype.h> /* isalnum */ + +#ifdef _WIN32 +#include <windows.h> +#include <stdlib.h> +#endif + + +void +_debug_vprintf(const char *format, va_list ap) +{ + static char buf[4096] = {'\0'}; +#if DETECT_OS_WINDOWS || defined(EMBEDDED_DEVICE) + /* We buffer until we find a newline. */ + size_t len = strlen(buf); + int ret = vsnprintf(buf + len, sizeof(buf) - len, format, ap); + if (ret > (int)(sizeof(buf) - len - 1) || strchr(buf + len, '\n')) { + os_log_message(buf); + buf[0] = '\0'; + } +#else + vsnprintf(buf, sizeof(buf), format, ap); + os_log_message(buf); +#endif +} + + +void +_pipe_debug_message(struct pipe_debug_callback *cb, + unsigned *id, + enum pipe_debug_type type, + const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + if (cb && cb->debug_message) + cb->debug_message(cb->data, id, type, fmt, args); + va_end(args); +} + + +void +debug_disable_error_message_boxes(void) +{ +#ifdef _WIN32 + /* When Windows' error message boxes are disabled for this process (as is + * typically the case when running tests in an automated fashion) we disable + * CRT message boxes too. + */ + UINT uMode = SetErrorMode(0); + SetErrorMode(uMode); + if (uMode & SEM_FAILCRITICALERRORS) { + /* Disable assertion failure message box. + * http://msdn.microsoft.com/en-us/library/sas1dkb2.aspx + */ + _set_error_mode(_OUT_TO_STDERR); +#ifdef _MSC_VER + /* Disable abort message box. 
+ * http://msdn.microsoft.com/en-us/library/e631wekh.aspx + */ + _set_abort_behavior(0, _WRITE_ABORT_MSG | _CALL_REPORTFAULT); +#endif + } +#endif /* _WIN32 */ +} + + +#ifdef DEBUG +void +debug_print_blob(const char *name, const void *blob, unsigned size) +{ + const unsigned *ublob = (const unsigned *)blob; + unsigned i; + + debug_printf("%s (%d dwords%s)\n", name, size/4, + size%4 ? "... plus a few bytes" : ""); + + for (i = 0; i < size/4; i++) { + debug_printf("%d:\t%08x\n", i, ublob[i]); + } +} +#endif + + +static bool +debug_get_option_should_print(void) +{ + static bool first = true; + static bool value = false; + + if (!first) + return value; + + /* Oh hey this will call into this function, + * but its cool since we set first to false + */ + first = false; + value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", false); + /* XXX should we print this option? Currently it wont */ + return value; +} + + +const char * +debug_get_option(const char *name, const char *dfault) +{ + const char *result; + + result = os_get_option(name); + if (!result) + result = dfault; + + if (debug_get_option_should_print()) + debug_printf("%s: %s = %s\n", __FUNCTION__, name, + result ? result : "(null)"); + + return result; +} + + +bool +debug_get_bool_option(const char *name, bool dfault) +{ + const char *str = os_get_option(name); + bool result; + + if (str == NULL) + result = dfault; + else if (!strcmp(str, "n")) + result = false; + else if (!strcmp(str, "no")) + result = false; + else if (!strcmp(str, "0")) + result = false; + else if (!strcmp(str, "f")) + result = false; + else if (!strcmp(str, "F")) + result = false; + else if (!strcmp(str, "false")) + result = false; + else if (!strcmp(str, "FALSE")) + result = false; + else + result = true; + + if (debug_get_option_should_print()) + debug_printf("%s: %s = %s\n", __FUNCTION__, name, + result ? "TRUE" : "FALSE"); + + return result; +} + + +long +debug_get_num_option(const char *name, long dfault) +{ + long result; + const char *str; + + str = os_get_option(name); + if (!str) { + result = dfault; + } else { + char *endptr; + + result = strtol(str, &endptr, 0); + if (str == endptr) { + /* Restore the default value when no digits were found. */ + result = dfault; + } + } + + if (debug_get_option_should_print()) + debug_printf("%s: %s = %li\n", __FUNCTION__, name, result); + + return result; +} + + +static bool +str_has_option(const char *str, const char *name) +{ + /* Empty string. */ + if (!*str) { + return false; + } + + /* OPTION=all */ + if (!strcmp(str, "all")) { + return true; + } + + /* Find 'name' in 'str' surrounded by non-alphanumeric characters. */ + { + const char *start = str; + unsigned name_len = strlen(name); + + /* 'start' is the beginning of the currently-parsed word, + * we increment 'str' each iteration. + * if we find either the end of string or a non-alphanumeric character, + * we compare 'start' up to 'str-1' with 'name'. 
*/ + + while (1) { + if (!*str || !(isalnum(*str) || *str == '_')) { + if (str-start == name_len && + !memcmp(start, name, name_len)) { + return true; + } + + if (!*str) { + return false; + } + + start = str+1; + } + + str++; + } + } + + return false; +} + + +uint64_t +debug_get_flags_option(const char *name, + const struct debug_named_value *flags, + uint64_t dfault) +{ + uint64_t result; + const char *str; + const struct debug_named_value *orig = flags; + unsigned namealign = 0; + + str = os_get_option(name); + if (!str) + result = dfault; + else if (!strcmp(str, "help")) { + result = dfault; + _debug_printf("%s: help for %s:\n", __FUNCTION__, name); + for (; flags->name; ++flags) + namealign = MAX2(namealign, strlen(flags->name)); + for (flags = orig; flags->name; ++flags) + _debug_printf("| %*s [0x%0*"PRIx64"]%s%s\n", namealign, flags->name, + (int)sizeof(uint64_t)*CHAR_BIT/4, flags->value, + flags->desc ? " " : "", flags->desc ? flags->desc : ""); + } + else { + result = 0; + while (flags->name) { + if (str_has_option(str, flags->name)) + result |= flags->value; + ++flags; + } + } + + if (debug_get_option_should_print()) { + if (str) { + debug_printf("%s: %s = 0x%"PRIx64" (%s)\n", + __FUNCTION__, name, result, str); + } else { + debug_printf("%s: %s = 0x%"PRIx64"\n", __FUNCTION__, name, result); + } + } + + return result; +} + + +void +_debug_assert_fail(const char *expr, const char *file, unsigned line, + const char *function) +{ + _debug_printf("%s:%u:%s: Assertion `%s' failed.\n", + file, line, function, expr); + os_abort(); +} + + +const char * +debug_dump_enum(const struct debug_named_value *names, + unsigned long value) +{ + static char rest[64]; + + while (names->name) { + if (names->value == value) + return names->name; + ++names; + } + + snprintf(rest, sizeof(rest), "0x%08lx", value); + return rest; +} + + +const char * +debug_dump_enum_noprefix(const struct debug_named_value *names, + const char *prefix, + unsigned long value) +{ + static char rest[64]; + + while (names->name) { + if (names->value == value) { + const char *name = names->name; + while (*name == *prefix) { + name++; + prefix++; + } + return name; + } + ++names; + } + + snprintf(rest, sizeof(rest), "0x%08lx", value); + return rest; +} + + +const char * +debug_dump_flags(const struct debug_named_value *names, unsigned long value) +{ + static char output[4096]; + static char rest[256]; + int first = 1; + + output[0] = '\0'; + + while (names->name) { + if ((names->value & value) == names->value) { + if (!first) + strncat(output, "|", sizeof(output) - strlen(output) - 1); + else + first = 0; + strncat(output, names->name, sizeof(output) - strlen(output) - 1); + output[sizeof(output) - 1] = '\0'; + value &= ~names->value; + } + ++names; + } + + if (value) { + if (!first) + strncat(output, "|", sizeof(output) - strlen(output) - 1); + else + first = 0; + + snprintf(rest, sizeof(rest), "0x%08lx", value); + strncat(output, rest, sizeof(output) - strlen(output) - 1); + output[sizeof(output) - 1] = '\0'; + } + + if (first) + return "0"; + + return output; +} + + + +#ifdef DEBUG +int fl_indent = 0; +const char* fl_function[1024]; + +int +debug_funclog_enter(const char* f, UNUSED const int line, + UNUSED const char* file) +{ + int i; + + for (i = 0; i < fl_indent; i++) + debug_printf(" "); + debug_printf("%s\n", f); + + assert(fl_indent < 1023); + fl_function[fl_indent++] = f; + + return 0; +} + +void +debug_funclog_exit(const char* f, UNUSED const int line, + UNUSED const char* file) +{ + --fl_indent; + assert(fl_indent >= 
0); + assert(fl_function[fl_indent] == f); +} + +void +debug_funclog_enter_exit(const char* f, UNUSED const int line, + UNUSED const char* file) +{ + int i; + for (i = 0; i < fl_indent; i++) + debug_printf(" "); + debug_printf("%s\n", f); +} +#endif diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/u_debug.h b/third_party/rust/glslopt/glsl-optimizer/src/util/u_debug.h new file mode 100644 index 0000000000..a9e44bf24e --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/u_debug.h @@ -0,0 +1,460 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Cross-platform debugging helpers. + * + * For now it just has assert and printf replacements, but it might be extended + * with stack trace reports and more advanced logging in the near future. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#ifndef U_DEBUG_H_ +#define U_DEBUG_H_ + +#include <stdarg.h> +#include <string.h> +#include "util/os_misc.h" +#include "util/detect_os.h" +#include "util/macros.h" + +#if DETECT_OS_HAIKU +/* Haiku provides debug_printf in libroot with OS.h */ +#include <OS.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +#if defined(__GNUC__) +#define _util_printf_format(fmt, list) __attribute__ ((format (printf, fmt, list))) +#else +#define _util_printf_format(fmt, list) +#endif + +void _debug_vprintf(const char *format, va_list ap); + + +static inline void +_debug_printf(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + _debug_vprintf(format, ap); + va_end(ap); +} + + +/** + * Print debug messages. + * + * The actual channel used to output debug message is platform specific. To + * avoid misformating or truncation, follow these rules of thumb: + * - output whole lines + * - avoid outputing large strings (512 bytes is the current maximum length + * that is guaranteed to be printed in all platforms) + */ +#if !DETECT_OS_HAIKU +static inline void +debug_printf(const char *format, ...) _util_printf_format(1,2); + +static inline void +debug_printf(const char *format, ...) +{ +#ifdef DEBUG + va_list ap; + va_start(ap, format); + _debug_vprintf(format, ap); + va_end(ap); +#else + (void) format; /* silence warning */ +#endif +} +#endif + + +/* + * ... 
isn't portable so we need to pass arguments in parentheses. + * + * usage: + * debug_printf_once(("answer: %i\n", 42)); + */ +#define debug_printf_once(args) \ + do { \ + static bool once = true; \ + if (once) { \ + once = false; \ + debug_printf args; \ + } \ + } while (0) + + +#ifdef DEBUG +#define debug_vprintf(_format, _ap) _debug_vprintf(_format, _ap) +#else +#define debug_vprintf(_format, _ap) ((void)0) +#endif + + +#ifdef DEBUG +/** + * Dump a blob in hex to the same place that debug_printf sends its + * messages. + */ +void debug_print_blob( const char *name, const void *blob, unsigned size ); +#else +#define debug_print_blob(_name, _blob, _size) ((void)0) +#endif + + +/** + * Disable interactive error message boxes. + * + * Should be called as soon as possible for effectiveness. + */ +void +debug_disable_error_message_boxes(void); + + +/** + * Hard-coded breakpoint. + */ +#ifdef DEBUG +#define debug_break() os_break() +#else /* !DEBUG */ +#define debug_break() ((void)0) +#endif /* !DEBUG */ + + +long +debug_get_num_option(const char *name, long dfault); + +#ifdef _MSC_VER +__declspec(noreturn) +#endif +void _debug_assert_fail(const char *expr, + const char *file, + unsigned line, + const char *function) +#if defined(__GNUC__) && !defined(DEBUG) + __attribute__((noreturn)) +#endif +; + + +/** + * Assert macro + * + * Do not expect that the assert call terminates -- errors must be handled + * regardless of assert behavior. + * + * For non debug builds the assert macro will expand to a no-op, so do not + * call functions with side effects in the assert expression. + */ +#ifndef NDEBUG +#define debug_assert(expr) ((expr) ? (void)0 : _debug_assert_fail(#expr, __FILE__, __LINE__, __FUNCTION__)) +#else +#define debug_assert(expr) (void)(0 && (expr)) +#endif + + +/** Override standard assert macro */ +#ifdef assert +#undef assert +#endif +#define assert(expr) debug_assert(expr) + + +/** + * Output the current function name. + */ +#ifdef DEBUG +#define debug_checkpoint() \ + _debug_printf("%s\n", __FUNCTION__) +#else +#define debug_checkpoint() \ + ((void)0) +#endif + + +/** + * Output the full source code position. + */ +#ifdef DEBUG +#define debug_checkpoint_full() \ + _debug_printf("%s:%u:%s\n", __FILE__, __LINE__, __FUNCTION__) +#else +#define debug_checkpoint_full() \ + ((void)0) +#endif + + +/** + * Output a warning message. Muted on release version. + */ +#ifdef DEBUG +#define debug_warning(__msg) \ + _debug_printf("%s:%u:%s: warning: %s\n", __FILE__, __LINE__, __FUNCTION__, __msg) +#else +#define debug_warning(__msg) \ + ((void)0) +#endif + + +/** + * Emit a warning message, but only once. + */ +#ifdef DEBUG +#define debug_warn_once(__msg) \ + do { \ + static bool warned = false; \ + if (!warned) { \ + _debug_printf("%s:%u:%s: one time warning: %s\n", \ + __FILE__, __LINE__, __FUNCTION__, __msg); \ + warned = true; \ + } \ + } while (0) +#else +#define debug_warn_once(__msg) \ + ((void)0) +#endif + + +/** + * Output an error message. Not muted on release version. + */ +#ifdef DEBUG +#define debug_error(__msg) \ + _debug_printf("%s:%u:%s: error: %s\n", __FILE__, __LINE__, __FUNCTION__, __msg) +#else +#define debug_error(__msg) \ + _debug_printf("error: %s\n", __msg) +#endif + +/** + * Output a debug log message to the debug info callback. + */ +#define pipe_debug_message(cb, type, fmt, ...) 
do { \ + static unsigned id = 0; \ + if ((cb) && (cb)->debug_message) { \ + _pipe_debug_message(cb, &id, \ + PIPE_DEBUG_TYPE_ ## type, \ + fmt, ##__VA_ARGS__); \ + } \ +} while (0) + +struct pipe_debug_callback; + +void +_pipe_debug_message( + struct pipe_debug_callback *cb, + unsigned *id, + enum pipe_debug_type type, + const char *fmt, ...) _util_printf_format(4, 5); + + +/** + * Used by debug_dump_enum and debug_dump_flags to describe symbols. + */ +struct debug_named_value +{ + const char *name; + uint64_t value; + const char *desc; +}; + + +/** + * Some C pre-processor magic to simplify creating named values. + * + * Example: + * @code + * static const debug_named_value my_names[] = { + * DEBUG_NAMED_VALUE(MY_ENUM_VALUE_X), + * DEBUG_NAMED_VALUE(MY_ENUM_VALUE_Y), + * DEBUG_NAMED_VALUE(MY_ENUM_VALUE_Z), + * DEBUG_NAMED_VALUE_END + * }; + * + * ... + * debug_printf("%s = %s\n", + * name, + * debug_dump_enum(my_names, my_value)); + * ... + * @endcode + */ +#define DEBUG_NAMED_VALUE(__symbol) {#__symbol, (unsigned long)__symbol, NULL} +#define DEBUG_NAMED_VALUE_WITH_DESCRIPTION(__symbol, __desc) {#__symbol, (unsigned long)__symbol, __desc} +#define DEBUG_NAMED_VALUE_END {NULL, 0, NULL} + + +/** + * Convert a enum value to a string. + */ +const char * +debug_dump_enum(const struct debug_named_value *names, + unsigned long value); + +const char * +debug_dump_enum_noprefix(const struct debug_named_value *names, + const char *prefix, + unsigned long value); + + +/** + * Convert binary flags value to a string. + */ +const char * +debug_dump_flags(const struct debug_named_value *names, + unsigned long value); + + +/** + * Function enter exit loggers + */ +#ifdef DEBUG +int debug_funclog_enter(const char* f, const int line, const char* file); +void debug_funclog_exit(const char* f, const int line, const char* file); +void debug_funclog_enter_exit(const char* f, const int line, const char* file); + +#define DEBUG_FUNCLOG_ENTER() \ + int __debug_decleration_work_around = \ + debug_funclog_enter(__FUNCTION__, __LINE__, __FILE__) +#define DEBUG_FUNCLOG_EXIT() \ + do { \ + (void)__debug_decleration_work_around; \ + debug_funclog_exit(__FUNCTION__, __LINE__, __FILE__); \ + return; \ + } while(0) +#define DEBUG_FUNCLOG_EXIT_RET(ret) \ + do { \ + (void)__debug_decleration_work_around; \ + debug_funclog_exit(__FUNCTION__, __LINE__, __FILE__); \ + return ret; \ + } while(0) +#define DEBUG_FUNCLOG_ENTER_EXIT() \ + debug_funclog_enter_exit(__FUNCTION__, __LINE__, __FILE__) + +#else +#define DEBUG_FUNCLOG_ENTER() \ + int __debug_decleration_work_around +#define DEBUG_FUNCLOG_EXIT() \ + do { (void)__debug_decleration_work_around; return; } while(0) +#define DEBUG_FUNCLOG_EXIT_RET(ret) \ + do { (void)__debug_decleration_work_around; return ret; } while(0) +#define DEBUG_FUNCLOG_ENTER_EXIT() +#endif + + +/** + * Get option. + * + * It is an alias for getenv on Linux. + * + * On Windows it reads C:\gallium.cfg, which is a text file with CR+LF line + * endings with one option per line as + * + * NAME=value + * + * This file must be terminated with an extra empty line. 
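+ *
+ * For example, on Linux debug_get_option("GALLIUM_PRINT_OPTIONS", NULL)
+ * simply returns getenv("GALLIUM_PRINT_OPTIONS"), falling back to the
+ * given default when the variable is not set.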
+ */ +const char * +debug_get_option(const char *name, const char *dfault); + +bool +debug_get_bool_option(const char *name, bool dfault); + +long +debug_get_num_option(const char *name, long dfault); + +uint64_t +debug_get_flags_option(const char *name, + const struct debug_named_value *flags, + uint64_t dfault); + +#define DEBUG_GET_ONCE_OPTION(suffix, name, dfault) \ +static const char * \ +debug_get_option_ ## suffix (void) \ +{ \ + static bool first = true; \ + static const char * value; \ + if (first) { \ + first = false; \ + value = debug_get_option(name, dfault); \ + } \ + return value; \ +} + +#define DEBUG_GET_ONCE_BOOL_OPTION(sufix, name, dfault) \ +static bool \ +debug_get_option_ ## sufix (void) \ +{ \ + static bool first = true; \ + static bool value; \ + if (first) { \ + first = false; \ + value = debug_get_bool_option(name, dfault); \ + } \ + return value; \ +} + +#define DEBUG_GET_ONCE_NUM_OPTION(sufix, name, dfault) \ +static long \ +debug_get_option_ ## sufix (void) \ +{ \ + static bool first = true; \ + static long value; \ + if (first) { \ + first = false; \ + value = debug_get_num_option(name, dfault); \ + } \ + return value; \ +} + +#define DEBUG_GET_ONCE_FLAGS_OPTION(sufix, name, flags, dfault) \ +static unsigned long \ +debug_get_option_ ## sufix (void) \ +{ \ + static bool first = true; \ + static unsigned long value; \ + if (first) { \ + first = false; \ + value = debug_get_flags_option(name, flags, dfault); \ + } \ + return value; \ +} + + +#ifdef __cplusplus +} +#endif + +#endif /* U_DEBUG_H_ */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/u_dynarray.h b/third_party/rust/glslopt/glsl-optimizer/src/util/u_dynarray.h new file mode 100644 index 0000000000..000feaa834 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/u_dynarray.h @@ -0,0 +1,214 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_DYNARRAY_H +#define U_DYNARRAY_H + +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include "ralloc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* A zero-initialized version of this is guaranteed to represent an + * empty array. 
+ * + * Also, size <= capacity and data != 0 if and only if capacity != 0 + * capacity will always be the allocation size of data + */ +struct util_dynarray +{ + void *mem_ctx; + void *data; + unsigned size; + unsigned capacity; +}; + +static inline void +util_dynarray_init(struct util_dynarray *buf, void *mem_ctx) +{ + memset(buf, 0, sizeof(*buf)); + buf->mem_ctx = mem_ctx; +} + +static inline void +util_dynarray_fini(struct util_dynarray *buf) +{ + if (buf->data) { + if (buf->mem_ctx) { + ralloc_free(buf->data); + } else { + free(buf->data); + } + util_dynarray_init(buf, buf->mem_ctx); + } +} + +static inline void +util_dynarray_clear(struct util_dynarray *buf) +{ + buf->size = 0; +} + +#define DYN_ARRAY_INITIAL_SIZE 64 + +MUST_CHECK static inline void * +util_dynarray_ensure_cap(struct util_dynarray *buf, unsigned newcap) +{ + if (newcap > buf->capacity) { + unsigned capacity = MAX3(DYN_ARRAY_INITIAL_SIZE, buf->capacity * 2, newcap); + void *data; + + if (buf->mem_ctx) { + data = reralloc_size(buf->mem_ctx, buf->data, capacity); + } else { + data = realloc(buf->data, capacity); + } + if (!data) + return 0; + + buf->data = data; + buf->capacity = capacity; + } + + return (void *)((char *)buf->data + buf->size); +} + +/* use util_dynarray_trim to reduce the allocated storage */ +MUST_CHECK static inline void * +util_dynarray_resize_bytes(struct util_dynarray *buf, unsigned nelts, size_t eltsize) +{ + if (unlikely(nelts > UINT_MAX / eltsize)) + return 0; + + unsigned newsize = nelts * eltsize; + void *p = util_dynarray_ensure_cap(buf, newsize); + if (!p) + return 0; + + buf->size = newsize; + + return p; +} + +static inline void +util_dynarray_clone(struct util_dynarray *buf, void *mem_ctx, + struct util_dynarray *from_buf) +{ + util_dynarray_init(buf, mem_ctx); + if (util_dynarray_resize_bytes(buf, from_buf->size, 1)) + memcpy(buf->data, from_buf->data, from_buf->size); +} + +MUST_CHECK static inline void * +util_dynarray_grow_bytes(struct util_dynarray *buf, unsigned ngrow, size_t eltsize) +{ + unsigned growbytes = ngrow * eltsize; + + if (unlikely(ngrow > (UINT_MAX / eltsize) || + growbytes > UINT_MAX - buf->size)) + return 0; + + unsigned newsize = buf->size + growbytes; + void *p = util_dynarray_ensure_cap(buf, newsize); + if (!p) + return 0; + + buf->size = newsize; + + return p; +} + +static inline void +util_dynarray_trim(struct util_dynarray *buf) +{ + if (buf->size != buf->capacity) { + if (buf->size) { + if (buf->mem_ctx) { + buf->data = reralloc_size(buf->mem_ctx, buf->data, buf->size); + } else { + buf->data = realloc(buf->data, buf->size); + } + buf->capacity = buf->size; + } else { + if (buf->mem_ctx) { + ralloc_free(buf->data); + } else { + free(buf->data); + } + buf->data = NULL; + buf->capacity = 0; + } + } +} + +#define util_dynarray_append(buf, type, v) do {type __v = (v); memcpy(util_dynarray_grow_bytes((buf), 1, sizeof(type)), &__v, sizeof(type));} while(0) +/* Returns a pointer to the space of the first new element (in case of growth) or NULL on failure. 
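A minimal usage sketch of util_dynarray with the malloc/free backend (NULL ralloc context); util_dynarray_foreach is defined a few lines further down, and the sum_of_squares helper is purely illustrative:

   static int
   sum_of_squares(void)
   {
      struct util_dynarray squares;
      int sum = 0;

      util_dynarray_init(&squares, NULL);        /* no ralloc context: plain malloc/free */
      for (int i = 0; i < 8; i++)
         util_dynarray_append(&squares, int, i * i);

      util_dynarray_foreach(&squares, int, v)    /* v is an int* into the array */
         sum += *v;

      util_dynarray_fini(&squares);              /* releases the storage */
      return sum;
   }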
*/ +#define util_dynarray_resize(buf, type, nelts) util_dynarray_resize_bytes(buf, (nelts), sizeof(type)) +#define util_dynarray_grow(buf, type, ngrow) util_dynarray_grow_bytes(buf, (ngrow), sizeof(type)) +#define util_dynarray_top_ptr(buf, type) (type*)((char*)(buf)->data + (buf)->size - sizeof(type)) +#define util_dynarray_top(buf, type) *util_dynarray_top_ptr(buf, type) +#define util_dynarray_pop_ptr(buf, type) (type*)((char*)(buf)->data + ((buf)->size -= sizeof(type))) +#define util_dynarray_pop(buf, type) *util_dynarray_pop_ptr(buf, type) +#define util_dynarray_contains(buf, type) ((buf)->size >= sizeof(type)) +#define util_dynarray_element(buf, type, idx) ((type*)(buf)->data + (idx)) +#define util_dynarray_begin(buf) ((buf)->data) +#define util_dynarray_end(buf) ((void*)util_dynarray_element((buf), char, (buf)->size)) +#define util_dynarray_num_elements(buf, type) ((buf)->size / sizeof(type)) + +#define util_dynarray_foreach(buf, type, elem) \ + for (type *elem = (type *)(buf)->data; \ + elem < (type *)((char *)(buf)->data + (buf)->size); elem++) + +#define util_dynarray_foreach_reverse(buf, type, elem) \ + if ((buf)->size > 0) \ + for (type *elem = util_dynarray_top_ptr(buf, type); \ + elem; \ + elem = elem > (type *)(buf)->data ? elem - 1 : NULL) + +#define util_dynarray_delete_unordered(buf, type, v) \ + do { \ + unsigned num_elements = (buf)->size / sizeof(type); \ + unsigned i; \ + for (i = 0; i < num_elements; i++) { \ + type __v = *util_dynarray_element((buf), type, (i)); \ + if (v == __v) { \ + memcpy(util_dynarray_element((buf), type, (i)), \ + util_dynarray_pop_ptr((buf), type), sizeof(type)); \ + break; \ + } \ + } \ + } while (0) + +#ifdef __cplusplus +} +#endif + +#endif /* U_DYNARRAY_H */ + diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/u_endian.h b/third_party/rust/glslopt/glsl-optimizer/src/util/u_endian.h new file mode 100644 index 0000000000..6bbae3c444 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/u_endian.h @@ -0,0 +1,89 @@ +/************************************************************************** + * + * Copyright 2007-2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ +#ifndef U_ENDIAN_H +#define U_ENDIAN_H + +#ifdef HAVE_ENDIAN_H +#include <endian.h> + +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define UTIL_ARCH_LITTLE_ENDIAN 1 +# define UTIL_ARCH_BIG_ENDIAN 0 +#elif __BYTE_ORDER == __BIG_ENDIAN +# define UTIL_ARCH_LITTLE_ENDIAN 0 +# define UTIL_ARCH_BIG_ENDIAN 1 +#endif + +#elif defined(__APPLE__) +#include <machine/endian.h> + +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN +# define UTIL_ARCH_LITTLE_ENDIAN 1 +# define UTIL_ARCH_BIG_ENDIAN 0 +#elif __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN +# define UTIL_ARCH_LITTLE_ENDIAN 0 +# define UTIL_ARCH_BIG_ENDIAN 1 +#endif + +#elif defined(__sun) +#include <sys/isa_defs.h> + +#if defined(_LITTLE_ENDIAN) +# define UTIL_ARCH_LITTLE_ENDIAN 1 +# define UTIL_ARCH_BIG_ENDIAN 0 +#elif defined(_BIG_ENDIAN) +# define UTIL_ARCH_LITTLE_ENDIAN 0 +# define UTIL_ARCH_BIG_ENDIAN 1 +#endif + +#elif defined(__OpenBSD__) || defined(__NetBSD__) || \ + defined(__FreeBSD__) || defined(__DragonFly__) +#include <sys/types.h> +#include <machine/endian.h> + +#if _BYTE_ORDER == _LITTLE_ENDIAN +# define UTIL_ARCH_LITTLE_ENDIAN 1 +# define UTIL_ARCH_BIG_ENDIAN 0 +#elif _BYTE_ORDER == _BIG_ENDIAN +# define UTIL_ARCH_LITTLE_ENDIAN 0 +# define UTIL_ARCH_BIG_ENDIAN 1 +#endif + +#elif defined(_WIN32) || defined(ANDROID) + +#define UTIL_ARCH_LITTLE_ENDIAN 1 +#define UTIL_ARCH_BIG_ENDIAN 0 + +#endif + +#if !defined(UTIL_ARCH_LITTLE_ENDIAN) || !defined(UTIL_ARCH_BIG_ENDIAN) +# error "UTIL_ARCH_LITTLE_ENDIAN and/or UTIL_ARCH_BIG_ENDIAN were unset." +#elif UTIL_ARCH_LITTLE_ENDIAN == UTIL_ARCH_BIG_ENDIAN +# error "UTIL_ARCH_LITTLE_ENDIAN and UTIL_ARCH_BIG_ENDIAN must not both be 1 or 0." +#endif + +#endif diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/u_math.c b/third_party/rust/glslopt/glsl-optimizer/src/util/u_math.c new file mode 100644 index 0000000000..9a8a9ecbbd --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/u_math.c @@ -0,0 +1,139 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + + +#include "pipe/p_config.h" +#include "util/u_math.h" +#include "util/u_cpu_detect.h" + +#if defined(PIPE_ARCH_SSE) +#include <xmmintrin.h> +/* This is defined in pmmintrin.h, but it can only be included when -msse3 is + * used, so just define it here to avoid further. */ +#ifndef _MM_DENORMALS_ZERO_MASK +#define _MM_DENORMALS_ZERO_MASK 0x0040 +#endif +#endif + + +/** 2^x, for x in [-1.0, 1.0) */ +float pow2_table[POW2_TABLE_SIZE]; + + +static void +init_pow2_table(void) +{ + int i; + for (i = 0; i < POW2_TABLE_SIZE; i++) + pow2_table[i] = exp2f((i - POW2_TABLE_OFFSET) / POW2_TABLE_SCALE); +} + + +/** log2(x), for x in [1.0, 2.0) */ +float log2_table[LOG2_TABLE_SIZE]; + + +static void +init_log2_table(void) +{ + unsigned i; + for (i = 0; i < LOG2_TABLE_SIZE; i++) + log2_table[i] = (float) log2(1.0 + i * (1.0 / LOG2_TABLE_SCALE)); +} + + +/** + * One time init for math utilities. + */ +void +util_init_math(void) +{ + static bool initialized = false; + if (!initialized) { + init_pow2_table(); + init_log2_table(); + initialized = true; + } +} + +/** + * Fetches the contents of the fpstate (mxcsr on x86) register. + * + * On platforms without support for it just returns 0. + */ +unsigned +util_fpstate_get(void) +{ + unsigned mxcsr = 0; + +#if defined(PIPE_ARCH_SSE) + if (util_cpu_caps.has_sse) { + mxcsr = _mm_getcsr(); + } +#endif + + return mxcsr; +} + +/** + * Make sure that the fp treats the denormalized floating + * point numbers as zero. + * + * This is the behavior required by D3D10. OpenGL doesn't care. + */ +unsigned +util_fpstate_set_denorms_to_zero(unsigned current_mxcsr) +{ +#if defined(PIPE_ARCH_SSE) + if (util_cpu_caps.has_sse) { + /* Enable flush to zero mode */ + current_mxcsr |= _MM_FLUSH_ZERO_MASK; + if (util_cpu_caps.has_daz) { + /* Enable denormals are zero mode */ + current_mxcsr |= _MM_DENORMALS_ZERO_MASK; + } + util_fpstate_set(current_mxcsr); + } +#endif + return current_mxcsr; +} + +/** + * Set the state of the fpstate (mxcsr on x86) register. + * + * On platforms without support for it's a noop. + */ +void +util_fpstate_set(unsigned mxcsr) +{ +#if defined(PIPE_ARCH_SSE) + if (util_cpu_caps.has_sse) { + _mm_setcsr(mxcsr); + } +#endif +} diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/u_math.h b/third_party/rust/glslopt/glsl-optimizer/src/util/u_math.h new file mode 100644 index 0000000000..59266c1692 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/u_math.h @@ -0,0 +1,828 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Math utilities and approximations for common math functions. + * Reduced precision is usually acceptable in shaders... + * + * "fast" is used in the names of functions which are low-precision, + * or at least lower-precision than the normal C lib functions. + */ + + +#ifndef U_MATH_H +#define U_MATH_H + + +#include "c99_math.h" +#include <assert.h> +#include <float.h> +#include <stdarg.h> + +#include "bitscan.h" +#include "u_endian.h" /* for UTIL_ARCH_BIG_ENDIAN */ + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifndef M_SQRT2 +#define M_SQRT2 1.41421356237309504880 +#endif + +#define POW2_TABLE_SIZE_LOG2 9 +#define POW2_TABLE_SIZE (1 << POW2_TABLE_SIZE_LOG2) +#define POW2_TABLE_OFFSET (POW2_TABLE_SIZE/2) +#define POW2_TABLE_SCALE ((float)(POW2_TABLE_SIZE/2)) +extern float pow2_table[POW2_TABLE_SIZE]; + + +/** + * Initialize math module. This should be called before using any + * other functions in this module. + */ +extern void +util_init_math(void); + + +union fi { + float f; + int32_t i; + uint32_t ui; +}; + + +union di { + double d; + int64_t i; + uint64_t ui; +}; + + +/** + * Extract the IEEE float32 exponent. + */ +static inline signed +util_get_float32_exponent(float x) +{ + union fi f; + + f.f = x; + + return ((f.ui >> 23) & 0xff) - 127; +} + + +/** + * Fast version of 2^x + * Identity: exp2(a + b) = exp2(a) * exp2(b) + * Let ipart = int(x) + * Let fpart = x - ipart; + * So, exp2(x) = exp2(ipart) * exp2(fpart) + * Compute exp2(ipart) with i << ipart + * Compute exp2(fpart) with lookup table. + */ +static inline float +util_fast_exp2(float x) +{ + int32_t ipart; + float fpart, mpart; + union fi epart; + + if(x > 129.00000f) + return 3.402823466e+38f; + + if (x < -126.99999f) + return 0.0f; + + ipart = (int32_t) x; + fpart = x - (float) ipart; + + /* same as + * epart.f = (float) (1 << ipart) + * but faster and without integer overflow for ipart > 31 + */ + epart.i = (ipart + 127 ) << 23; + + mpart = pow2_table[POW2_TABLE_OFFSET + (int)(fpart * POW2_TABLE_SCALE)]; + + return epart.f * mpart; +} + + +/** + * Fast approximation to exp(x). + */ +static inline float +util_fast_exp(float x) +{ + const float k = 1.44269f; /* = log2(e) */ + return util_fast_exp2(k * x); +} + + +#define LOG2_TABLE_SIZE_LOG2 16 +#define LOG2_TABLE_SCALE (1 << LOG2_TABLE_SIZE_LOG2) +#define LOG2_TABLE_SIZE (LOG2_TABLE_SCALE + 1) +extern float log2_table[LOG2_TABLE_SIZE]; + + +/** + * Fast approximation to log2(x). + */ +static inline float +util_fast_log2(float x) +{ + union fi num; + float epart, mpart; + num.f = x; + epart = (float)(((num.i & 0x7f800000) >> 23) - 127); + /* mpart = log2_table[mantissa*LOG2_TABLE_SCALE + 0.5] */ + mpart = log2_table[((num.i & 0x007fffff) + (1 << (22 - LOG2_TABLE_SIZE_LOG2))) >> (23 - LOG2_TABLE_SIZE_LOG2)]; + return epart + mpart; +} + + +/** + * Fast approximation to x^y. 
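A minimal sketch of the reduced-precision helpers above (util_fast_pow follows just below); util_init_math() must have run once so the lookup tables are filled, and the demo wrapper is purely illustrative:

   static void
   demo_fast_math(void)
   {
      util_init_math();                      /* one-time table initialization */

      float e = util_fast_exp2(3.5f);        /* ~11.3, table based */
      float l = util_fast_log2(1024.0f);     /* ~10.0 */
      float p = util_fast_pow(3.0f, 4.0f);   /* ~81.0 = exp2(log2(3) * 4) */
      (void)e; (void)l; (void)p;
   }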
+ */ +static inline float +util_fast_pow(float x, float y) +{ + return util_fast_exp2(util_fast_log2(x) * y); +} + + +/** + * Floor(x), returned as int. + */ +static inline int +util_ifloor(float f) +{ +#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__) + /* + * IEEE floor for computers that round to nearest or even. + * 'f' must be between -4194304 and 4194303. + * This floor operation is done by "(iround(f + .5) + iround(f - .5)) >> 1", + * but uses some IEEE specific tricks for better speed. + * Contributed by Josh Vanderhoof + */ + int ai, bi; + double af, bf; + af = (3 << 22) + 0.5 + (double)f; + bf = (3 << 22) + 0.5 - (double)f; + /* GCC generates an extra fstp/fld without this. */ + __asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st"); + __asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st"); + return (ai - bi) >> 1; +#else + int ai, bi; + double af, bf; + union fi u; + af = (3 << 22) + 0.5 + (double) f; + bf = (3 << 22) + 0.5 - (double) f; + u.f = (float) af; ai = u.i; + u.f = (float) bf; bi = u.i; + return (ai - bi) >> 1; +#endif +} + + +/** + * Round float to nearest int. + */ +static inline int +util_iround(float f) +{ +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) + int r; + __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st"); + return r; +#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) + int r; + _asm { + fld f + fistp r + } + return r; +#else + if (f >= 0.0f) + return (int) (f + 0.5f); + else + return (int) (f - 0.5f); +#endif +} + + +/** + * Approximate floating point comparison + */ +static inline bool +util_is_approx(float a, float b, float tol) +{ + return fabsf(b - a) <= tol; +} + + +/** + * util_is_X_inf_or_nan = test if x is NaN or +/- Inf + * util_is_X_nan = test if x is NaN + * util_X_inf_sign = return +1 for +Inf, -1 for -Inf, or 0 for not Inf + * + * NaN can be checked with x != x, however this fails with the fast math flag + **/ + + +/** + * Single-float + */ +static inline bool +util_is_inf_or_nan(float x) +{ + union fi tmp; + tmp.f = x; + return (tmp.ui & 0x7f800000) == 0x7f800000; +} + + +static inline bool +util_is_nan(float x) +{ + union fi tmp; + tmp.f = x; + return (tmp.ui & 0x7fffffff) > 0x7f800000; +} + + +static inline int +util_inf_sign(float x) +{ + union fi tmp; + tmp.f = x; + if ((tmp.ui & 0x7fffffff) != 0x7f800000) { + return 0; + } + + return (x < 0) ? -1 : 1; +} + + +/** + * Double-float + */ +static inline bool +util_is_double_inf_or_nan(double x) +{ + union di tmp; + tmp.d = x; + return (tmp.ui & 0x7ff0000000000000ULL) == 0x7ff0000000000000ULL; +} + + +static inline bool +util_is_double_nan(double x) +{ + union di tmp; + tmp.d = x; + return (tmp.ui & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL; +} + + +static inline int +util_double_inf_sign(double x) +{ + union di tmp; + tmp.d = x; + if ((tmp.ui & 0x7fffffffffffffffULL) != 0x7ff0000000000000ULL) { + return 0; + } + + return (x < 0) ? -1 : 1; +} + + +/** + * Half-float + */ +static inline bool +util_is_half_inf_or_nan(int16_t x) +{ + return (x & 0x7c00) == 0x7c00; +} + + +static inline bool +util_is_half_nan(int16_t x) +{ + return (x & 0x7fff) > 0x7c00; +} + + +static inline int +util_half_inf_sign(int16_t x) +{ + if ((x & 0x7fff) != 0x7c00) { + return 0; + } + + return (x < 0) ? -1 : 1; +} + + +/** + * Return float bits. + */ +static inline unsigned +fui( float f ) +{ + union fi fi; + fi.f = f; + return fi.ui; +} + +static inline float +uif(uint32_t ui) +{ + union fi fi; + fi.ui = ui; + return fi.f; +} + + +/** + * Convert uint8_t to float in [0, 1]. 
+ */ +static inline float +ubyte_to_float(uint8_t ub) +{ + return (float) ub * (1.0f / 255.0f); +} + + +/** + * Convert float in [0,1] to uint8_t in [0,255] with clamping. + */ +static inline uint8_t +float_to_ubyte(float f) +{ + /* return 0 for NaN too */ + if (!(f > 0.0f)) { + return (uint8_t) 0; + } + else if (f >= 1.0f) { + return (uint8_t) 255; + } + else { + union fi tmp; + tmp.f = f; + tmp.f = tmp.f * (255.0f/256.0f) + 32768.0f; + return (uint8_t) tmp.i; + } +} + +/** + * Convert uint16_t to float in [0, 1]. + */ +static inline float +ushort_to_float(uint16_t us) +{ + return (float) us * (1.0f / 65535.0f); +} + + +/** + * Convert float in [0,1] to uint16_t in [0,65535] with clamping. + */ +static inline uint16_t +float_to_ushort(float f) +{ + /* return 0 for NaN too */ + if (!(f > 0.0f)) { + return (uint16_t) 0; + } + else if (f >= 1.0f) { + return (uint16_t) 65535; + } + else { + union fi tmp; + tmp.f = f; + tmp.f = tmp.f * (65535.0f/65536.0f) + 128.0f; + return (uint16_t) tmp.i; + } +} + +static inline float +byte_to_float_tex(int8_t b) +{ + return (b == -128) ? -1.0F : b * 1.0F / 127.0F; +} + +static inline int8_t +float_to_byte_tex(float f) +{ + return (int8_t) (127.0F * f); +} + +/** + * Calc log base 2 + */ +static inline unsigned +util_logbase2(unsigned n) +{ +#if defined(HAVE___BUILTIN_CLZ) + return ((sizeof(unsigned) * 8 - 1) - __builtin_clz(n | 1)); +#else + unsigned pos = 0; + if (n >= 1<<16) { n >>= 16; pos += 16; } + if (n >= 1<< 8) { n >>= 8; pos += 8; } + if (n >= 1<< 4) { n >>= 4; pos += 4; } + if (n >= 1<< 2) { n >>= 2; pos += 2; } + if (n >= 1<< 1) { pos += 1; } + return pos; +#endif +} + +static inline uint64_t +util_logbase2_64(uint64_t n) +{ +#if defined(HAVE___BUILTIN_CLZLL) + return ((sizeof(uint64_t) * 8 - 1) - __builtin_clzll(n | 1)); +#else + uint64_t pos = 0ull; + if (n >= 1ull<<32) { n >>= 32; pos += 32; } + if (n >= 1ull<<16) { n >>= 16; pos += 16; } + if (n >= 1ull<< 8) { n >>= 8; pos += 8; } + if (n >= 1ull<< 4) { n >>= 4; pos += 4; } + if (n >= 1ull<< 2) { n >>= 2; pos += 2; } + if (n >= 1ull<< 1) { pos += 1; } + return pos; +#endif +} + +/** + * Returns the ceiling of log n base 2, and 0 when n == 0. Equivalently, + * returns the smallest x such that n <= 2**x. 
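A minimal sketch of the unorm conversion and log2 helpers above; the 1024-texel width and the demo wrapper are arbitrary illustration values:

   static void
   demo_unorm_and_log2(void)
   {
      uint8_t u = float_to_ubyte(0.5f);            /* 128; clamps and maps NaN to 0 */
      float   f = ubyte_to_float(u);               /* ~0.502 */

      unsigned width  = 1024;
      unsigned levels = 1 + util_logbase2(width);  /* 11 mip levels for a full chain */
      (void)f; (void)levels;
   }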
+ */ +static inline unsigned +util_logbase2_ceil(unsigned n) +{ + if (n <= 1) + return 0; + + return 1 + util_logbase2(n - 1); +} + +static inline uint64_t +util_logbase2_ceil64(uint64_t n) +{ + if (n <= 1) + return 0; + + return 1ull + util_logbase2_64(n - 1); +} + +/** + * Returns the smallest power of two >= x + */ +static inline unsigned +util_next_power_of_two(unsigned x) +{ +#if defined(HAVE___BUILTIN_CLZ) + if (x <= 1) + return 1; + + return (1 << ((sizeof(unsigned) * 8) - __builtin_clz(x - 1))); +#else + unsigned val = x; + + if (x <= 1) + return 1; + + if (util_is_power_of_two_or_zero(x)) + return x; + + val--; + val = (val >> 1) | val; + val = (val >> 2) | val; + val = (val >> 4) | val; + val = (val >> 8) | val; + val = (val >> 16) | val; + val++; + return val; +#endif +} + +static inline uint64_t +util_next_power_of_two64(uint64_t x) +{ +#if defined(HAVE___BUILTIN_CLZLL) + if (x <= 1) + return 1; + + return (1ull << ((sizeof(uint64_t) * 8) - __builtin_clzll(x - 1))); +#else + uint64_t val = x; + + if (x <= 1) + return 1; + + if (util_is_power_of_two_or_zero64(x)) + return x; + + val--; + val = (val >> 1) | val; + val = (val >> 2) | val; + val = (val >> 4) | val; + val = (val >> 8) | val; + val = (val >> 16) | val; + val = (val >> 32) | val; + val++; + return val; +#endif +} + +/** + * Reverse bits in n + * Algorithm taken from: + * http://stackoverflow.com/questions/9144800/c-reverse-bits-in-unsigned-integer + */ +static inline unsigned +util_bitreverse(unsigned n) +{ + n = ((n >> 1) & 0x55555555u) | ((n & 0x55555555u) << 1); + n = ((n >> 2) & 0x33333333u) | ((n & 0x33333333u) << 2); + n = ((n >> 4) & 0x0f0f0f0fu) | ((n & 0x0f0f0f0fu) << 4); + n = ((n >> 8) & 0x00ff00ffu) | ((n & 0x00ff00ffu) << 8); + n = ((n >> 16) & 0xffffu) | ((n & 0xffffu) << 16); + return n; +} + +/** + * Convert from little endian to CPU byte order. + */ + +#if UTIL_ARCH_BIG_ENDIAN +#define util_le64_to_cpu(x) util_bswap64(x) +#define util_le32_to_cpu(x) util_bswap32(x) +#define util_le16_to_cpu(x) util_bswap16(x) +#else +#define util_le64_to_cpu(x) (x) +#define util_le32_to_cpu(x) (x) +#define util_le16_to_cpu(x) (x) +#endif + +#define util_cpu_to_le64(x) util_le64_to_cpu(x) +#define util_cpu_to_le32(x) util_le32_to_cpu(x) +#define util_cpu_to_le16(x) util_le16_to_cpu(x) + +/** + * Reverse byte order of a 32 bit word. + */ +static inline uint32_t +util_bswap32(uint32_t n) +{ +#if defined(HAVE___BUILTIN_BSWAP32) + return __builtin_bswap32(n); +#else + return (n >> 24) | + ((n >> 8) & 0x0000ff00) | + ((n << 8) & 0x00ff0000) | + (n << 24); +#endif +} + +/** + * Reverse byte order of a 64bit word. + */ +static inline uint64_t +util_bswap64(uint64_t n) +{ +#if defined(HAVE___BUILTIN_BSWAP64) + return __builtin_bswap64(n); +#else + return ((uint64_t)util_bswap32((uint32_t)n) << 32) | + util_bswap32((n >> 32)); +#endif +} + + +/** + * Reverse byte order of a 16 bit word. + */ +static inline uint16_t +util_bswap16(uint16_t n) +{ + return (n >> 8) | + (n << 8); +} + +static inline void* +util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t n) +{ +#if UTIL_ARCH_BIG_ENDIAN + size_t i, e; + assert(n % 4 == 0); + + for (i = 0, e = n / 4; i < e; i++) { + uint32_t * restrict d = (uint32_t* restrict)dest; + const uint32_t * restrict s = (const uint32_t* restrict)src; + d[i] = util_bswap32(s[i]); + } + return dest; +#else + return memcpy(dest, src, n); +#endif +} + +/** + * Clamp X to [MIN, MAX]. + * This is a macro to allow float, int, uint, etc. types. + * We arbitrarily turn NaN into MIN. 
+ */ +#define CLAMP( X, MIN, MAX ) ( (X)>(MIN) ? ((X)>(MAX) ? (MAX) : (X)) : (MIN) ) + +#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) +#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) + +#define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C)) +#define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C)) + +#define MIN4( A, B, C, D ) ((A) < (B) ? MIN3(A, C, D) : MIN3(B, C, D)) +#define MAX4( A, B, C, D ) ((A) > (B) ? MAX3(A, C, D) : MAX3(B, C, D)) + + +/** + * Align a value up to an alignment value + * + * If \c value is not already aligned to the requested alignment value, it + * will be rounded up. + * + * \param value Value to be rounded + * \param alignment Alignment value to be used. This must be a power of two. + * + * \sa ROUND_DOWN_TO() + */ +static inline uintptr_t +ALIGN(uintptr_t value, int32_t alignment) +{ + assert(util_is_power_of_two_nonzero(alignment)); + return (((value) + (alignment) - 1) & ~((alignment) - 1)); +} + +/** + * Like ALIGN(), but works with a non-power-of-two alignment. + */ +static inline uintptr_t +ALIGN_NPOT(uintptr_t value, int32_t alignment) +{ + assert(alignment > 0); + return (value + alignment - 1) / alignment * alignment; +} + +/** + * Align a value down to an alignment value + * + * If \c value is not already aligned to the requested alignment value, it + * will be rounded down. + * + * \param value Value to be rounded + * \param alignment Alignment value to be used. This must be a power of two. + * + * \sa ALIGN() + */ +static inline uintptr_t +ROUND_DOWN_TO(uintptr_t value, int32_t alignment) +{ + assert(util_is_power_of_two_nonzero(alignment)); + return ((value) & ~(alignment - 1)); +} + +/** + * Align a value, only works pot alignemnts. + */ +static inline int +align(int value, int alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + +static inline uint64_t +align64(uint64_t value, unsigned alignment) +{ + return (value + alignment - 1) & ~((uint64_t)alignment - 1); +} + +/** + * Works like align but on npot alignments. + */ +static inline size_t +util_align_npot(size_t value, size_t alignment) +{ + if (value % alignment) + return value + (alignment - (value % alignment)); + return value; +} + +static inline unsigned +u_minify(unsigned value, unsigned levels) +{ + return MAX2(1, value >> levels); +} + +#ifndef COPY_4V +#define COPY_4V( DST, SRC ) \ +do { \ + (DST)[0] = (SRC)[0]; \ + (DST)[1] = (SRC)[1]; \ + (DST)[2] = (SRC)[2]; \ + (DST)[3] = (SRC)[3]; \ +} while (0) +#endif + + +#ifndef COPY_4FV +#define COPY_4FV( DST, SRC ) COPY_4V(DST, SRC) +#endif + + +#ifndef ASSIGN_4V +#define ASSIGN_4V( DST, V0, V1, V2, V3 ) \ +do { \ + (DST)[0] = (V0); \ + (DST)[1] = (V1); \ + (DST)[2] = (V2); \ + (DST)[3] = (V3); \ +} while (0) +#endif + + +static inline uint32_t +util_unsigned_fixed(float value, unsigned frac_bits) +{ + return value < 0 ? 0 : (uint32_t)(value * (1<<frac_bits)); +} + +static inline int32_t +util_signed_fixed(float value, unsigned frac_bits) +{ + return (int32_t)(value * (1<<frac_bits)); +} + +unsigned +util_fpstate_get(void); +unsigned +util_fpstate_set_denorms_to_zero(unsigned current_fpstate); +void +util_fpstate_set(unsigned fpstate); + +/** + * For indexed draw calls, return true if the vertex count to be drawn is + * much lower than the vertex count that has to be uploaded, meaning + * that the driver should flatten indices instead of trying to upload + * a too big range. + * + * This is used by vertex upload code in u_vbuf and glthread. 
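A minimal sketch of the clamping, alignment and minify helpers defined above; the sizes are arbitrary example values:

   static void
   demo_align_clamp(void)
   {
      float     t    = CLAMP(1.5f, 0.0f, 1.0f);    /* 1.0f; a NaN input yields the MIN */
      unsigned  w3   = u_minify(1000, 3);          /* 125 = MAX2(1, 1000 >> 3) */
      uintptr_t up   = ALIGN(1000, 256);           /* 1024; alignment must be a power of two */
      uintptr_t down = ROUND_DOWN_TO(1000, 256);   /* 768 */
      (void)t; (void)w3; (void)up; (void)down;
   }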
+ */ +static inline bool +util_is_vbo_upload_ratio_too_large(unsigned draw_vertex_count, + unsigned upload_vertex_count) +{ + if (draw_vertex_count > 1024) + return upload_vertex_count > draw_vertex_count * 4; + else if (draw_vertex_count > 32) + return upload_vertex_count > draw_vertex_count * 8; + else + return upload_vertex_count > draw_vertex_count * 16; +} + +#ifdef __cplusplus +} +#endif + +#endif /* U_MATH_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/u_memory.h b/third_party/rust/glslopt/glsl-optimizer/src/util/u_memory.h new file mode 100644 index 0000000000..4cdccb66aa --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/u_memory.h @@ -0,0 +1,99 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* + * Memory functions + */ + + +#ifndef U_MEMORY_H +#define U_MEMORY_H + +#include "util/u_debug.h" +#include "util/os_memory.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +#define MALLOC(_size) os_malloc(_size) + +#define CALLOC(_count, _size) os_calloc(_count, _size) + +#define FREE(_ptr ) os_free(_ptr) + +#define REALLOC(_ptr, _old_size, _size) os_realloc(_ptr, _old_size, _size) + +#define MALLOC_STRUCT(T) (struct T *) MALLOC(sizeof(struct T)) + +#define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T)) + +#define CALLOC_VARIANT_LENGTH_STRUCT(T,more_size) ((struct T *) CALLOC(1, sizeof(struct T) + more_size)) + + +#define align_malloc(_size, _alignment) os_malloc_aligned(_size, _alignment) +#define align_free(_ptr) os_free_aligned(_ptr) +#define align_realloc(_ptr, _oldsize, _newsize, _alignment) os_realloc_aligned(_ptr, _oldsize, _newsize, _alignment) + +static inline void * +align_calloc(size_t size, unsigned long alignment) +{ + void *ptr = align_malloc(size, alignment); + if (ptr) + memset(ptr, 0, size); + return ptr; +} + +/** + * Duplicate a block of memory. + */ +static inline void * +mem_dup(const void *src, size_t size) +{ + void *dup = MALLOC(size); + if (dup) + memcpy(dup, src, size); + return dup; +} + + +/** + * Offset of a field in a struct, in bytes. 
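A minimal sketch of the allocation macros above; struct tile_info and the demo wrapper are hypothetical:

   struct tile_info { unsigned w, h, pitch; };

   static void
   demo_alloc(void)
   {
      struct tile_info *info = CALLOC_STRUCT(tile_info);        /* zero-initialized */
      if (!info)
         return;
      info->w = info->h = 64;

      struct tile_info *copy = mem_dup(info, sizeof(*info));    /* byte-wise duplicate */
      FREE(copy);
      FREE(info);
   }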
+ */ +#define Offset(TYPE, MEMBER) ((uintptr_t)&(((TYPE *)NULL)->MEMBER)) + + + +#ifdef __cplusplus +} +#endif + + +#endif /* U_MEMORY_H */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/u_queue.h b/third_party/rust/glslopt/glsl-optimizer/src/util/u_queue.h new file mode 100644 index 0000000000..5943df4fcb --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/u_queue.h @@ -0,0 +1,277 @@ +/* + * Copyright © 2016 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/* Job queue with execution in a separate thread. + * + * Jobs can be added from any thread. After that, the wait call can be used + * to wait for completion of the job. + */ + +#ifndef U_QUEUE_H +#define U_QUEUE_H + +#include <string.h> + +#include "util/futex.h" +#include "util/list.h" +#include "util/macros.h" +#include "util/os_time.h" +#include "util/u_atomic.h" +#include "util/u_thread.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY (1 << 0) +#define UTIL_QUEUE_INIT_RESIZE_IF_FULL (1 << 1) +#define UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY (1 << 2) + +#if UTIL_FUTEX_SUPPORTED +#define UTIL_QUEUE_FENCE_FUTEX +#else +#define UTIL_QUEUE_FENCE_STANDARD +#endif + +#ifdef UTIL_QUEUE_FENCE_FUTEX +/* Job completion fence. + * Put this into your job structure. + */ +struct util_queue_fence { + /* The fence can be in one of three states: + * 0 - signaled + * 1 - unsignaled + * 2 - unsignaled, may have waiters + */ + uint32_t val; +}; + +static inline void +util_queue_fence_init(struct util_queue_fence *fence) +{ + fence->val = 0; +} + +static inline void +util_queue_fence_destroy(struct util_queue_fence *fence) +{ + assert(fence->val == 0); + /* no-op */ +} + +static inline void +util_queue_fence_signal(struct util_queue_fence *fence) +{ + uint32_t val = p_atomic_xchg(&fence->val, 0); + + assert(val != 0); + + if (val == 2) + futex_wake(&fence->val, INT_MAX); +} + +/** + * Move \p fence back into unsignalled state. + * + * \warning The caller must ensure that no other thread may currently be + * waiting (or about to wait) on the fence. 
+ */ +static inline void +util_queue_fence_reset(struct util_queue_fence *fence) +{ +#ifdef NDEBUG + fence->val = 1; +#else + uint32_t v = p_atomic_xchg(&fence->val, 1); + assert(v == 0); +#endif +} + +static inline bool +util_queue_fence_is_signalled(struct util_queue_fence *fence) +{ + return fence->val == 0; +} +#endif + +#ifdef UTIL_QUEUE_FENCE_STANDARD +/* Job completion fence. + * Put this into your job structure. + */ +struct util_queue_fence { + mtx_t mutex; + cnd_t cond; + int signalled; +}; + +void util_queue_fence_init(struct util_queue_fence *fence); +void util_queue_fence_destroy(struct util_queue_fence *fence); +void util_queue_fence_signal(struct util_queue_fence *fence); + +/** + * Move \p fence back into unsignalled state. + * + * \warning The caller must ensure that no other thread may currently be + * waiting (or about to wait) on the fence. + */ +static inline void +util_queue_fence_reset(struct util_queue_fence *fence) +{ + assert(fence->signalled); + fence->signalled = 0; +} + +static inline bool +util_queue_fence_is_signalled(struct util_queue_fence *fence) +{ + return fence->signalled != 0; +} +#endif + +void +_util_queue_fence_wait(struct util_queue_fence *fence); + +static inline void +util_queue_fence_wait(struct util_queue_fence *fence) +{ + if (unlikely(!util_queue_fence_is_signalled(fence))) + _util_queue_fence_wait(fence); +} + +bool +_util_queue_fence_wait_timeout(struct util_queue_fence *fence, + int64_t abs_timeout); + +/** + * Wait for the fence to be signaled with a timeout. + * + * \param fence the fence + * \param abs_timeout the absolute timeout in nanoseconds, relative to the + * clock provided by os_time_get_nano. + * + * \return true if the fence was signaled, false if the timeout occurred. + */ +static inline bool +util_queue_fence_wait_timeout(struct util_queue_fence *fence, + int64_t abs_timeout) +{ + if (util_queue_fence_is_signalled(fence)) + return true; + + if (abs_timeout == (int64_t)OS_TIMEOUT_INFINITE) { + _util_queue_fence_wait(fence); + return true; + } + + return _util_queue_fence_wait_timeout(fence, abs_timeout); +} + +typedef void (*util_queue_execute_func)(void *job, int thread_index); + +struct util_queue_job { + void *job; + size_t job_size; + struct util_queue_fence *fence; + util_queue_execute_func execute; + util_queue_execute_func cleanup; +}; + +/* Put this into your context. 
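A minimal sketch of the job/fence flow, using util_queue_add_job which is declared further below; compile_job and run_compile are hypothetical:

   struct compile_job {
      struct util_queue_fence fence;
      /* job inputs and outputs ... */
   };

   static void
   run_compile(void *job, int thread_index)
   {
      /* executes on one of the queue's worker threads */
      (void)job; (void)thread_index;
   }

   static void
   submit_and_wait(struct util_queue *queue, struct compile_job *job)
   {
      util_queue_fence_init(&job->fence);
      util_queue_add_job(queue, job, &job->fence, run_compile, NULL, 0);
      /* ... overlap other work here ... */
      util_queue_fence_wait(&job->fence);    /* blocks until run_compile has completed */
   }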
*/ +struct util_queue { + char name[14]; /* 13 characters = the thread name without the index */ + mtx_t finish_lock; /* for util_queue_finish and protects threads/num_threads */ + mtx_t lock; + cnd_t has_queued_cond; + cnd_t has_space_cond; + thrd_t *threads; + unsigned flags; + int num_queued; + unsigned max_threads; + unsigned num_threads; /* decreasing this number will terminate threads */ + int max_jobs; + int write_idx, read_idx; /* ring buffer pointers */ + size_t total_jobs_size; /* memory use of all jobs in the queue */ + struct util_queue_job *jobs; + + /* for cleanup at exit(), protected by exit_mutex */ + struct list_head head; +}; + +bool util_queue_init(struct util_queue *queue, + const char *name, + unsigned max_jobs, + unsigned num_threads, + unsigned flags); +void util_queue_destroy(struct util_queue *queue); + +/* optional cleanup callback is called after fence is signaled: */ +void util_queue_add_job(struct util_queue *queue, + void *job, + struct util_queue_fence *fence, + util_queue_execute_func execute, + util_queue_execute_func cleanup, + const size_t job_size); +void util_queue_drop_job(struct util_queue *queue, + struct util_queue_fence *fence); + +void util_queue_finish(struct util_queue *queue); + +/* Adjust the number of active threads. The new number of threads can't be + * greater than the initial number of threads at the creation of the queue, + * and it can't be less than 1. + */ +void +util_queue_adjust_num_threads(struct util_queue *queue, unsigned num_threads); + +int64_t util_queue_get_thread_time_nano(struct util_queue *queue, + unsigned thread_index); + +/* util_queue needs to be cleared to zeroes for this to work */ +static inline bool +util_queue_is_initialized(struct util_queue *queue) +{ + return queue->threads != NULL; +} + +/* Convenient structure for monitoring the queue externally and passing + * the structure between Mesa components. The queue doesn't use it directly. + */ +struct util_queue_monitoring +{ + /* For querying the thread busyness. */ + struct util_queue *queue; + + /* Counters updated by the user of the queue. */ + unsigned num_offloaded_items; + unsigned num_direct_items; + unsigned num_syncs; +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/u_string.h b/third_party/rust/glslopt/glsl-optimizer/src/util/u_string.h new file mode 100644 index 0000000000..88df2cceda --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/u_string.h @@ -0,0 +1,130 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Platform independent functions for string manipulation. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#ifndef U_STRING_H_ +#define U_STRING_H_ + +#if !defined(XF86_LIBC_H) +#include <stdio.h> +#endif +#include <stdlib.h> +#include <stddef.h> +#include <stdarg.h> +#include <string.h> + +#include "util/macros.h" // PRINTFLIKE + + +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined(_GNU_SOURCE) || defined(__APPLE__) + +#define strchrnul util_strchrnul +static inline char * +util_strchrnul(const char *s, char c) +{ + for (; *s && *s != c; ++s); + + return (char *)s; +} + +#endif + +#ifdef _WIN32 + +#define sprintf util_sprintf +static inline void + PRINTFLIKE(2, 3) +util_sprintf(char *str, const char *format, ...) +{ + va_list ap; + va_start(ap, format); + vsnprintf(str, (size_t)-1, format, ap); + va_end(ap); +} + +#define vasprintf util_vasprintf +static inline int +util_vasprintf(char **ret, const char *format, va_list ap) +{ + va_list ap_copy; + + /* Compute length of output string first */ + va_copy(ap_copy, ap); + int r = vsnprintf(NULL, 0, format, ap_copy); + va_end(ap_copy); + + if (r < 0) + return -1; + + *ret = (char *) malloc(r + 1); + if (!*ret) + return -1; + + /* Print to buffer */ + return vsnprintf(*ret, r + 1, format, ap); +} + +#define asprintf util_asprintf +static inline int +util_asprintf(char **str, const char *fmt, ...) +{ + int ret; + va_list args; + va_start(args, fmt); + ret = vasprintf(str, fmt, args); + va_end(args); + return ret; +} + +#ifndef strcasecmp +#define strcasecmp stricmp +#endif + +#define strdup _strdup + +#if defined(_WIN32) && !defined(HAVE_STRTOK_R) +#define strtok_r strtok_s +#endif + +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* U_STRING_H_ */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/u_thread.h b/third_party/rust/glslopt/glsl-optimizer/src/util/u_thread.h new file mode 100644 index 0000000000..b91d05e4cf --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/u_thread.h @@ -0,0 +1,256 @@ +/************************************************************************** + * + * Copyright 1999-2006 Brian Paul + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_THREAD_H_ +#define U_THREAD_H_ + +#include <stdint.h> +#include <stdbool.h> + +#include "c11/threads.h" +#include "detect_os.h" + +#ifdef HAVE_PTHREAD +#include <signal.h> +#ifdef PTHREAD_SETAFFINITY_IN_NP_HEADER +#include <pthread_np.h> +#endif +#endif + +#ifdef __HAIKU__ +#include <OS.h> +#endif + +#ifdef __FreeBSD__ +/* pthread_np.h -> sys/param.h -> machine/param.h + * - defines ALIGN which clashes with our ALIGN + */ +#undef ALIGN +#define cpu_set_t cpuset_t +#endif + +static inline thrd_t u_thread_create(int (*routine)(void *), void *param) +{ + thrd_t thread; +#ifdef HAVE_PTHREAD + sigset_t saved_set, new_set; + int ret; + + sigfillset(&new_set); + sigdelset(&new_set, SIGSYS); + pthread_sigmask(SIG_BLOCK, &new_set, &saved_set); + ret = thrd_create( &thread, routine, param ); + pthread_sigmask(SIG_SETMASK, &saved_set, NULL); +#else + int ret; + ret = thrd_create( &thread, routine, param ); +#endif + if (ret) + return 0; + + return thread; +} + +static inline void u_thread_setname( const char *name ) +{ +#if defined(HAVE_PTHREAD) +#if DETECT_OS_LINUX || DETECT_OS_CYGWIN || DETECT_OS_SOLARIS + pthread_setname_np(pthread_self(), name); +#elif DETECT_OS_FREEBSD || DETECT_OS_OPENBSD + pthread_set_name_np(pthread_self(), name); +#elif DETECT_OS_NETBSD + pthread_setname_np(pthread_self(), "%s", (void *)name); +#elif DETECT_OS_APPLE + pthread_setname_np(name); +#elif DETECT_OS_HAIKU + rename_thread(find_thread(NULL), name); +#else +#warning Not sure how to call pthread_setname_np +#endif +#endif + (void)name; +} + +/** + * An AMD Zen CPU consists of multiple modules where each module has its own L3 + * cache. Inter-thread communication such as locks and atomics between modules + * is very expensive. It's desirable to pin a group of closely cooperating + * threads to one group of cores sharing L3. + * + * \param thread thread + * \param L3_index index of the L3 cache + * \param cores_per_L3 number of CPU cores shared by one L3 + */ +static inline void +util_pin_thread_to_L3(thrd_t thread, unsigned L3_index, unsigned cores_per_L3) +{ +#if defined(HAVE_PTHREAD_SETAFFINITY) + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + for (unsigned i = 0; i < cores_per_L3; i++) + CPU_SET(L3_index * cores_per_L3 + i, &cpuset); + pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); +#endif +} + +/** + * Return the index of L3 that the thread is pinned to. If the thread is + * pinned to multiple L3 caches, return -1. + * + * \param thread thread + * \param cores_per_L3 number of CPU cores shared by one L3 + */ +static inline int +util_get_L3_for_pinned_thread(thrd_t thread, unsigned cores_per_L3) +{ +#if defined(HAVE_PTHREAD_SETAFFINITY) + cpu_set_t cpuset; + + if (pthread_getaffinity_np(thread, sizeof(cpuset), &cpuset) == 0) { + int L3_index = -1; + + for (unsigned i = 0; i < CPU_SETSIZE; i++) { + if (CPU_ISSET(i, &cpuset)) { + int x = i / cores_per_L3; + + if (L3_index != x) { + if (L3_index == -1) + L3_index = x; + else + return -1; /* multiple L3s are set */ + } + } + } + return L3_index; + } +#endif + return -1; +} + +/* + * Thread statistics. + */ + +/* Return the time of a thread's CPU time clock. 
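A minimal sketch of spawning and naming a worker thread with the helpers above; the thread name and entry point are hypothetical:

   static int
   worker_main(void *param)
   {
      u_thread_setname("glslopt:worker");    /* best effort, platform dependent */
      (void)param;
      /* ... */
      return 0;
   }

   static void
   start_worker(void)
   {
      thrd_t t = u_thread_create(worker_main, NULL);   /* signals blocked while creating */
      /* ... */
      thrd_join(t, NULL);
   }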
*/ +static inline int64_t +u_thread_get_time_nano(thrd_t thread) +{ +#if defined(HAVE_PTHREAD) && !defined(__APPLE__) && !defined(__HAIKU__) + struct timespec ts; + clockid_t cid; + + pthread_getcpuclockid(thread, &cid); + clock_gettime(cid, &ts); + return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec; +#else + return 0; +#endif +} + +static inline bool u_thread_is_self(thrd_t thread) +{ +#if defined(HAVE_PTHREAD) + return pthread_equal(pthread_self(), thread); +#endif + return false; +} + +/* + * util_barrier + */ + +#if defined(HAVE_PTHREAD) && !defined(__APPLE__) + +typedef pthread_barrier_t util_barrier; + +static inline void util_barrier_init(util_barrier *barrier, unsigned count) +{ + pthread_barrier_init(barrier, NULL, count); +} + +static inline void util_barrier_destroy(util_barrier *barrier) +{ + pthread_barrier_destroy(barrier); +} + +static inline void util_barrier_wait(util_barrier *barrier) +{ + pthread_barrier_wait(barrier); +} + + +#else /* If the OS doesn't have its own, implement barriers using a mutex and a condvar */ + +typedef struct { + unsigned count; + unsigned waiters; + uint64_t sequence; + mtx_t mutex; + cnd_t condvar; +} util_barrier; + +static inline void util_barrier_init(util_barrier *barrier, unsigned count) +{ + barrier->count = count; + barrier->waiters = 0; + barrier->sequence = 0; + (void) mtx_init(&barrier->mutex, mtx_plain); + cnd_init(&barrier->condvar); +} + +static inline void util_barrier_destroy(util_barrier *barrier) +{ + assert(barrier->waiters == 0); + mtx_destroy(&barrier->mutex); + cnd_destroy(&barrier->condvar); +} + +static inline void util_barrier_wait(util_barrier *barrier) +{ + mtx_lock(&barrier->mutex); + + assert(barrier->waiters < barrier->count); + barrier->waiters++; + + if (barrier->waiters < barrier->count) { + uint64_t sequence = barrier->sequence; + + do { + cnd_wait(&barrier->condvar, &barrier->mutex); + } while (sequence == barrier->sequence); + } else { + barrier->waiters = 0; + barrier->sequence++; + cnd_broadcast(&barrier->condvar); + } + + mtx_unlock(&barrier->mutex); +} + +#endif + +#endif /* U_THREAD_H_ */ diff --git a/third_party/rust/glslopt/glsl-optimizer/src/util/xxhash.h b/third_party/rust/glslopt/glsl-optimizer/src/util/xxhash.h new file mode 100644 index 0000000000..c0c8f44b60 --- /dev/null +++ b/third_party/rust/glslopt/glsl-optimizer/src/util/xxhash.h @@ -0,0 +1,1435 @@ +/* + xxHash - Extremely Fast Hash algorithm + Header File + Copyright (C) 2012-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. + +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +Note : SMHasher's CRC32 implementation is not the fastest one. +Other speed-oriented implementations can be faster, +especially in combination with PCLMUL instruction : +http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735 + +A 64-bit version, named XXH64, is available since r35. +It offers much better speed, but for 64-bit applications only. +Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s +*/ + +#if defined (__cplusplus) +extern "C" { +#endif + + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + +/* **************************** + * API modifier + ******************************/ +/** XXH_INLINE_ALL (and XXH_PRIVATE_API) + * This build macro includes xxhash functions in `static` mode + * in order to inline them, and remove their symbol from the public list. + * Inlining offers great performance improvement on small keys, + * and dramatic ones when length is expressed as a compile-time constant. + * See https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html . + * Methodology : + * #define XXH_INLINE_ALL + * #include "xxhash.h" + * `xxhash.c` is automatically included. + * It's not useful to compile and link it as a separate object. 
+ */ +#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else + /* this version may generate warnings for unused static functions */ +# define XXH_PUBLIC_API static +# endif +#else +# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +# ifdef XXH_EXPORT +# define XXH_PUBLIC_API __declspec(dllexport) +# elif XXH_IMPORT +# define XXH_PUBLIC_API __declspec(dllimport) +# endif +# else +# define XXH_PUBLIC_API /* do nothing */ +# endif +#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ + +/*! XXH_NAMESPACE, aka Namespace Emulation : + * + * If you want to include _and expose_ xxHash functions from within your own library, + * but also want to avoid symbol collisions with other libraries which may also include xxHash, + * + * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library + * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values). + * + * Note that no change is required within the calling program as long as it includes `xxhash.h` : + * regular symbol name will be automatically translated by this header. + */ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 7 +#define XXH_VERSION_RELEASE 2 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/* **************************** +* Definitions +******************************/ +#include <stddef.h> /* size_t */ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + +/*-********************************************************************** +* 32-bit hash 
+************************************************************************/ +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include <stdint.h> + typedef uint32_t XXH32_hash_t; +#else +# include <limits.h> +# if UINT_MAX == 0xFFFFFFFFUL + typedef unsigned int XXH32_hash_t; +# else +# if ULONG_MAX == 0xFFFFFFFFUL + typedef unsigned long XXH32_hash_t; +# else +# error "unsupported platform : need a 32-bit type" +# endif +# endif +#endif + +/*! XXH32() : + Calculate the 32-bit hash of a sequence of "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */ +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed); + +/******* Streaming *******/ + +/* + * Streaming functions generate the xxHash value from an incremental input. + * This method is slower than single-call functions, due to state management. + * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. + * + * XXH state must first be allocated, using XXH*_createState(). + * + * Start a new hash by initializing the state with a seed, using XXH*_reset(). + * + * Then, feed the hash state by calling XXH*_update() as many times as necessary. + * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + * + * Finally, a hash value can be produced at any time, by using XXH*_digest(). + * This function returns the nn-bit hash as an int or long long. + * + * It's still possible to continue inserting input into the hash state after a digest, + * and generate new hash values later on, by invoking XXH*_digest() again. + * + * When done, release the state, using XXH*_freeState(). + */ + +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state); + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + +/******* Canonical representation *******/ + +/* Default return values from XXH functions are basic unsigned 32 and 64 bits. + * This is the simplest and fastest format for further post-processing. + * However, this leaves open the question of byte order, + * since little and big endian conventions will write the same number differently. + * + * The canonical representation settles this issue, + * by mandating big-endian convention, + * aka, the same convention as human-readable numbers (large digits first). + * When writing hash values to storage, sending them over a network, or printing them, + * it's highly recommended to use the canonical representation, + * to ensure portability across a wider range of systems, present and future. + * + * The following functions allow transformation of hash values into and from canonical format.
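 + * + * A minimal end-to-end sketch (hedged illustration; `buf`, `len` and `canon` are hypothetical caller-side names) : + * XXH32_state_t* state = XXH32_createState(); + * XXH32_reset(state, 0); + * XXH32_update(state, buf, len); (update may be called repeatedly as data arrives) + * XXH32_canonical_t canon; + * XXH32_canonicalFromHash(&canon, XXH32_digest(state)); (big-endian bytes, safe to store or transmit) + * XXH32_freeState(state);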
+ */ + +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); + + +#ifndef XXH_NO_LONG_LONG +/*-********************************************************************** +* 64-bit hash +************************************************************************/ +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include <stdint.h> + typedef uint64_t XXH64_hash_t; +#else + /* the following type must have a width of 64-bit */ + typedef unsigned long long XXH64_hash_t; +#endif + +/*! XXH64() : + * Returns the 64-bit hash of sequence of length @length stored at memory address @input. + * @seed can be used to alter the result predictably. + * This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark). + */ +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, XXH64_hash_t seed); + +/******* Streaming *******/ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/******* Canonical representation *******/ +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + + +#endif /* XXH_NO_LONG_LONG */ + +#endif /* XXHASH_H_5627135585666179 */ + + + +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) +#define XXHASH_H_STATIC_13879238742 +/* ************************************************************************************************ + This section contains declarations which are not guaranteed to remain stable. + They may change in future versions, becoming incompatible with a different version of the library. + These declarations should only be used with static linking. + Never use them in association with dynamic linking ! +*************************************************************************************************** */ + +/* These definitions are only present to allow + * static allocation of XXH state, on stack or in a struct for example. + * Never **ever** use members directly. 
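 + * + * Stack-allocation sketch (hedged illustration; valid only when this static section is visible, i.e. XXH_STATIC_LINKING_ONLY or XXH_INLINE_ALL was defined before including this header; `buf` and `len` are hypothetical) : + * XXH32_state_t state; (no XXH32_createState()/XXH32_freeState() needed) + * XXH32_reset(&state, 0); + * XXH32_update(&state, buf, len); + * XXH32_hash_t h = XXH32_digest(&state);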
*/ + +struct XXH32_state_s { + XXH32_hash_t total_len_32; + XXH32_hash_t large_len; + XXH32_hash_t v1; + XXH32_hash_t v2; + XXH32_hash_t v3; + XXH32_hash_t v4; + XXH32_hash_t mem32[4]; + XXH32_hash_t memsize; + XXH32_hash_t reserved; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH32_state_t */ + + +#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */ + +struct XXH64_state_s { + XXH64_hash_t total_len; + XXH64_hash_t v1; + XXH64_hash_t v2; + XXH64_hash_t v3; + XXH64_hash_t v4; + XXH64_hash_t mem64[4]; + XXH32_hash_t memsize; + XXH32_hash_t reserved32; /* required for padding anyway */ + XXH64_hash_t reserved64; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH64_state_t */ + +#endif /* XXH_NO_LONG_LONG */ + +#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) +# define XXH_IMPLEMENTATION +#endif + +#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */ + + + +/*-********************************************************************** +* xxHash implementation +* Functions implementation used to be hosted within xxhash.c . +* However, code inlining requires to place implementation in the header file. +* As a consequence, xxhash.c used to be included within xxhash.h . +* But some build systems don't like *.c inclusions. +* So the implementation is now directly integrated within xxhash.h . +* Another small advantage is that xxhash.c is no longer required in /includes . +************************************************************************/ + +#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \ + || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387) +# define XXH_IMPLEM_13a8737387 + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if !defined(__clang__) && defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && (__ARM_ARCH == 6) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif !defined(__clang__) && ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \ + (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7))) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault. + * When this macro is enabled, xxHash actively checks input for null pointer. 
+ * It it is, result for null input pointers is the same as a null-length input. + */ +#ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */ +# define XXH_ACCEPT_NULL_INPUT_POINTER 0 +#endif + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; + * set it to 0 when the input is guaranteed to be aligned, + * or when alignment doesn't matter for performance. + */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + +/*!XXH_REROLL: + * Whether to reroll XXH32_finalize, and XXH64_finalize, + * instead of using an unrolled jump table/if statement loop. + * + * This is automatically defined on -Os/-Oz on GCC and Clang. */ +#ifndef XXH_REROLL +# if defined(__OPTIMIZE_SIZE__) +# define XXH_REROLL 1 +# else +# define XXH_REROLL 0 +# endif +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/*! Modify the local functions below should you wish to use some other memory routines +* for malloc(), free() */ +#include <stdlib.h> +static void* XXH_malloc(size_t s) { return malloc(s); } +static void XXH_free (void* p) { free(p); } +/*! and for memcpy() */ +#include <string.h> +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } + +#include <limits.h> /* ULLONG_MAX */ + + +/* ************************************* +* Compiler Specific Options +***************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# define XXH_FORCE_INLINE static __forceinline +# define XXH_NO_INLINE static __declspec(noinline) +#else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define XXH_FORCE_INLINE static inline __attribute__((always_inline)) +# define XXH_NO_INLINE static __attribute__((noinline)) +# else +# define XXH_FORCE_INLINE static inline +# define XXH_NO_INLINE static +# endif +# else +# define XXH_FORCE_INLINE static +# define XXH_NO_INLINE static +# endif /* __STDC_VERSION__ */ +#endif + + + +/* ************************************* +* Debug +***************************************/ +/* DEBUGLEVEL is expected to be defined externally, + * typically through compiler command line. + * Value must be a number. */ +#ifndef DEBUGLEVEL +# define DEBUGLEVEL 0 +#endif + +#if (DEBUGLEVEL>=1) +# include <assert.h> /* note : can still be disabled with NDEBUG */ +# define XXH_ASSERT(c) assert(c) +#else +# define XXH_ASSERT(c) ((void)0) +#endif + +/* note : use after variable declarations */ +#define XXH_STATIC_ASSERT(c) { enum { XXH_sa = 1/(int)(!!(c)) }; } + + +/* ************************************* +* Basic Types +***************************************/ +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include <stdint.h> + typedef uint8_t xxh_u8; +#else + typedef unsigned char xxh_u8; +#endif +typedef XXH32_hash_t xxh_u32; + + +/* *** Memory access *** */ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. 
Only works on CPU which support unaligned memory access in hardware */ +static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { xxh_u32 u32; } __attribute__((packed)) unalign; +static xxh_u32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ +static xxh_u32 XXH_read32(const void* memPtr) +{ + xxh_u32 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* *** Endianess *** */ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN +# if defined(_WIN32) /* Windows is always little endian */ \ + || defined(__LITTLE_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define XXH_CPU_LITTLE_ENDIAN 1 +# elif defined(__BIG_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define XXH_CPU_LITTLE_ENDIAN 0 +# else +static int XXH_isLittleEndian(void) +{ + const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} +# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() +# endif +#endif + + + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +#if !defined(NO_CLANG_BUILTIN) && __has_builtin(__builtin_rotateleft32) && __has_builtin(__builtin_rotateleft64) +# define XXH_rotl32 __builtin_rotateleft32 +# define XXH_rotl64 __builtin_rotateleft64 +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#elif defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +#elif XXH_GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +#else +static xxh_u32 XXH_swap32 (xxh_u32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); +} + +static xxh_u32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + +XXH_FORCE_INLINE xxh_u32 +XXH_readLE32_align(const void* ptr, XXH_alignment align) +{ + if (align==XXH_unaligned) { + return XXH_readLE32(ptr); + } else { + return XXH_CPU_LITTLE_ENDIAN ? 
*(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr); + } +} + + +/* ************************************* +* Misc +***************************************/ +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ******************************************************************* +* 32-bit hash functions +*********************************************************************/ +static const xxh_u32 PRIME32_1 = 0x9E3779B1U; /* 0b10011110001101110111100110110001 */ +static const xxh_u32 PRIME32_2 = 0x85EBCA77U; /* 0b10000101111010111100101001110111 */ +static const xxh_u32 PRIME32_3 = 0xC2B2AE3DU; /* 0b11000010101100101010111000111101 */ +static const xxh_u32 PRIME32_4 = 0x27D4EB2FU; /* 0b00100111110101001110101100101111 */ +static const xxh_u32 PRIME32_5 = 0x165667B1U; /* 0b00010110010101100110011110110001 */ + +static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) +{ + acc += input * PRIME32_2; + acc = XXH_rotl32(acc, 13); + acc *= PRIME32_1; +#if defined(__GNUC__) && defined(__SSE4_1__) && !defined(XXH_ENABLE_AUTOVECTORIZE) + /* UGLY HACK: + * This inline assembly hack forces acc into a normal register. This is the + * only thing that prevents GCC and Clang from autovectorizing the XXH32 loop + * (pragmas and attributes don't work for some reason) without globally + * disabling SSE4.1. + * + * The reason we want to avoid vectorization is that, despite working on + * 4 integers at a time, there are multiple factors slowing XXH32 down on + * SSE4: + * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on newer chips!) + * making it slightly slower to multiply four integers at once compared to four + * integers independently. Even on Sandy/Ivy Bridge, where pmulld was fastest, it is + * still not worth it to go into SSE just to multiply unless doing a long operation. + * + * - Four instructions are required to rotate, + * movdqa tmp, v // not required with VEX encoding + * pslld tmp, 13 // tmp <<= 13 + * psrld v, 19 // v >>= 19 + * por v, tmp // v |= tmp + * compared to one for scalar: + * roll v, 13 // reliably fast across the board + * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason + * + * - Instruction level parallelism is actually more beneficial here because + * SIMD actually serializes this operation: while v1 is rotating, v2 can load data, + * while v3 can multiply. SSE forces them to operate together. + * + * How this hack works: + * __asm__("" // Declare an assembly block but don't declare any instructions + * : // However, as an Input/Output Operand, + * "+r" // constrain a read/write operand (+) as a general purpose register (r). + * (acc) // and set acc as the operand + * ); + * + * Because of the 'r', the compiler has promised that acc will be in a + * general purpose register and the '+' says that it will be 'read/write', + * so it has to assume it has changed. It is like volatile without all the + * loads and stores. + * + * Since the argument has to be in a normal register (not an SSE register), + * each time XXH32_round is called, it is impossible to vectorize.
*/ + __asm__("" : "+r" (acc)); +#endif + return acc; +} + +/* mix all bits */ +static xxh_u32 XXH32_avalanche(xxh_u32 h32) +{ + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + return(h32); +} + +#define XXH_get32bits(p) XXH_readLE32_align(p, align) + +static xxh_u32 +XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align) +{ +#define PROCESS1 \ + h32 += (*ptr++) * PRIME32_5; \ + h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; + +#define PROCESS4 \ + h32 += XXH_get32bits(ptr) * PRIME32_3; \ + ptr+=4; \ + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + + /* Compact rerolled version */ + if (XXH_REROLL) { + len &= 15; + while (len >= 4) { + PROCESS4; + len -= 4; + } + while (len > 0) { + PROCESS1; + --len; + } + return XXH32_avalanche(h32); + } else { + switch(len&15) /* or switch(bEnd - p) */ { + case 12: PROCESS4; + /* fallthrough */ + case 8: PROCESS4; + /* fallthrough */ + case 4: PROCESS4; + return XXH32_avalanche(h32); + + case 13: PROCESS4; + /* fallthrough */ + case 9: PROCESS4; + /* fallthrough */ + case 5: PROCESS4; + PROCESS1; + return XXH32_avalanche(h32); + + case 14: PROCESS4; + /* fallthrough */ + case 10: PROCESS4; + /* fallthrough */ + case 6: PROCESS4; + PROCESS1; + PROCESS1; + return XXH32_avalanche(h32); + + case 15: PROCESS4; + /* fallthrough */ + case 11: PROCESS4; + /* fallthrough */ + case 7: PROCESS4; + /* fallthrough */ + case 3: PROCESS1; + /* fallthrough */ + case 2: PROCESS1; + /* fallthrough */ + case 1: PROCESS1; + /* fallthrough */ + case 0: return XXH32_avalanche(h32); + } + XXH_ASSERT(0); + return h32; /* reaching this point is deemed impossible */ + } +} + +XXH_FORCE_INLINE xxh_u32 +XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align) +{ + const xxh_u8* bEnd = input + len; + xxh_u32 h32; + +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + if (input==NULL) { + len=0; + bEnd=input=(const xxh_u8*)(size_t)16; + } +#endif + + if (len>=16) { + const xxh_u8* const limit = bEnd - 15; + xxh_u32 v1 = seed + PRIME32_1 + PRIME32_2; + xxh_u32 v2 = seed + PRIME32_2; + xxh_u32 v3 = seed + 0; + xxh_u32 v4 = seed - PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4; + v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4; + v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4; + v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4; + } while (input < limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (xxh_u32)len; + + return XXH32_finalize(h32, input, len&15, align); +} + + +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, (const xxh_u8*)input, len); + return XXH32_digest(&state); + +#else + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); + } } + + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); +#endif +} + + + +/******* Hash streaming *******/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* 
statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + /* do not write into reserved, planned to be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); + return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode +XXH32_update(XXH32_state_t* state, const void* input, size_t len) +{ + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; + + state->total_len_32 += (XXH32_hash_t)len; + state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16)); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len); + state->memsize += (XXH32_hash_t)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const xxh_u32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32)); + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const xxh_u8* const limit = bEnd - 16; + xxh_u32 v1 = state->v1; + xxh_u32 v2 = state->v2; + xxh_u32 v3 = state->v3; + xxh_u32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + + +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* state) +{ + xxh_u32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + + XXH_rotl32(state->v2, 7) + + XXH_rotl32(state->v3, 12) + + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } + + h32 += state->total_len_32; + + return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned); +} + + +/******* Canonical representation *******/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, remaining comparable across different systems. 
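+* +* Round-trip sketch (hedged illustration; `h32` is a hypothetical hash value) : +* XXH32_canonical_t c; +* XXH32_canonicalFromHash(&c, h32); (c.digest[0] now holds the most significant byte) +* XXH32_hashFromCanonical(&c) then returns the original h32 on both little- and big-endian hosts.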
+*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + + +#ifndef XXH_NO_LONG_LONG + +/* ******************************************************************* +* 64-bit hash functions +*********************************************************************/ + +/******* Memory access *******/ + +typedef XXH64_hash_t xxh_u64; + + +/*! XXH_REROLL_XXH64: + * Whether to reroll the XXH64_finalize() loop. + * + * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a performance gain + * on 64-bit hosts, as only one jump is required. + * + * However, on 32-bit hosts, because arithmetic needs to be done with two 32-bit registers, + * and 64-bit arithmetic needs to be simulated, it isn't beneficial to unroll. The code becomes + * ridiculously large (the largest function in the binary on i386!), and rerolling it saves + * anywhere from 3kB to 20kB. It is also slightly faster because it fits into cache better + * and is more likely to be inlined by the compiler. + * + * If XXH_REROLL is defined, this is ignored and the loop is always rerolled. */ +#ifndef XXH_REROLL_XXH64 +# if (defined(__ILP32__) || defined(_ILP32)) /* ILP32 is often defined on 32-bit GCC family */ \ + || !(defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) /* x86-64 */ \ + || defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) /* aarch64 */ \ + || defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || defined(__powerpc64__) /* ppc64 */ \ + || defined(__mips64__) || defined(__mips64)) /* mips64 */ \ + || (!defined(SIZE_MAX) || SIZE_MAX < ULLONG_MAX) /* check limits */ +# define XXH_REROLL_XXH64 1 +# else +# define XXH_REROLL_XXH64 0 +# endif +#endif /* !defined(XXH_REROLL_XXH64) */ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static xxh_u64 XXH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64; +static xxh_u64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. 
+ * see : http://stackoverflow.com/a/32095106/646947 + */ + +static xxh_u64 XXH_read64(const void* memPtr) +{ + xxh_u64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap64 _byteswap_uint64 +#elif XXH_GCC_VERSION >= 403 +# define XXH_swap64 __builtin_bswap64 +#else +static xxh_u64 XXH_swap64 (xxh_u64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + +XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); +} + +static xxh_u64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + +XXH_FORCE_INLINE xxh_u64 +XXH_readLE64_align(const void* ptr, XXH_alignment align) +{ + if (align==XXH_unaligned) + return XXH_readLE64(ptr); + else + return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr); +} + + +/******* xxh64 *******/ + +static const xxh_u64 PRIME64_1 = 0x9E3779B185EBCA87ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111 */ +static const xxh_u64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 */ +static const xxh_u64 PRIME64_3 = 0x165667B19E3779F9ULL; /* 0b0001011001010110011001111011000110011110001101110111100111111001 */ +static const xxh_u64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL; /* 0b1000010111101011110010100111011111000010101100101010111001100011 */ +static const xxh_u64 PRIME64_5 = 0x27D4EB2F165667C5ULL; /* 0b0010011111010100111010110010111100010110010101100110011111000101 */ + +static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input) +{ + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; +} + +static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +static xxh_u64 XXH64_avalanche(xxh_u64 h64) +{ + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + return h64; +} + + +#define XXH_get64bits(p) XXH_readLE64_align(p, align) + +static xxh_u64 +XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align) +{ +#define PROCESS1_64 \ + h64 ^= (*ptr++) * PRIME64_5; \ + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + +#define PROCESS4_64 \ + h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * PRIME64_1; \ + ptr+=4; \ + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + +#define PROCESS8_64 { \ + xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \ + ptr+=8; \ + h64 ^= k1; \ + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \ +} + + /* Rerolled version for 32-bit targets is faster and much smaller. 
*/ + if (XXH_REROLL || XXH_REROLL_XXH64) { + len &= 31; + while (len >= 8) { + PROCESS8_64; + len -= 8; + } + if (len >= 4) { + PROCESS4_64; + len -= 4; + } + while (len > 0) { + PROCESS1_64; + --len; + } + return XXH64_avalanche(h64); + } else { + switch(len & 31) { + case 24: PROCESS8_64; + /* fallthrough */ + case 16: PROCESS8_64; + /* fallthrough */ + case 8: PROCESS8_64; + return XXH64_avalanche(h64); + + case 28: PROCESS8_64; + /* fallthrough */ + case 20: PROCESS8_64; + /* fallthrough */ + case 12: PROCESS8_64; + /* fallthrough */ + case 4: PROCESS4_64; + return XXH64_avalanche(h64); + + case 25: PROCESS8_64; + /* fallthrough */ + case 17: PROCESS8_64; + /* fallthrough */ + case 9: PROCESS8_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 29: PROCESS8_64; + /* fallthrough */ + case 21: PROCESS8_64; + /* fallthrough */ + case 13: PROCESS8_64; + /* fallthrough */ + case 5: PROCESS4_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 26: PROCESS8_64; + /* fallthrough */ + case 18: PROCESS8_64; + /* fallthrough */ + case 10: PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 30: PROCESS8_64; + /* fallthrough */ + case 22: PROCESS8_64; + /* fallthrough */ + case 14: PROCESS8_64; + /* fallthrough */ + case 6: PROCESS4_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 27: PROCESS8_64; + /* fallthrough */ + case 19: PROCESS8_64; + /* fallthrough */ + case 11: PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 31: PROCESS8_64; + /* fallthrough */ + case 23: PROCESS8_64; + /* fallthrough */ + case 15: PROCESS8_64; + /* fallthrough */ + case 7: PROCESS4_64; + /* fallthrough */ + case 3: PROCESS1_64; + /* fallthrough */ + case 2: PROCESS1_64; + /* fallthrough */ + case 1: PROCESS1_64; + /* fallthrough */ + case 0: return XXH64_avalanche(h64); + } + } + /* impossible to reach */ + XXH_ASSERT(0); + return 0; /* unreachable, but some compilers complain without it */ +} + +XXH_FORCE_INLINE xxh_u64 +XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align) +{ + const xxh_u8* bEnd = input + len; + xxh_u64 h64; + +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + if (input==NULL) { + len=0; + bEnd=input=(const xxh_u8*)(size_t)32; + } +#endif + + if (len>=32) { + const xxh_u8* const limit = bEnd - 32; + xxh_u64 v1 = seed + PRIME64_1 + PRIME64_2; + xxh_u64 v2 = seed + PRIME64_2; + xxh_u64 v3 = seed + 0; + xxh_u64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8; + v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8; + v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8; + v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8; + } while (input<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (xxh_u64) len; + + return XXH64_finalize(h64, input, len, align); +} + + +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_state_t state; + XXH64_reset(&state, seed); + XXH64_update(&state, (const xxh_u8*)input, len); + return XXH64_digest(&state); + +#else + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 
7)==0) { /* Input is aligned, let's leverage the speed advantage */ + return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); + } } + + return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); + +#endif +} + +/******* Hash Streaming *******/ + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + /* do not write into reserved64, might be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64)); + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH64_update (XXH64_state_t* state, const void* input, size_t len) +{ + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len); + state->memsize += (xxh_u32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3)); + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const xxh_u8* const limit = bEnd - 32; + xxh_u64 v1 = state->v1; + xxh_u64 v2 = state->v2; + xxh_u64 v3 = state->v3; + xxh_u64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + + +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* state) +{ + xxh_u64 h64; + + if (state->total_len >= 32) { + xxh_u64 const v1 = state->v1; + xxh_u64 const v2 = state->v2; + xxh_u64 const v3 = state->v3; + xxh_u64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 /*seed*/ + PRIME64_5; + } + + h64 += (xxh_u64) state->total_len; + + return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned); +} + + +/******* Canonical representation *******/ + +XXH_PUBLIC_API 
void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} + + + +/* ********************************************************************* +* XXH3 +* New generation hash designed for speed on small keys and vectorization +************************************************************************ */ + +/* #include "xxh3.h" */ + + +#endif /* XXH_NO_LONG_LONG */ + + +#endif /* XXH_IMPLEMENTATION */ + + +#if defined (__cplusplus) +} +#endif