diff options
Diffstat (limited to 'plug-ins/file-dds/vec.h')
-rw-r--r-- | plug-ins/file-dds/vec.h | 245 |
1 files changed, 245 insertions, 0 deletions
diff --git a/plug-ins/file-dds/vec.h b/plug-ins/file-dds/vec.h new file mode 100644 index 0000000..cc3c344 --- /dev/null +++ b/plug-ins/file-dds/vec.h @@ -0,0 +1,245 @@ +/* + * DDS GIMP plugin + * + * Copyright (C) 2004-2012 Shawn Kirst <skirst@gmail.com>, + * with parts (C) 2003 Arne Reuter <homepage@arnereuter.de> where specified. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#ifndef __VEC_H__ +#define __VEC_H__ + +#include <math.h> + +#ifdef __SSE__ +#define USE_SSE 1 +#endif + +#ifdef USE_SSE +#include <immintrin.h> +#endif + +#include "imath.h" + +typedef float vec4_t __attribute__((vector_size(16))); +typedef float sym3x3_t[6]; + +#define VEC4_CONST4(x, y, z, w) {x, y, z, w} +#define VEC4_CONST3(x, y, z) {x, y, z, 0.0f} +#define VEC4_CONST1(x) {x, x, x, x} + +static inline vec4_t +vec4_set (float x, + float y, + float z, + float w) +{ +#ifdef USE_SSE + return _mm_setr_ps(x, y, z, w); +#else + vec4_t v = { x, y, z, w }; + return v; +#endif +} + +static inline vec4_t +vec4_set1 (float f) +{ +#ifdef USE_SSE + return _mm_set1_ps(f); +#else + vec4_t v = { f, f, f, f }; + return v; +#endif +} + +static inline vec4_t +vec4_zero (void) +{ +#ifdef USE_SSE + return _mm_setzero_ps(); +#else + vec4_t v = { 0, 0, 0, 0 }; + return v; +#endif +} + +static inline void +vec4_store (float *f, + const vec4_t v) +{ +#ifdef USE_SSE + _mm_store_ps (f, v); +#else + f[0] = v[0]; f[1] = v[1]; f[2] = v[2]; f[3] = v[3]; +#endif +} + +static inline vec4_t +vec4_splatx (const vec4_t v) +{ +#ifdef USE_SSE + return _mm_shuffle_ps(v, v, 0x00); +#else + vec4_t r = { v[0], v[0], v[0], v[0] }; + return r; +#endif +} + +static inline vec4_t +vec4_splaty (const vec4_t v) +{ +#ifdef USE_SSE + return _mm_shuffle_ps(v, v, 0x55); +#else + vec4_t r = { v[1], v[1], v[1], v[1] }; + return r; +#endif +} + +static inline vec4_t +vec4_splatz (const vec4_t v) +{ +#ifdef USE_SSE + return _mm_shuffle_ps(v, v, 0xaa); +#else + vec4_t r = { v[2], v[2], v[2], v[2] }; + return r; +#endif +} + +static inline vec4_t +vec4_splatw (const vec4_t v) +{ +#ifdef USE_SSE + return _mm_shuffle_ps(v, v, 0xff); +#else + vec4_t r = { v[3], v[3], v[3], v[3] }; + return r; +#endif +} + +static inline vec4_t +vec4_rcp (const vec4_t v) +{ +#ifdef USE_SSE + __m128 est = _mm_rcp_ps (v); + __m128 diff = _mm_sub_ps (_mm_set1_ps(1.0f), _mm_mul_ps(est, v)); + return _mm_add_ps(_mm_mul_ps(diff, est), est); +#else + vec4_t one = { 1.0f, 1.0f, 1.0f, 1.0f }; + return one / v; +#endif +} + +static inline vec4_t +vec4_min (const vec4_t a, + const vec4_t b) +{ +#ifdef USE_SSE + return _mm_min_ps(a, b); +#else + return vec4_set (MIN(a[0], b[0]), MIN(a[1], b[1]), MIN(a[2], b[2]), MIN(a[3], b[3])); +#endif +} + +static inline vec4_t +vec4_max (const vec4_t a, + const vec4_t b) +{ +#ifdef USE_SSE + return _mm_max_ps (a, b); +#else + return vec4_set (MAX(a[0], b[0]), MAX(a[1], b[1]), MAX(a[2], b[2]), MAX(a[3], b[3])); +#endif +} + +static inline vec4_t +vec4_trunc (const vec4_t v) +{ +#ifdef USE_SSE +# ifdef __SSE4_1__ + return _mm_round_ps(v, _MM_FROUND_TRUNC); +# elif defined(__SSE2__) + return _mm_cvtepi32_ps(_mm_cvttps_epi32(v)); +# else + // convert to ints + __m128 in = v; + __m64 lo = _mm_cvttps_pi32(in); + __m64 hi = _mm_cvttps_pi32(_mm_movehl_ps(in, in)); + // convert to floats + __m128 part = _mm_movelh_ps(in, _mm_cvtpi32_ps(in, hi)); + __m128 trunc = _mm_cvtpi32_ps(part, lo); + // clear mmx state + _mm_empty (); + return trunc; +# endif +#else + vec4_t r = { v[0] > 0.0f ? floorf(v[0]) : ceil(v[0]), + v[1] > 0.0f ? floorf(v[1]) : ceil(v[1]), + v[2] > 0.0f ? floorf(v[2]) : ceil(v[2]), + v[3] > 0.0f ? floorf(v[3]) : ceil(v[3]), }; + return r; +#endif +} + +static inline float +vec4_accum (const vec4_t v) +{ +#ifdef USE_SSE + float rv; + __m128 t; +# ifdef __SSE3__ + t = _mm_hadd_ps(v, v); + t = _mm_hadd_ps(t, t); +# else + t = _mm_add_ps(v, _mm_movehl_ps(v, v)); + t = _mm_add_ss(t, _mm_shuffle_ps(t, t, 0x01)); +# endif + _mm_store_ss(&rv, t); + return rv; +#else + return v[0] + v[1] + v[2] + v[3]; +#endif +} + +static inline float +vec4_dot (const vec4_t a, + const vec4_t b) +{ +#if defined(USE_SSE) && defined(__SSE4_1__) + float rv; + __m128 t = _mm_dp_ps(a, b, 0xff); + _mm_store_ss(&rv, t); + return rv; +#else + return vec4_accum(a * b); +#endif +} + +static inline int +vec4_cmplt (const vec4_t a, + const vec4_t b) +{ +#ifdef USE_SSE + __m128 bits = _mm_cmplt_ps(a, b); + int val = _mm_movemask_ps(bits); + return val != 0; +#else + return (a[0] < b[0]) || (a[1] < b[1]) || (a[2] < b[2]) || (a[3] < b[3]); +#endif +} + +#endif /* __VEC_H__ */ |