author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:24:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:24:48 +0000 |
commit | cca66b9ec4e494c1d919bff0f71a820d8afab1fa (patch) | |
tree | 146f39ded1c938019e1ed42d30923c2ac9e86789 /src/display/cairo-templates.h | |
parent | Initial commit. (diff) | |
download | inkscape-upstream/1.2.2.tar.xz inkscape-upstream/1.2.2.zip |
Adding upstream version 1.2.2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/display/cairo-templates.h')
-rw-r--r-- | src/display/cairo-templates.h | 721 |
1 file changed, 721 insertions, 0 deletions
diff --git a/src/display/cairo-templates.h b/src/display/cairo-templates.h
new file mode 100644
index 0000000..8b7493e
--- /dev/null
+++ b/src/display/cairo-templates.h
@@ -0,0 +1,721 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/**
+ * @file
+ * Cairo software blending templates.
+ *//*
+ * Authors:
+ *   Krzysztof Kosiński <tweenk.pl@gmail.com>
+ *
+ * Copyright (C) 2010 Authors
+ * Released under GNU GPL v2+, read the file 'COPYING' for more information.
+ */
+
+#ifndef SEEN_INKSCAPE_DISPLAY_CAIRO_TEMPLATES_H
+#define SEEN_INKSCAPE_DISPLAY_CAIRO_TEMPLATES_H
+
+#ifdef HAVE_CONFIG_H
+# include "config.h" // only include where actually required!
+#endif
+
+#include <glib.h>
+
+#ifdef HAVE_OPENMP
+#include <omp.h>
+#include "preferences.h"
+// single-threaded operation if the number of pixels is below this threshold
+static const int OPENMP_THRESHOLD = 2048;
+#endif
+
+#include <algorithm>
+#include <cairo.h>
+#include <cmath>
+#include "display/nr-3dutils.h"
+#include "display/cairo-utils.h"
+
+/**
+ * Blend two surfaces using the supplied functor.
+ * This template blends two Cairo image surfaces using a blending functor that takes
+ * two 32-bit ARGB pixel values and returns a modified 32-bit pixel value.
+ * Differences in input surface formats are handled transparently. In future, this template
+ * will also handle software fallback for GL surfaces.
+ */
+template <typename Blend>
+void ink_cairo_surface_blend(cairo_surface_t *in1, cairo_surface_t *in2, cairo_surface_t *out, Blend blend)
+{
+    cairo_surface_flush(in1);
+    cairo_surface_flush(in2);
+
+    // ASSUMPTIONS
+    // 1. Cairo ARGB32 surface strides are always divisible by 4
+    // 2. We can only receive CAIRO_FORMAT_ARGB32 or CAIRO_FORMAT_A8 surfaces
+    // 3. Both surfaces are of the same size
+    // 4. Output surface is ARGB32 if at least one input is ARGB32
+
+    int w = cairo_image_surface_get_width(in2);
+    int h = cairo_image_surface_get_height(in2);
+    int stride1 = cairo_image_surface_get_stride(in1);
+    int stride2 = cairo_image_surface_get_stride(in2);
+    int strideout = cairo_image_surface_get_stride(out);
+    int bpp1 = cairo_image_surface_get_format(in1) == CAIRO_FORMAT_A8 ? 1 : 4;
+    int bpp2 = cairo_image_surface_get_format(in2) == CAIRO_FORMAT_A8 ? 1 : 4;
+    int bppout = std::max(bpp1, bpp2);
+
+    // Check whether we can loop over pixels without taking stride into account.
+    bool fast_path = true;
+    fast_path &= (stride1 == w * bpp1);
+    fast_path &= (stride2 == w * bpp2);
+    fast_path &= (strideout == w * bppout);
+
+    int limit = w * h;
+
+    guint32 *const in1_data = reinterpret_cast<guint32*>(cairo_image_surface_get_data(in1));
+    guint32 *const in2_data = reinterpret_cast<guint32*>(cairo_image_surface_get_data(in2));
+    guint32 *const out_data = reinterpret_cast<guint32*>(cairo_image_surface_get_data(out));
+
+    // NOTE
+    // OpenMP probably doesn't help much here.
+    // It would be better to render more than 1 tile at a time.
+    #if HAVE_OPENMP
+    Inkscape::Preferences *prefs = Inkscape::Preferences::get();
+    int numOfThreads = prefs->getIntLimited("/options/threading/numthreads", omp_get_num_procs(), 1, 256);
+    if (numOfThreads){} // inform compiler we are using it.
+    #endif
+
+    // The number of code paths here is evil.
+    if (bpp1 == 4) {
+        if (bpp2 == 4) {
+            if (fast_path) {
+                #if HAVE_OPENMP
+                #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+                #endif
+                for (int i = 0; i < limit; ++i) {
+                    *(out_data + i) = blend(*(in1_data + i), *(in2_data + i));
+                }
+            } else {
+                #if HAVE_OPENMP
+                #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+                #endif
+                for (int i = 0; i < h; ++i) {
+                    guint32 *in1_p = in1_data + i * stride1/4;
+                    guint32 *in2_p = in2_data + i * stride2/4;
+                    guint32 *out_p = out_data + i * strideout/4;
+                    for (int j = 0; j < w; ++j) {
+                        *out_p = blend(*in1_p, *in2_p);
+                        ++in1_p; ++in2_p; ++out_p;
+                    }
+                }
+            }
+        } else {
+            // bpp2 == 1
+            #if HAVE_OPENMP
+            #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+            #endif
+            for (int i = 0; i < h; ++i) {
+                guint32 *in1_p = in1_data + i * stride1/4;
+                guint8 *in2_p = reinterpret_cast<guint8*>(in2_data) + i * stride2;
+                guint32 *out_p = out_data + i * strideout/4;
+                for (int j = 0; j < w; ++j) {
+                    guint32 in2_px = *in2_p;
+                    in2_px <<= 24;
+                    *out_p = blend(*in1_p, in2_px);
+                    ++in1_p; ++in2_p; ++out_p;
+                }
+            }
+        }
+    } else {
+        if (bpp2 == 4) {
+            // bpp1 == 1
+            #if HAVE_OPENMP
+            #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+            #endif
+            for (int i = 0; i < h; ++i) {
+                guint8 *in1_p = reinterpret_cast<guint8*>(in1_data) + i * stride1;
+                guint32 *in2_p = in2_data + i * stride2/4;
+                guint32 *out_p = out_data + i * strideout/4;
+                for (int j = 0; j < w; ++j) {
+                    guint32 in1_px = *in1_p;
+                    in1_px <<= 24;
+                    *out_p = blend(in1_px, *in2_p);
+                    ++in1_p; ++in2_p; ++out_p;
+                }
+            }
+        } else {
+            // bpp1 == 1 && bpp2 == 1
+            if (fast_path) {
+                #if HAVE_OPENMP
+                #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+                #endif
+                for (int i = 0; i < limit; ++i) {
+                    guint8 *in1_p = reinterpret_cast<guint8*>(in1_data) + i;
+                    guint8 *in2_p = reinterpret_cast<guint8*>(in2_data) + i;
+                    guint8 *out_p = reinterpret_cast<guint8*>(out_data) + i;
+                    guint32 in1_px = *in1_p; in1_px <<= 24;
+                    guint32 in2_px = *in2_p; in2_px <<= 24;
+                    guint32 out_px = blend(in1_px, in2_px);
+                    *out_p = out_px >> 24;
+                }
+            } else {
+                #if HAVE_OPENMP
+                #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+                #endif
+                for (int i = 0; i < h; ++i) {
+                    guint8 *in1_p = reinterpret_cast<guint8*>(in1_data) + i * stride1;
+                    guint8 *in2_p = reinterpret_cast<guint8*>(in2_data) + i * stride2;
+                    guint8 *out_p = reinterpret_cast<guint8*>(out_data) + i * strideout;
+                    for (int j = 0; j < w; ++j) {
+                        guint32 in1_px = *in1_p; in1_px <<= 24;
+                        guint32 in2_px = *in2_p; in2_px <<= 24;
+                        guint32 out_px = blend(in1_px, in2_px);
+                        *out_p = out_px >> 24;
+                        ++in1_p; ++in2_p; ++out_p;
+                    }
+                }
+            }
+        }
+    }
+
+    cairo_surface_mark_dirty(out);
+}
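To illustrate the functor contract described above — take two 32-bit premultiplied ARGB pixels, return one — here is a minimal sketch; the AddBlend functor is hypothetical and not part of this commit:

    // Hypothetical example: saturating additive blend, applied to each
    // 8-bit channel of a premultiplied ARGB pixel.
    struct AddBlend {
        guint32 operator()(guint32 a, guint32 b) const {
            guint32 result = 0;
            for (unsigned shift = 0; shift < 32; shift += 8) {
                guint32 ca = (a >> shift) & 0xff;
                guint32 cb = (b >> shift) & 0xff;
                result |= std::min(ca + cb, guint32(255)) << shift;
            }
            return result;
        }
    };

    // Usage, assuming three equally sized image surfaces:
    // ink_cairo_surface_blend(in1, in2, out, AddBlend());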
+
+template <typename Filter>
+void ink_cairo_surface_filter(cairo_surface_t *in, cairo_surface_t *out, Filter filter)
+{
+    cairo_surface_flush(in);
+
+    // ASSUMPTIONS
+    // 1. Cairo ARGB32 surface strides are always divisible by 4
+    // 2. We can only receive CAIRO_FORMAT_ARGB32 or CAIRO_FORMAT_A8 surfaces
+    // 3. Surfaces have the same dimensions
+
+    int w = cairo_image_surface_get_width(in);
+    int h = cairo_image_surface_get_height(in);
+    int stridein = cairo_image_surface_get_stride(in);
+    int strideout = cairo_image_surface_get_stride(out);
+    int bppin = cairo_image_surface_get_format(in) == CAIRO_FORMAT_A8 ? 1 : 4;
+    int bppout = cairo_image_surface_get_format(out) == CAIRO_FORMAT_A8 ? 1 : 4;
+    int limit = w * h;
+
+    // Check whether we can loop over pixels without taking stride into account.
+    bool fast_path = true;
+    fast_path &= (stridein == w * bppin);
+    fast_path &= (strideout == w * bppout);
+
+    guint32 *const in_data = reinterpret_cast<guint32*>(cairo_image_surface_get_data(in));
+    guint32 *const out_data = reinterpret_cast<guint32*>(cairo_image_surface_get_data(out));
+
+    #if HAVE_OPENMP
+    Inkscape::Preferences *prefs = Inkscape::Preferences::get();
+    int numOfThreads = prefs->getIntLimited("/options/threading/numthreads", omp_get_num_procs(), 1, 256);
+    if (numOfThreads){} // inform compiler we are using it.
+    #endif
+
+    // this is provided just in case, to avoid problems with strict aliasing rules
+    if (in == out) {
+        if (bppin == 4) {
+            #if HAVE_OPENMP
+            #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+            #endif
+            for (int i = 0; i < limit; ++i) {
+                *(in_data + i) = filter(*(in_data + i));
+            }
+        } else {
+            #if HAVE_OPENMP
+            #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+            #endif
+            for (int i = 0; i < limit; ++i) {
+                guint8 *in_p = reinterpret_cast<guint8*>(in_data) + i;
+                guint32 in_px = *in_p; in_px <<= 24;
+                guint32 out_px = filter(in_px);
+                *in_p = out_px >> 24;
+            }
+        }
+        cairo_surface_mark_dirty(out);
+        return;
+    }
+
+    if (bppin == 4) {
+        if (bppout == 4) {
+            // bppin == 4, bppout == 4
+            if (fast_path) {
+                #if HAVE_OPENMP
+                #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+                #endif
+                for (int i = 0; i < limit; ++i) {
+                    *(out_data + i) = filter(*(in_data + i));
+                }
+            } else {
+                #if HAVE_OPENMP
+                #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+                #endif
+                for (int i = 0; i < h; ++i) {
+                    guint32 *in_p = in_data + i * stridein/4;
+                    guint32 *out_p = out_data + i * strideout/4;
+                    for (int j = 0; j < w; ++j) {
+                        *out_p = filter(*in_p);
+                        ++in_p; ++out_p;
+                    }
+                }
+            }
+        } else {
+            // bppin == 4, bppout == 1
+            // we use this path with COLORMATRIX_LUMINANCETOALPHA
+            #if HAVE_OPENMP
+            #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+            #endif
+            for (int i = 0; i < h; ++i) {
+                guint32 *in_p = in_data + i * stridein/4;
+                guint8 *out_p = reinterpret_cast<guint8*>(out_data) + i * strideout;
+                for (int j = 0; j < w; ++j) {
+                    guint32 out_px = filter(*in_p);
+                    *out_p = out_px >> 24;
+                    ++in_p; ++out_p;
+                }
+            }
+        }
+    } else if (bppout == 1) {
+        // bppin == 1, bppout == 1
+        if (fast_path) {
+            #if HAVE_OPENMP
+            #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+            #endif
+            for (int i = 0; i < limit; ++i) {
+                guint8 *in_p = reinterpret_cast<guint8*>(in_data) + i;
+                guint8 *out_p = reinterpret_cast<guint8*>(out_data) + i;
+                guint32 in_px = *in_p; in_px <<= 24;
+                guint32 out_px = filter(in_px);
+                *out_p = out_px >> 24;
+            }
+        } else {
+            #if HAVE_OPENMP
+            #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+            #endif
+            for (int i = 0; i < h; ++i) {
+                guint8 *in_p = reinterpret_cast<guint8*>(in_data) + i * stridein;
+                guint8 *out_p = reinterpret_cast<guint8*>(out_data) + i * strideout;
+                for (int j = 0; j < w; ++j) {
+                    guint32 in_px = *in_p; in_px <<= 24;
+                    guint32 out_px = filter(in_px);
+                    *out_p = out_px >> 24;
+                    ++in_p; ++out_p;
+                }
+            }
+        }
+    } else {
+        // bppin == 1, bppout == 4
+        // used in COLORMATRIX_MATRIX when in is NR_FILTER_SOURCEALPHA
+        if (fast_path) {
+            #if HAVE_OPENMP
+            #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+            #endif
+            for (int i = 0; i < limit; ++i) {
+                guint8 in_p = reinterpret_cast<guint8*>(in_data)[i];
+                out_data[i] = filter(guint32(in_p) << 24);
+            }
+        } else {
+            #if HAVE_OPENMP
+            #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+            #endif
+            for (int i = 0; i < h; ++i) {
+                guint8 *in_p = reinterpret_cast<guint8*>(in_data) + i * stridein;
+                guint32 *out_p = out_data + i * strideout/4;
+                for (int j = 0; j < w; ++j) {
+                    out_p[j] = filter(guint32(in_p[j]) << 24);
+                }
+            }
+        }
+    }
+    cairo_surface_mark_dirty(out);
+}
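The Filter functor works the same way but takes a single pixel. As a sketch (hypothetical functor, assuming premultiplied ARGB as elsewhere in this file): halving all four channels fades a pixel to 50% opacity, and because halving a byte cannot borrow from a neighbouring byte, the whole word can be shifted at once:

    // Hypothetical example: uniform 50% fade of a premultiplied ARGB pixel.
    struct HalveOpacity {
        guint32 operator()(guint32 p) const {
            // Shift the whole pixel right by one bit, then mask off the
            // bits that crossed the channel boundaries.
            return (p >> 1) & 0x7f7f7f7f;
        }
    };

    // ink_cairo_surface_filter(in, out, HalveOpacity());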
+
+
+/**
+ * Synthesize surface pixels based on their position.
+ * This template accepts a functor that gets called with the x and y coordinates of the pixels,
+ * given as integers.
+ * @param out Output surface
+ * @param out_area The region of the output surface that should be synthesized
+ * @param synth Synthesis functor
+ */
+template <typename Synth>
+void ink_cairo_surface_synthesize(cairo_surface_t *out, cairo_rectangle_t const &out_area, Synth synth)
+{
+    // ASSUMPTIONS
+    // 1. Cairo ARGB32 surface strides are always divisible by 4
+    // 2. We can only receive CAIRO_FORMAT_ARGB32 or CAIRO_FORMAT_A8 surfaces
+
+    int w = out_area.width;
+    int h = out_area.height;
+    int strideout = cairo_image_surface_get_stride(out);
+    int bppout = cairo_image_surface_get_format(out) == CAIRO_FORMAT_A8 ? 1 : 4;
+    // NOTE: fast path is not used, because we would need 2 divisions to get pixel indices
+
+    unsigned char *out_data = cairo_image_surface_get_data(out);
+
+    #if HAVE_OPENMP
+    int limit = w * h;
+    Inkscape::Preferences *prefs = Inkscape::Preferences::get();
+    int numOfThreads = prefs->getIntLimited("/options/threading/numthreads", omp_get_num_procs(), 1, 256);
+    if (numOfThreads){} // inform compiler we are using it.
+    #endif
+
+    if (bppout == 4) {
+        #if HAVE_OPENMP
+        #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+        #endif
+        for (int i = out_area.y; i < h; ++i) {
+            guint32 *out_p = reinterpret_cast<guint32*>(out_data + i * strideout);
+            for (int j = out_area.x; j < w; ++j) {
+                *out_p = synth(j, i);
+                ++out_p;
+            }
+        }
+    } else {
+        // bppout == 1
+        #if HAVE_OPENMP
+        #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+        #endif
+        for (int i = out_area.y; i < h; ++i) {
+            guint8 *out_p = out_data + i * strideout;
+            for (int j = out_area.x; j < w; ++j) {
+                guint32 out_px = synth(j, i);
+                *out_p = out_px >> 24;
+                ++out_p;
+            }
+        }
+    }
+    cairo_surface_mark_dirty(out);
+}
+
+template <typename Synth>
+void ink_cairo_surface_synthesize(cairo_surface_t *out, Synth synth)
+{
+    int w = cairo_image_surface_get_width(out);
+    int h = cairo_image_surface_get_height(out);
+
+    cairo_rectangle_t area;
+    area.x = 0;
+    area.y = 0;
+    area.width = w;
+    area.height = h;
+
+    ink_cairo_surface_synthesize(out, area, synth);
+}
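A Synth functor receives integer coordinates and returns the ARGB pixel for that position. A minimal sketch (hypothetical, not taken from this commit) that fills a surface with an 8x8 checkerboard:

    // surface is assumed to be a CAIRO_FORMAT_ARGB32 image surface.
    ink_cairo_surface_synthesize(surface, [](int x, int y) -> guint32 {
        bool dark = ((x / 8) + (y / 8)) & 1;
        return dark ? 0xff000000u : 0xffffffffu; // opaque black / white
    });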
+
+struct SurfaceSynth {
+    SurfaceSynth(cairo_surface_t *surface)
+        : _px(cairo_image_surface_get_data(surface))
+        , _w(cairo_image_surface_get_width(surface))
+        , _h(cairo_image_surface_get_height(surface))
+        , _stride(cairo_image_surface_get_stride(surface))
+        , _alpha(cairo_surface_get_content(surface) == CAIRO_CONTENT_ALPHA)
+    {
+        cairo_surface_flush(surface);
+    }
+
+    guint32 pixelAt(int x, int y) const {
+        if (_alpha) {
+            unsigned char *px = _px + y*_stride + x;
+            return *px << 24;
+        } else {
+            unsigned char *px = _px + y*_stride + x*4;
+            return *reinterpret_cast<guint32*>(px);
+        }
+    }
+    guint32 alphaAt(int x, int y) const {
+        if (_alpha) {
+            unsigned char *px = _px + y*_stride + x;
+            return *px;
+        } else {
+            unsigned char *px = _px + y*_stride + x*4;
+            guint32 p = *reinterpret_cast<guint32*>(px);
+            return (p & 0xff000000) >> 24;
+        }
+    }
+
+    // retrieve a pixel value with bilinear interpolation
+    guint32 pixelAt(double x, double y) const {
+        if (_alpha) {
+            return alphaAt(x, y) << 24;
+        }
+
+        double xf = floor(x), yf = floor(y);
+        int xi = xf, yi = yf;
+        guint32 xif = round((x - xf) * 255), yif = round((y - yf) * 255);
+        guint32 p00, p01, p10, p11;
+
+        unsigned char *pxi = _px + yi*_stride + xi*4;
+        guint32 *pxu = reinterpret_cast<guint32*>(pxi);
+        guint32 *pxl = reinterpret_cast<guint32*>(pxi + _stride);
+        p00 = *pxu; p10 = *(pxu + 1);
+        p01 = *pxl; p11 = *(pxl + 1);
+
+        guint32 comp[4];
+
+        for (unsigned i = 0; i < 4; ++i) {
+            guint32 shift = i*8;
+            guint32 mask = 0xff << shift;
+            guint32 c00 = (p00 & mask) >> shift;
+            guint32 c10 = (p10 & mask) >> shift;
+            guint32 c01 = (p01 & mask) >> shift;
+            guint32 c11 = (p11 & mask) >> shift;
+
+            guint32 iu = (255-xif) * c00 + xif * c10;
+            guint32 il = (255-xif) * c01 + xif * c11;
+            comp[i] = (255-yif) * iu + yif * il;
+            comp[i] = (comp[i] + (255*255/2)) / (255*255);
+        }
+
+        guint32 result = comp[0] | (comp[1] << 8) | (comp[2] << 16) | (comp[3] << 24);
+        return result;
+    }
+
+    // retrieve an alpha value with bilinear interpolation
+    guint32 alphaAt(double x, double y) const {
+        double xf = floor(x), yf = floor(y);
+        int xi = xf, yi = yf;
+        guint32 xif = round((x - xf) * 255), yif = round((y - yf) * 255);
+        guint32 p00, p01, p10, p11;
+        if (_alpha) {
+            unsigned char *pxu = _px + yi*_stride + xi;
+            unsigned char *pxl = pxu + _stride;
+            p00 = *pxu; p10 = *(pxu + 1);
+            p01 = *pxl; p11 = *(pxl + 1);
+        } else {
+            unsigned char *pxi = _px + yi*_stride + xi*4;
+            guint32 *pxu = reinterpret_cast<guint32*>(pxi);
+            guint32 *pxl = reinterpret_cast<guint32*>(pxi + _stride);
+            p00 = (*pxu & 0xff000000) >> 24; p10 = (*(pxu + 1) & 0xff000000) >> 24;
+            p01 = (*pxl & 0xff000000) >> 24; p11 = (*(pxl + 1) & 0xff000000) >> 24;
+        }
+        guint32 iu = (255-xif) * p00 + xif * p10;
+        guint32 il = (255-xif) * p01 + xif * p11;
+        guint32 result = (255-yif) * iu + yif * il;
+        result = (result + (255*255/2)) / (255*255);
+        return result;
+    }
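The bilinear accessors above work in 8-bit fixed point: the fractional offsets are scaled to 0..255, the weighted sum therefore carries a factor of 255*255, and adding 255*255/2 before the division rounds to nearest instead of truncating. A standalone sketch checking this against floating-point arithmetic (all values hypothetical):

    #include <cstdio>

    int main() {
        unsigned p00 = 10, p10 = 200, p01 = 60, p11 = 120; // corner samples
        unsigned xif = 64, yif = 192;                      // fractions scaled to 0..255
        unsigned iu = (255 - xif) * p00 + xif * p10;
        unsigned il = (255 - xif) * p01 + xif * p11;
        unsigned fixed = ((255 - yif) * iu + yif * il + (255*255/2)) / (255*255);
        double fx = xif / 255.0, fy = yif / 255.0;
        double ref = (1-fy) * ((1-fx)*p00 + fx*p10) + fy * ((1-fx)*p01 + fx*p11);
        std::printf("fixed = %u, float = %.3f\n", fixed, ref); // should agree closely
    }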
+
+    // compute surface normal at given coordinates using 3x3 Sobel gradient filter
+    NR::Fvector surfaceNormalAt(int x, int y, double scale) const {
+        // Below there are some multiplies by zero. They will be optimized out.
+        // Do not remove them, because they improve readability.
+        // NOTE: fetching using alphaAt is slightly lazy.
+        NR::Fvector normal;
+        double fx = -scale/255.0, fy = -scale/255.0;
+        normal[Z_3D] = 1.0;
+        if (G_UNLIKELY(x == 0)) {
+            // leftmost column
+            if (G_UNLIKELY(y == 0)) {
+                // upper left corner
+                fx *= (2.0/3.0);
+                fy *= (2.0/3.0);
+                double p00 = alphaAt(x,y),   p10 = alphaAt(x+1, y),
+                       p01 = alphaAt(x,y+1), p11 = alphaAt(x+1, y+1);
+                normal[X_3D] =
+                    -2.0 * p00  +2.0 * p10
+                    -1.0 * p01  +1.0 * p11;
+                normal[Y_3D] =
+                    -2.0 * p00  -1.0 * p10
+                    +2.0 * p01  +1.0 * p11;
+            } else if (G_UNLIKELY(y == (_h - 1))) {
+                // lower left corner
+                fx *= (2.0/3.0);
+                fy *= (2.0/3.0);
+                double p00 = alphaAt(x,y-1), p10 = alphaAt(x+1, y-1),
+                       p01 = alphaAt(x,y  ), p11 = alphaAt(x+1, y);
+                normal[X_3D] =
+                    -1.0 * p00  +1.0 * p10
+                    -2.0 * p01  +2.0 * p11;
+                normal[Y_3D] =
+                    -2.0 * p00  -1.0 * p10
+                    +2.0 * p01  +1.0 * p11;
+            } else {
+                // leftmost column
+                fx *= (1.0/2.0);
+                fy *= (1.0/3.0);
+                double p00 = alphaAt(x, y-1), p10 = alphaAt(x+1, y-1),
+                       p01 = alphaAt(x, y  ), p11 = alphaAt(x+1, y  ),
+                       p02 = alphaAt(x, y+1), p12 = alphaAt(x+1, y+1);
+                normal[X_3D] =
+                    -1.0 * p00  +1.0 * p10
+                    -2.0 * p01  +2.0 * p11
+                    -1.0 * p02  +1.0 * p12;
+                normal[Y_3D] =
+                    -2.0 * p00  -1.0 * p10
+                    +0.0 * p01  +0.0 * p11 // this will be optimized out
+                    +2.0 * p02  +1.0 * p12;
+            }
+        } else if (G_UNLIKELY(x == (_w - 1))) {
+            // rightmost column
+            if (G_UNLIKELY(y == 0)) {
+                // top right corner
+                fx *= (2.0/3.0);
+                fy *= (2.0/3.0);
+                double p00 = alphaAt(x-1,y),   p10 = alphaAt(x, y),
+                       p01 = alphaAt(x-1,y+1), p11 = alphaAt(x, y+1);
+                normal[X_3D] =
+                    -2.0 * p00  +2.0 * p10
+                    -1.0 * p01  +1.0 * p11;
+                normal[Y_3D] =
+                    -1.0 * p00  -2.0 * p10
+                    +1.0 * p01  +2.0 * p11;
+            } else if (G_UNLIKELY(y == (_h - 1))) {
+                // bottom right corner
+                fx *= (2.0/3.0);
+                fy *= (2.0/3.0);
+                double p00 = alphaAt(x-1,y-1), p10 = alphaAt(x, y-1),
+                       p01 = alphaAt(x-1,y  ), p11 = alphaAt(x, y);
+                normal[X_3D] =
+                    -1.0 * p00  +1.0 * p10
+                    -2.0 * p01  +2.0 * p11;
+                normal[Y_3D] =
+                    -1.0 * p00  -2.0 * p10
+                    +1.0 * p01  +2.0 * p11;
+            } else {
+                // rightmost column
+                fx *= (1.0/2.0);
+                fy *= (1.0/3.0);
+                double p00 = alphaAt(x-1, y-1), p10 = alphaAt(x, y-1),
+                       p01 = alphaAt(x-1, y  ), p11 = alphaAt(x, y  ),
+                       p02 = alphaAt(x-1, y+1), p12 = alphaAt(x, y+1);
+                normal[X_3D] =
+                    -1.0 * p00  +1.0 * p10
+                    -2.0 * p01  +2.0 * p11
+                    -1.0 * p02  +1.0 * p12;
+                normal[Y_3D] =
+                    -1.0 * p00  -2.0 * p10
+                    +0.0 * p01  +0.0 * p11
+                    +1.0 * p02  +2.0 * p12;
+            }
+        } else {
+            // interior
+            if (G_UNLIKELY(y == 0)) {
+                // top row
+                fx *= (1.0/3.0);
+                fy *= (1.0/2.0);
+                double p00 = alphaAt(x-1, y  ), p10 = alphaAt(x, y  ), p20 = alphaAt(x+1, y  ),
+                       p01 = alphaAt(x-1, y+1), p11 = alphaAt(x, y+1), p21 = alphaAt(x+1, y+1);
+                normal[X_3D] =
+                    -2.0 * p00  +0.0 * p10  +2.0 * p20
+                    -1.0 * p01  +0.0 * p11  +1.0 * p21;
+                normal[Y_3D] =
+                    -1.0 * p00  -2.0 * p10  -1.0 * p20
+                    +1.0 * p01  +2.0 * p11  +1.0 * p21;
+            } else if (G_UNLIKELY(y == (_h - 1))) {
+                // bottom row
+                fx *= (1.0/3.0);
+                fy *= (1.0/2.0);
+                double p00 = alphaAt(x-1, y-1), p10 = alphaAt(x, y-1), p20 = alphaAt(x+1, y-1),
+                       p01 = alphaAt(x-1, y  ), p11 = alphaAt(x, y  ), p21 = alphaAt(x+1, y  );
+                normal[X_3D] =
+                    -1.0 * p00  +0.0 * p10  +1.0 * p20
+                    -2.0 * p01  +0.0 * p11  +2.0 * p21;
+                normal[Y_3D] =
+                    -1.0 * p00  -2.0 * p10  -1.0 * p20
+                    +1.0 * p01  +2.0 * p11  +1.0 * p21;
+            } else {
+                // interior pixels
+                // note: p11 is actually unused, so we don't fetch its value
+                fx *= (1.0/4.0);
+                fy *= (1.0/4.0);
+                double p00 = alphaAt(x-1, y-1), p10 = alphaAt(x, y-1), p20 = alphaAt(x+1, y-1),
+                       p01 = alphaAt(x-1, y  ), p11 = 0.0,             p21 = alphaAt(x+1, y  ),
+                       p02 = alphaAt(x-1, y+1), p12 = alphaAt(x, y+1), p22 = alphaAt(x+1, y+1);
+                normal[X_3D] =
+                    -1.0 * p00  +0.0 * p10  +1.0 * p20
+                    -2.0 * p01  +0.0 * p11  +2.0 * p21
+                    -1.0 * p02  +0.0 * p12  +1.0 * p22;
+                normal[Y_3D] =
+                    -1.0 * p00  -2.0 * p10  -1.0 * p20
+                    +0.0 * p01  +0.0 * p11  +0.0 * p21
+                    +1.0 * p02  +2.0 * p12  +1.0 * p22;
+            }
+        }
+        normal[X_3D] *= fx;
+        normal[Y_3D] *= fy;
+        NR::normalize_vector(normal);
+        return normal;
+    }
+
+    unsigned char *_px;
+    int _w, _h, _stride;
+    bool _alpha;
+};
+
+/*
+// simple pixel accessor for image surface that handles different edge wrapping modes
+class PixelAccessor {
+public:
+    typedef PixelAccessor self;
+    enum EdgeMode {
+        EDGE_PAD,
+        EDGE_WRAP,
+        EDGE_ZERO
+    };
+
+    PixelAccessor(cairo_surface_t *s, EdgeMode e)
+        : _surface(s)
+        , _px(cairo_image_surface_get_data(s))
+        , _x(0), _y(0)
+        , _w(cairo_image_surface_get_width(s))
+        , _h(cairo_image_surface_get_height(s))
+        , _stride(cairo_image_surface_get_stride(s))
+        , _edge_mode(e)
+        , _alpha(cairo_image_surface_get_format(s) == CAIRO_FORMAT_A8)
+    {}
+
+    guint32 pixelAt(int x, int y) {
+        // This is a lot of ifs for a single pixel access. However, branch prediction
+        // should help us a lot, as the result of ifs is always the same for a single image.
+        int real_x = x, real_y = y;
+        switch (_edge_mode) {
+        case EDGE_PAD:
+            real_x = CLAMP(x, 0, _w-1);
+            real_y = CLAMP(y, 0, _h-1);
+            break;
+        case EDGE_WRAP:
+            real_x %= _w;
+            real_y %= _h;
+            break;
+        case EDGE_ZERO:
+        default:
+            if (x < 0 || x >= _w || y < 0 || y >= _h)
+                return 0;
+            break;
+        }
+        if (_alpha) {
+            return *(_px + real_y*_stride + real_x) << 24;
+        } else {
+            guint32 *px = reinterpret_cast<guint32*>(_px + real_y*_stride + real_x*4);
+            return *px;
+        }
+    }
+private:
+    cairo_surface_t *_surface;
+    guint8 *_px;
+    int _x, _y, _w, _h, _stride;
+    EdgeMode _edge_mode;
+    bool _alpha;
+};*/
+
+// Some helpers for pixel manipulation
+G_GNUC_CONST inline gint32
+pxclamp(gint32 v, gint32 low, gint32 high) {
+    // NOTE: it is possible to write a "branchless" clamping operation.
+    // However, it will be slower than this function, because the code below
+    // is compiled to conditional moves.
+    if (v < low)  return low;
+    if (v > high) return high;
+    return v;
+}
+
+#endif
+/*
+  Local Variables:
+  mode:c++
+  c-file-style:"stroustrup"
+  c-file-offsets:((innamespace . 0)(inline-open . 0)(case-label . +))
+  indent-tabs-mode:nil
+  fill-column:99
+  End:
+*/
+// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:fileencoding=utf-8:textwidth=99 :
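For context on how SurfaceSynth is meant to be consumed: it wraps an existing image surface so a filter can query interpolated alpha values or a Sobel surface normal per pixel. A hedged sketch (the names bump_surface, width, height and surface_scale are illustrative, not from this commit):

    SurfaceSynth bump(bump_surface); // wraps an existing cairo image surface
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            // Unit-length normal derived from the alpha channel; scale plays
            // a role analogous to surfaceScale in SVG lighting filters.
            NR::Fvector n = bump.surfaceNormalAt(x, y, surface_scale);
            double nx = n[X_3D], ny = n[Y_3D], nz = n[Z_3D];
            // ... use (nx, ny, nz) in a lighting computation ...
        }
    }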