path: root/src/display/cairo-templates.h
author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 18:24:48 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 18:24:48 +0000
commit     cca66b9ec4e494c1d919bff0f71a820d8afab1fa (patch)
tree       146f39ded1c938019e1ed42d30923c2ac9e86789 /src/display/cairo-templates.h
parent     Initial commit. (diff)
Adding upstream version 1.2.2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat
-rw-r--r--  src/display/cairo-templates.h  721
1 file changed, 721 insertions, 0 deletions
diff --git a/src/display/cairo-templates.h b/src/display/cairo-templates.h
new file mode 100644
index 0000000..8b7493e
--- /dev/null
+++ b/src/display/cairo-templates.h
@@ -0,0 +1,721 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/**
+ * @file
+ * Cairo software blending templates.
+ *//*
+ * Authors:
+ * Krzysztof KosiƄski <tweenk.pl@gmail.com>
+ *
+ * Copyright (C) 2010 Authors
+ * Released under GNU GPL v2+, read the file 'COPYING' for more information.
+ */
+
+#ifndef SEEN_INKSCAPE_DISPLAY_CAIRO_TEMPLATES_H
+#define SEEN_INKSCAPE_DISPLAY_CAIRO_TEMPLATES_H
+
+#ifdef HAVE_CONFIG_H
+# include "config.h" // only include where actually required!
+#endif
+
+#include <glib.h>
+
+#ifdef HAVE_OPENMP
+#include <omp.h>
+#include "preferences.h"
+// single-threaded operation if the number of pixels is below this threshold
+static const int OPENMP_THRESHOLD = 2048;
+#endif
+
+#include <algorithm>
+#include <cairo.h>
+#include <cmath>
+#include "display/nr-3dutils.h"
+#include "display/cairo-utils.h"
+
+/**
+ * Blend two surfaces using the supplied functor.
+ * This template blends two Cairo image surfaces using a blending functor that takes
+ * two 32-bit ARGB pixel values and returns a modified 32-bit pixel value.
+ * Differences in input surface formats are handled transparently. In the future, this template
+ * will also handle a software fallback for GL surfaces.
+ */
+template <typename Blend>
+void ink_cairo_surface_blend(cairo_surface_t *in1, cairo_surface_t *in2, cairo_surface_t *out, Blend blend)
+{
+ cairo_surface_flush(in1);
+ cairo_surface_flush(in2);
+
+ // ASSUMPTIONS
+ // 1. Cairo ARGB32 surface strides are always divisible by 4
+ // 2. We can only receive CAIRO_FORMAT_ARGB32 or CAIRO_FORMAT_A8 surfaces
+ // 3. Both surfaces are of the same size
+ // 4. Output surface is ARGB32 if at least one input is ARGB32
+
+ int w = cairo_image_surface_get_width(in2);
+ int h = cairo_image_surface_get_height(in2);
+ int stride1 = cairo_image_surface_get_stride(in1);
+ int stride2 = cairo_image_surface_get_stride(in2);
+ int strideout = cairo_image_surface_get_stride(out);
+ int bpp1 = cairo_image_surface_get_format(in1) == CAIRO_FORMAT_A8 ? 1 : 4;
+ int bpp2 = cairo_image_surface_get_format(in2) == CAIRO_FORMAT_A8 ? 1 : 4;
+ int bppout = std::max(bpp1, bpp2);
+
+ // Check whether we can loop over pixels without taking stride into account.
+ bool fast_path = true;
+ fast_path &= (stride1 == w * bpp1);
+ fast_path &= (stride2 == w * bpp2);
+ fast_path &= (strideout == w * bppout);
+
+ int limit = w * h;
+
+ guint32 *const in1_data = reinterpret_cast<guint32*>(cairo_image_surface_get_data(in1));
+ guint32 *const in2_data = reinterpret_cast<guint32*>(cairo_image_surface_get_data(in2));
+ guint32 *const out_data = reinterpret_cast<guint32*>(cairo_image_surface_get_data(out));
+
+ // NOTE
+ // OpenMP probably doesn't help much here.
+ // It would be better to render more than 1 tile at a time.
+ #if HAVE_OPENMP
+ Inkscape::Preferences *prefs = Inkscape::Preferences::get();
+ int numOfThreads = prefs->getIntLimited("/options/threading/numthreads", omp_get_num_procs(), 1, 256);
+    if (numOfThreads) {} // silence unused-variable warnings; some compilers ignore use inside a pragma
+ #endif
+
+ // The number of code paths here is evil.
+ if (bpp1 == 4) {
+ if (bpp2 == 4) {
+ if (fast_path) {
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+ for (int i = 0; i < limit; ++i) {
+ *(out_data + i) = blend(*(in1_data + i), *(in2_data + i));
+ }
+ } else {
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+ for (int i = 0; i < h; ++i) {
+ guint32 *in1_p = in1_data + i * stride1/4;
+ guint32 *in2_p = in2_data + i * stride2/4;
+ guint32 *out_p = out_data + i * strideout/4;
+ for (int j = 0; j < w; ++j) {
+ *out_p = blend(*in1_p, *in2_p);
+ ++in1_p; ++in2_p; ++out_p;
+ }
+ }
+ }
+ } else {
+ // bpp2 == 1
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+ for (int i = 0; i < h; ++i) {
+ guint32 *in1_p = in1_data + i * stride1/4;
+ guint8 *in2_p = reinterpret_cast<guint8*>(in2_data) + i * stride2;
+ guint32 *out_p = out_data + i * strideout/4;
+ for (int j = 0; j < w; ++j) {
+ guint32 in2_px = *in2_p;
+ in2_px <<= 24;
+ *out_p = blend(*in1_p, in2_px);
+ ++in1_p; ++in2_p; ++out_p;
+ }
+ }
+ }
+ } else {
+ if (bpp2 == 4) {
+ // bpp1 == 1
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+ for (int i = 0; i < h; ++i) {
+ guint8 *in1_p = reinterpret_cast<guint8*>(in1_data) + i * stride1;
+ guint32 *in2_p = in2_data + i * stride2/4;
+ guint32 *out_p = out_data + i * strideout/4;
+ for (int j = 0; j < w; ++j) {
+ guint32 in1_px = *in1_p;
+ in1_px <<= 24;
+ *out_p = blend(in1_px, *in2_p);
+ ++in1_p; ++in2_p; ++out_p;
+ }
+ }
+ } else {
+ // bpp1 == 1 && bpp2 == 1
+ if (fast_path) {
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+ for (int i = 0; i < limit; ++i) {
+ guint8 *in1_p = reinterpret_cast<guint8*>(in1_data) + i;
+ guint8 *in2_p = reinterpret_cast<guint8*>(in2_data) + i;
+ guint8 *out_p = reinterpret_cast<guint8*>(out_data) + i;
+ guint32 in1_px = *in1_p; in1_px <<= 24;
+ guint32 in2_px = *in2_p; in2_px <<= 24;
+ guint32 out_px = blend(in1_px, in2_px);
+ *out_p = out_px >> 24;
+ }
+ } else {
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+ for (int i = 0; i < h; ++i) {
+ guint8 *in1_p = reinterpret_cast<guint8*>(in1_data) + i * stride1;
+ guint8 *in2_p = reinterpret_cast<guint8*>(in2_data) + i * stride2;
+ guint8 *out_p = reinterpret_cast<guint8*>(out_data) + i * strideout;
+ for (int j = 0; j < w; ++j) {
+ guint32 in1_px = *in1_p; in1_px <<= 24;
+ guint32 in2_px = *in2_p; in2_px <<= 24;
+ guint32 out_px = blend(in1_px, in2_px);
+ *out_p = out_px >> 24;
+ ++in1_p; ++in2_p; ++out_p;
+ }
+ }
+ }
+ }
+ }
+
+ cairo_surface_mark_dirty(out);
+}
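+
+// A minimal usage sketch (not part of this header): blending two surfaces
+// with a screen-blend functor. The names `background`, `foreground` and
+// `result` are hypothetical same-sized image surfaces. Screen blending has
+// the convenient property that r = a + b - a*b/255 holds per channel even
+// for premultiplied ARGB.
+//
+//   ink_cairo_surface_blend(background, foreground, result,
+//       [](guint32 a, guint32 b) -> guint32 {
+//           guint32 r = 0;
+//           for (unsigned shift = 0; shift < 32; shift += 8) {
+//               guint32 ca = (a >> shift) & 0xff;
+//               guint32 cb = (b >> shift) & 0xff;
+//               r |= (ca + cb - (ca * cb + 127) / 255) << shift;
+//           }
+//           return r;
+//       });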
+
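+/**
+ * Transform a surface using the supplied functor.
+ * This template applies a filtering functor, taking a single 32-bit ARGB pixel value
+ * and returning a modified 32-bit pixel value, to every pixel of the input surface,
+ * and writes the results to the output surface. A8 pixels are passed to the functor
+ * as pure-alpha ARGB values (alpha in the topmost byte). In-place operation
+ * (in == out) is supported.
+ */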
+template <typename Filter>
+void ink_cairo_surface_filter(cairo_surface_t *in, cairo_surface_t *out, Filter filter)
+{
+ cairo_surface_flush(in);
+
+ // ASSUMPTIONS
+ // 1. Cairo ARGB32 surface strides are always divisible by 4
+ // 2. We can only receive CAIRO_FORMAT_ARGB32 or CAIRO_FORMAT_A8 surfaces
+ // 3. Surfaces have the same dimensions
+
+ int w = cairo_image_surface_get_width(in);
+ int h = cairo_image_surface_get_height(in);
+ int stridein = cairo_image_surface_get_stride(in);
+ int strideout = cairo_image_surface_get_stride(out);
+ int bppin = cairo_image_surface_get_format(in) == CAIRO_FORMAT_A8 ? 1 : 4;
+ int bppout = cairo_image_surface_get_format(out) == CAIRO_FORMAT_A8 ? 1 : 4;
+ int limit = w * h;
+
+ // Check whether we can loop over pixels without taking stride into account.
+ bool fast_path = true;
+ fast_path &= (stridein == w * bppin);
+ fast_path &= (strideout == w * bppout);
+
+ guint32 *const in_data = reinterpret_cast<guint32*>(cairo_image_surface_get_data(in));
+ guint32 *const out_data = reinterpret_cast<guint32*>(cairo_image_surface_get_data(out));
+
+ #if HAVE_OPENMP
+ Inkscape::Preferences *prefs = Inkscape::Preferences::get();
+ int numOfThreads = prefs->getIntLimited("/options/threading/numthreads", omp_get_num_procs(), 1, 256);
+    if (numOfThreads) {} // silence unused-variable warnings; some compilers ignore use inside a pragma
+ #endif
+
+    // Handle in-place operation separately, to avoid problems with strict aliasing rules
+ if (in == out) {
+ if (bppin == 4) {
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+ for (int i = 0; i < limit; ++i) {
+ *(in_data + i) = filter(*(in_data + i));
+ }
+ } else {
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+ for (int i = 0; i < limit; ++i) {
+ guint8 *in_p = reinterpret_cast<guint8*>(in_data) + i;
+ guint32 in_px = *in_p; in_px <<= 24;
+ guint32 out_px = filter(in_px);
+ *in_p = out_px >> 24;
+ }
+ }
+ cairo_surface_mark_dirty(out);
+ return;
+ }
+
+ if (bppin == 4) {
+ if (bppout == 4) {
+ // bppin == 4, bppout == 4
+ if (fast_path) {
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+ for (int i = 0; i < limit; ++i) {
+ *(out_data + i) = filter(*(in_data + i));
+ }
+ } else {
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+ for (int i = 0; i < h; ++i) {
+ guint32 *in_p = in_data + i * stridein/4;
+ guint32 *out_p = out_data + i * strideout/4;
+ for (int j = 0; j < w; ++j) {
+ *out_p = filter(*in_p);
+ ++in_p; ++out_p;
+ }
+ }
+ }
+ } else {
+ // bppin == 4, bppout == 1
+ // we use this path with COLORMATRIX_LUMINANCETOALPHA
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+ for (int i = 0; i < h; ++i) {
+ guint32 *in_p = in_data + i * stridein/4;
+ guint8 *out_p = reinterpret_cast<guint8*>(out_data) + i * strideout;
+ for (int j = 0; j < w; ++j) {
+ guint32 out_px = filter(*in_p);
+ *out_p = out_px >> 24;
+ ++in_p; ++out_p;
+ }
+ }
+ }
+ } else if (bppout == 1) {
+ // bppin == 1, bppout == 1
+ if (fast_path) {
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+ for (int i = 0; i < limit; ++i) {
+ guint8 *in_p = reinterpret_cast<guint8*>(in_data) + i;
+ guint8 *out_p = reinterpret_cast<guint8*>(out_data) + i;
+ guint32 in_px = *in_p; in_px <<= 24;
+ guint32 out_px = filter(in_px);
+ *out_p = out_px >> 24;
+ }
+ } else {
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+ for (int i = 0; i < h; ++i) {
+ guint8 *in_p = reinterpret_cast<guint8*>(in_data) + i * stridein;
+ guint8 *out_p = reinterpret_cast<guint8*>(out_data) + i * strideout;
+ for (int j = 0; j < w; ++j) {
+ guint32 in_px = *in_p; in_px <<= 24;
+ guint32 out_px = filter(in_px);
+ *out_p = out_px >> 24;
+ ++in_p; ++out_p;
+ }
+ }
+ }
+ } else {
+ // bppin == 1, bppout == 4
+ // used in COLORMATRIX_MATRIX when in is NR_FILTER_SOURCEALPHA
+ if (fast_path) {
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+ for (int i = 0; i < limit; ++i) {
+ guint8 in_p = reinterpret_cast<guint8*>(in_data)[i];
+ out_data[i] = filter(guint32(in_p) << 24);
+ }
+ } else {
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+ for (int i = 0; i < h; ++i) {
+ guint8 *in_p = reinterpret_cast<guint8*>(in_data) + i * stridein;
+ guint32 *out_p = out_data + i * strideout/4;
+ for (int j = 0; j < w; ++j) {
+ out_p[j] = filter(guint32(in_p[j]) << 24);
+ }
+ }
+ }
+ }
+ cairo_surface_mark_dirty(out);
+}
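+
+// A minimal usage sketch (not part of this header): swapping the red and
+// blue channels, one of the few per-pixel operations that is safe on
+// premultiplied ARGB without unpremultiplying first. `input` and `output`
+// are hypothetical same-sized image surfaces.
+//
+//   ink_cairo_surface_filter(input, output, [](guint32 px) -> guint32 {
+//       guint32 r = (px >> 16) & 0xff;
+//       guint32 b = px & 0xff;
+//       return (px & 0xff00ff00) | (b << 16) | r;
+//   });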
+
+
+/**
+ * Synthesize surface pixels based on their position.
+ * This template accepts a functor that gets called with the x and y coordinates of the pixels,
+ * given as integers.
+ * @param out Output surface
+ * @param out_area The region of the output surface that should be synthesized
+ * @param synth Synthesis functor
+ */
+template <typename Synth>
+void ink_cairo_surface_synthesize(cairo_surface_t *out, cairo_rectangle_t const &out_area, Synth synth)
+{
+ // ASSUMPTIONS
+ // 1. Cairo ARGB32 surface strides are always divisible by 4
+ // 2. We can only receive CAIRO_FORMAT_ARGB32 or CAIRO_FORMAT_A8 surfaces
+
+    int x0 = out_area.x, x1 = x0 + out_area.width;
+    int y0 = out_area.y, y1 = y0 + out_area.height;
+ int strideout = cairo_image_surface_get_stride(out);
+ int bppout = cairo_image_surface_get_format(out) == CAIRO_FORMAT_A8 ? 1 : 4;
+ // NOTE: fast path is not used, because we would need 2 divisions to get pixel indices
+
+ unsigned char *out_data = cairo_image_surface_get_data(out);
+
+ #if HAVE_OPENMP
+    int limit = (x1 - x0) * (y1 - y0);
+ Inkscape::Preferences *prefs = Inkscape::Preferences::get();
+ int numOfThreads = prefs->getIntLimited("/options/threading/numthreads", omp_get_num_procs(), 1, 256);
+    if (numOfThreads) {} // silence unused-variable warnings; some compilers ignore use inside a pragma
+ #endif
+
+ if (bppout == 4) {
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+        for (int i = y0; i < y1; ++i) {
+            guint32 *out_p = reinterpret_cast<guint32*>(out_data + i * strideout) + x0;
+            for (int j = x0; j < x1; ++j) {
+ *out_p = synth(j, i);
+ ++out_p;
+ }
+ }
+ } else {
+ // bppout == 1
+ #if HAVE_OPENMP
+ #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(numOfThreads)
+ #endif
+        for (int i = y0; i < y1; ++i) {
+            guint8 *out_p = out_data + i * strideout + x0;
+            for (int j = x0; j < x1; ++j) {
+ guint32 out_px = synth(j, i);
+ *out_p = out_px >> 24;
+ ++out_p;
+ }
+ }
+ }
+ cairo_surface_mark_dirty(out);
+}
+
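+/**
+ * Synthesize surface pixels based on their position.
+ * Convenience overload that synthesizes the entire area of the output surface.
+ */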
+template <typename Synth>
+void ink_cairo_surface_synthesize(cairo_surface_t *out, Synth synth)
+{
+ int w = cairo_image_surface_get_width(out);
+ int h = cairo_image_surface_get_height(out);
+
+ cairo_rectangle_t area;
+ area.x = 0;
+ area.y = 0;
+ area.width = w;
+ area.height = h;
+
+ ink_cairo_surface_synthesize(out, area, synth);
+}
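+
+// A minimal usage sketch (not part of this header): filling a hypothetical
+// ARGB32 surface with an opaque 8x8 checkerboard. Returned values are
+// premultiplied ARGB, which is irrelevant here since alpha is 255.
+//
+//   ink_cairo_surface_synthesize(surface, [](int x, int y) -> guint32 {
+//       bool dark = ((x >> 3) ^ (y >> 3)) & 1;
+//       return dark ? 0xff404040 : 0xffc0c0c0;
+//   });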
+
+struct SurfaceSynth {
+ SurfaceSynth(cairo_surface_t *surface)
+ : _px(cairo_image_surface_get_data(surface))
+ , _w(cairo_image_surface_get_width(surface))
+ , _h(cairo_image_surface_get_height(surface))
+ , _stride(cairo_image_surface_get_stride(surface))
+ , _alpha(cairo_surface_get_content(surface) == CAIRO_CONTENT_ALPHA)
+ {
+ cairo_surface_flush(surface);
+ }
+
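+    // retrieve a pixel value at integer coordinates (no interpolation)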
+ guint32 pixelAt(int x, int y) const {
+ if (_alpha) {
+ unsigned char *px = _px + y*_stride + x;
+ return *px << 24;
+ } else {
+ unsigned char *px = _px + y*_stride + x*4;
+ return *reinterpret_cast<guint32*>(px);
+ }
+ }
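+    // retrieve an alpha value at integer coordinates (no interpolation)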
+ guint32 alphaAt(int x, int y) const {
+ if (_alpha) {
+ unsigned char *px = _px + y*_stride + x;
+ return *px;
+ } else {
+ unsigned char *px = _px + y*_stride + x*4;
+ guint32 p = *reinterpret_cast<guint32*>(px);
+ return (p & 0xff000000) >> 24;
+ }
+ }
+
+ // retrieve a pixel value with bilinear interpolation
+ guint32 pixelAt(double x, double y) const {
+ if (_alpha) {
+ return alphaAt(x, y) << 24;
+ }
+
+ double xf = floor(x), yf = floor(y);
+ int xi = xf, yi = yf;
+ guint32 xif = round((x - xf) * 255), yif = round((y - yf) * 255);
+ guint32 p00, p01, p10, p11;
+
+ unsigned char *pxi = _px + yi*_stride + xi*4;
+ guint32 *pxu = reinterpret_cast<guint32*>(pxi);
+ guint32 *pxl = reinterpret_cast<guint32*>(pxi + _stride);
+ p00 = *pxu; p10 = *(pxu + 1);
+ p01 = *pxl; p11 = *(pxl + 1);
+
+ guint32 comp[4];
+
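+        // Fixed-point bilinear interpolation: xif and yif are the fractional
+        // parts of the coordinates scaled to 0..255, so each accumulated
+        // channel value below carries a factor of 255*255; the final division
+        // (with half the divisor added for rounding) rescales it to 0..255.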
+ for (unsigned i = 0; i < 4; ++i) {
+ guint32 shift = i*8;
+ guint32 mask = 0xff << shift;
+ guint32 c00 = (p00 & mask) >> shift;
+ guint32 c10 = (p10 & mask) >> shift;
+ guint32 c01 = (p01 & mask) >> shift;
+ guint32 c11 = (p11 & mask) >> shift;
+
+ guint32 iu = (255-xif) * c00 + xif * c10;
+ guint32 il = (255-xif) * c01 + xif * c11;
+ comp[i] = (255-yif) * iu + yif * il;
+ comp[i] = (comp[i] + (255*255/2)) / (255*255);
+ }
+
+ guint32 result = comp[0] | (comp[1] << 8) | (comp[2] << 16) | (comp[3] << 24);
+ return result;
+ }
+
+ // retrieve an alpha value with bilinear interpolation
+ guint32 alphaAt(double x, double y) const {
+ double xf = floor(x), yf = floor(y);
+ int xi = xf, yi = yf;
+ guint32 xif = round((x - xf) * 255), yif = round((y - yf) * 255);
+ guint32 p00, p01, p10, p11;
+ if (_alpha) {
+ unsigned char *pxu = _px + yi*_stride + xi;
+ unsigned char *pxl = pxu + _stride;
+ p00 = *pxu; p10 = *(pxu + 1);
+ p01 = *pxl; p11 = *(pxl + 1);
+ } else {
+ unsigned char *pxi = _px + yi*_stride + xi*4;
+ guint32 *pxu = reinterpret_cast<guint32*>(pxi);
+ guint32 *pxl = reinterpret_cast<guint32*>(pxi + _stride);
+ p00 = (*pxu & 0xff000000) >> 24; p10 = (*(pxu + 1) & 0xff000000) >> 24;
+ p01 = (*pxl & 0xff000000) >> 24; p11 = (*(pxl + 1) & 0xff000000) >> 24;
+ }
+ guint32 iu = (255-xif) * p00 + xif * p10;
+ guint32 il = (255-xif) * p01 + xif * p11;
+ guint32 result = (255-yif) * iu + yif * il;
+ result = (result + (255*255/2)) / (255*255);
+ return result;
+ }
+
+ // compute surface normal at given coordinates using 3x3 Sobel gradient filter
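+    // For interior pixels the kernels are the standard Sobel operators:
+    //   Gx = [ -1 0 +1 ; -2 0 +2 ; -1 0 +1 ]
+    //   Gy = [ -1 -2 -1 ;  0 0  0 ; +1 +2 +1 ]
+    // Edge and corner pixels use the reduced kernels and scale factors
+    // given for feDiffuseLighting / feSpecularLighting in the SVG 1.1 spec.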
+ NR::Fvector surfaceNormalAt(int x, int y, double scale) const {
+ // Below there are some multiplies by zero. They will be optimized out.
+ // Do not remove them, because they improve readability.
+ // NOTE: fetching using alphaAt is slightly lazy.
+ NR::Fvector normal;
+ double fx = -scale/255.0, fy = -scale/255.0;
+ normal[Z_3D] = 1.0;
+ if (G_UNLIKELY(x == 0)) {
+ // leftmost column
+ if (G_UNLIKELY(y == 0)) {
+ // upper left corner
+ fx *= (2.0/3.0);
+ fy *= (2.0/3.0);
+ double p00 = alphaAt(x,y), p10 = alphaAt(x+1, y),
+ p01 = alphaAt(x,y+1), p11 = alphaAt(x+1, y+1);
+ normal[X_3D] =
+ -2.0 * p00 +2.0 * p10
+ -1.0 * p01 +1.0 * p11;
+ normal[Y_3D] =
+ -2.0 * p00 -1.0 * p10
+ +2.0 * p01 +1.0 * p11;
+ } else if (G_UNLIKELY(y == (_h - 1))) {
+ // lower left corner
+ fx *= (2.0/3.0);
+ fy *= (2.0/3.0);
+ double p00 = alphaAt(x,y-1), p10 = alphaAt(x+1, y-1),
+ p01 = alphaAt(x,y ), p11 = alphaAt(x+1, y);
+ normal[X_3D] =
+ -1.0 * p00 +1.0 * p10
+ -2.0 * p01 +2.0 * p11;
+ normal[Y_3D] =
+ -2.0 * p00 -1.0 * p10
+ +2.0 * p01 +1.0 * p11;
+ } else {
+ // leftmost column
+ fx *= (1.0/2.0);
+ fy *= (1.0/3.0);
+ double p00 = alphaAt(x, y-1), p10 = alphaAt(x+1, y-1),
+ p01 = alphaAt(x, y ), p11 = alphaAt(x+1, y ),
+ p02 = alphaAt(x, y+1), p12 = alphaAt(x+1, y+1);
+ normal[X_3D] =
+ -1.0 * p00 +1.0 * p10
+ -2.0 * p01 +2.0 * p11
+ -1.0 * p02 +1.0 * p12;
+ normal[Y_3D] =
+ -2.0 * p00 -1.0 * p10
+ +0.0 * p01 +0.0 * p11 // this will be optimized out
+ +2.0 * p02 +1.0 * p12;
+ }
+ } else if (G_UNLIKELY(x == (_w - 1))) {
+ // rightmost column
+ if (G_UNLIKELY(y == 0)) {
+ // top right corner
+ fx *= (2.0/3.0);
+ fy *= (2.0/3.0);
+ double p00 = alphaAt(x-1,y), p10 = alphaAt(x, y),
+ p01 = alphaAt(x-1,y+1), p11 = alphaAt(x, y+1);
+ normal[X_3D] =
+ -2.0 * p00 +2.0 * p10
+ -1.0 * p01 +1.0 * p11;
+ normal[Y_3D] =
+ -1.0 * p00 -2.0 * p10
+ +1.0 * p01 +2.0 * p11;
+ } else if (G_UNLIKELY(y == (_h - 1))) {
+ // bottom right corner
+ fx *= (2.0/3.0);
+ fy *= (2.0/3.0);
+ double p00 = alphaAt(x-1,y-1), p10 = alphaAt(x, y-1),
+ p01 = alphaAt(x-1,y ), p11 = alphaAt(x, y);
+ normal[X_3D] =
+ -1.0 * p00 +1.0 * p10
+ -2.0 * p01 +2.0 * p11;
+ normal[Y_3D] =
+ -1.0 * p00 -2.0 * p10
+ +1.0 * p01 +2.0 * p11;
+ } else {
+ // rightmost column
+ fx *= (1.0/2.0);
+ fy *= (1.0/3.0);
+ double p00 = alphaAt(x-1, y-1), p10 = alphaAt(x, y-1),
+ p01 = alphaAt(x-1, y ), p11 = alphaAt(x, y ),
+ p02 = alphaAt(x-1, y+1), p12 = alphaAt(x, y+1);
+ normal[X_3D] =
+ -1.0 * p00 +1.0 * p10
+ -2.0 * p01 +2.0 * p11
+ -1.0 * p02 +1.0 * p12;
+ normal[Y_3D] =
+ -1.0 * p00 -2.0 * p10
+ +0.0 * p01 +0.0 * p11
+ +1.0 * p02 +2.0 * p12;
+ }
+ } else {
+ // interior
+ if (G_UNLIKELY(y == 0)) {
+ // top row
+ fx *= (1.0/3.0);
+ fy *= (1.0/2.0);
+ double p00 = alphaAt(x-1, y ), p10 = alphaAt(x, y ), p20 = alphaAt(x+1, y ),
+ p01 = alphaAt(x-1, y+1), p11 = alphaAt(x, y+1), p21 = alphaAt(x+1, y+1);
+ normal[X_3D] =
+ -2.0 * p00 +0.0 * p10 +2.0 * p20
+ -1.0 * p01 +0.0 * p11 +1.0 * p21;
+ normal[Y_3D] =
+ -1.0 * p00 -2.0 * p10 -1.0 * p20
+ +1.0 * p01 +2.0 * p11 +1.0 * p21;
+ } else if (G_UNLIKELY(y == (_h - 1))) {
+ // bottom row
+ fx *= (1.0/3.0);
+ fy *= (1.0/2.0);
+ double p00 = alphaAt(x-1, y-1), p10 = alphaAt(x, y-1), p20 = alphaAt(x+1, y-1),
+ p01 = alphaAt(x-1, y ), p11 = alphaAt(x, y ), p21 = alphaAt(x+1, y );
+ normal[X_3D] =
+ -1.0 * p00 +0.0 * p10 +1.0 * p20
+ -2.0 * p01 +0.0 * p11 +2.0 * p21;
+ normal[Y_3D] =
+ -1.0 * p00 -2.0 * p10 -1.0 * p20
+ +1.0 * p01 +2.0 * p11 +1.0 * p21;
+ } else {
+ // interior pixels
+ // note: p11 is actually unused, so we don't fetch its value
+ fx *= (1.0/4.0);
+ fy *= (1.0/4.0);
+ double p00 = alphaAt(x-1, y-1), p10 = alphaAt(x, y-1), p20 = alphaAt(x+1, y-1),
+ p01 = alphaAt(x-1, y ), p11 = 0.0, p21 = alphaAt(x+1, y ),
+ p02 = alphaAt(x-1, y+1), p12 = alphaAt(x, y+1), p22 = alphaAt(x+1, y+1);
+ normal[X_3D] =
+ -1.0 * p00 +0.0 * p10 +1.0 * p20
+ -2.0 * p01 +0.0 * p11 +2.0 * p21
+ -1.0 * p02 +0.0 * p12 +1.0 * p22;
+ normal[Y_3D] =
+ -1.0 * p00 -2.0 * p10 -1.0 * p20
+ +0.0 * p01 +0.0 * p11 +0.0 * p21
+ +1.0 * p02 +2.0 * p12 +1.0 * p22;
+ }
+ }
+ normal[X_3D] *= fx;
+ normal[Y_3D] *= fy;
+ NR::normalize_vector(normal);
+ return normal;
+ }
+
+ unsigned char *_px;
+ int _w, _h, _stride;
+ bool _alpha;
+};
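+
+// A minimal usage sketch (not part of this header): SurfaceSynth pairs
+// naturally with ink_cairo_surface_synthesize(). Here the Z component of
+// the surface normal is visualized as an opaque gray; `input` and `output`
+// are hypothetical image surfaces of the same size.
+//
+//   SurfaceSynth in(input);
+//   ink_cairo_surface_synthesize(output, [&in](int x, int y) -> guint32 {
+//       NR::Fvector n = in.surfaceNormalAt(x, y, 1.0);
+//       guint32 g = CLAMP(int(n[Z_3D] * 255.0), 0, 255);
+//       return 0xff000000 | (g << 16) | (g << 8) | g;
+//   });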
+
+/*
+// simple pixel accessor for image surface that handles different edge wrapping modes
+class PixelAccessor {
+public:
+ typedef PixelAccessor self;
+ enum EdgeMode {
+ EDGE_PAD,
+ EDGE_WRAP,
+ EDGE_ZERO
+ };
+
+ PixelAccessor(cairo_surface_t *s, EdgeMode e)
+ : _surface(s)
+ , _px(cairo_image_surface_get_data(s))
+ , _x(0), _y(0)
+ , _w(cairo_image_surface_get_width(s))
+ , _h(cairo_image_surface_get_height(s))
+ , _stride(cairo_image_surface_get_stride(s))
+ , _edge_mode(e)
+ , _alpha(cairo_image_surface_get_format(s) == CAIRO_FORMAT_A8)
+ {}
+
+ guint32 pixelAt(int x, int y) {
+ // This is a lot of ifs for a single pixel access. However, branch prediction
+ // should help us a lot, as the result of ifs is always the same for a single image.
+ int real_x = x, real_y = y;
+ switch (_edge_mode) {
+ case EDGE_PAD:
+ real_x = CLAMP(x, 0, _w-1);
+ real_y = CLAMP(y, 0, _h-1);
+ break;
+ case EDGE_WRAP:
+ real_x %= _w;
+ real_y %= _h;
+ break;
+ case EDGE_ZERO:
+ default:
+ if (x < 0 || x >= _w || y < 0 || y >= _h)
+ return 0;
+ break;
+ }
+ if (_alpha) {
+ return *(_px + real_y*_stride + real_x) << 24;
+ } else {
+ guint32 *px = reinterpret_cast<guint32*>(_px +real_y*_stride + real_x*4);
+ return *px;
+ }
+ }
+private:
+ cairo_surface_t *_surface;
+ guint8 *_px;
+ int _x, _y, _w, _h, _stride;
+ EdgeMode _edge_mode;
+ bool _alpha;
+};*/
+
+// Some helpers for pixel manipulation
+G_GNUC_CONST inline gint32
+pxclamp(gint32 v, gint32 low, gint32 high) {
+    // NOTE: a hand-written "branchless" clamp is not worthwhile here.
+    // Compilers turn the code below into conditional moves, which are
+    // already branch-free and faster than explicit bit tricks.
+ if (v < low) return low;
+ if (v > high) return high;
+ return v;
+}
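+
+// Usage sketch: pxclamp() is intended for bringing fixed-point intermediate
+// results back into the 0..255 channel range, e.g.
+//   guint32 channel = pxclamp(raw_value >> 8, 0, 255);
+// where `raw_value` is a hypothetical 8.8 fixed-point intermediate.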
+
+#endif
+/*
+ Local Variables:
+ mode:c++
+ c-file-style:"stroustrup"
+ c-file-offsets:((innamespace . 0)(inline-open . 0)(case-label . +))
+ indent-tabs-mode:nil
+ fill-column:99
+ End:
+*/
+// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:fileencoding=utf-8:textwidth=99 :