summary | refs | log | tree | commit | diff | stats
path: root/libfreerdp/primitives/prim_copy.c
diff options
context:
space:
mode:
Diffstat (limited to 'libfreerdp/primitives/prim_copy.c')
-rw-r--r-- libfreerdp/primitives/prim_copy.c | 178
1 files changed, 178 insertions, 0 deletions
diff --git a/libfreerdp/primitives/prim_copy.c b/libfreerdp/primitives/prim_copy.c
new file mode 100644
index 0000000..f140c20
--- /dev/null
+++ b/libfreerdp/primitives/prim_copy.c
@@ -0,0 +1,178 @@
+/* FreeRDP: A Remote Desktop Protocol Client
+ * Copy operations.
+ * vi:ts=4 sw=4:
+ *
+ * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License. You may obtain
+ * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+#include <freerdp/config.h>
+
+#include <string.h>
+#include <freerdp/types.h>
+#include <freerdp/primitives.h>
+#ifdef WITH_IPP
+#include <ipps.h>
+#include <ippi.h>
+#endif /* WITH_IPP */
+#include "prim_internal.h"
+
+static primitives_t* generic = NULL; /* NULL until primitives_init_copy_opt() stores primitives_get_generic() here; read by general_copy_8u_AC4r() for overlapping copies */
+
+/* ------------------------------------------------------------------------- */
+/*static inline BOOL memory_regions_overlap_1d(*/
+/* Report whether two byte ranges of the same length share any address.
+ *
+ * p1, p2: start addresses of the two ranges.
+ * bytes:  length of each range in bytes.
+ *
+ * Returns TRUE when the lower-addressed range extends past the start of
+ * the higher-addressed one, FALSE otherwise.
+ */
+static BOOL memory_regions_overlap_1d(const BYTE* p1, const BYTE* p2, size_t bytes)
+{
+	const ULONG_PTR first = (const ULONG_PTR)p1;
+	const ULONG_PTR second = (const ULONG_PTR)p2;
+	/* Order the two start addresses so only one comparison is needed. */
+	const ULONG_PTR lower = (first <= second) ? first : second;
+	const ULONG_PTR upper = (first <= second) ? second : first;
+
+	return (lower + bytes > upper) ? TRUE : FALSE;
+}
+
+/* ------------------------------------------------------------------------- */
+/*static inline BOOL memory_regions_overlap_2d( */
+/* Report whether two strided 2D regions may share memory.
+ *
+ * p1, p2:         start addresses of the two regions.
+ * p1Step, p2Step: byte offset between consecutive rows of each region.
+ * p1Size, p2Size: size in bytes of one element of each region.
+ * width, height:  region dimensions in elements and rows.
+ *
+ * The test is conservative: the whole span from the first byte of the
+ * lower-addressed region to the end of its last row is treated as occupied,
+ * and TRUE is returned when that span reaches past the start of the other
+ * region.
+ */
+static BOOL memory_regions_overlap_2d(const BYTE* p1, int p1Step, int p1Size, const BYTE* p2,
+                                      int p2Step, int p2Size, int width, int height)
+{
+	const ULONG_PTR addr1 = (ULONG_PTR)p1;
+	const ULONG_PTR addr2 = (ULONG_PTR)p2;
+	const BOOL firstIsLower = (addr1 <= addr2) ? TRUE : FALSE;
+
+	/* Select the geometry of whichever region starts at the lower address. */
+	const ULONG_PTR lower = firstIsLower ? addr1 : addr2;
+	const ULONG_PTR upper = firstIsLower ? addr2 : addr1;
+	const int step = firstIsLower ? p1Step : p2Step;
+	const int elemSize = firstIsLower ? p1Size : p2Size;
+
+	/* One past the last byte of the final row of the lower region. */
+	const ULONG_PTR lowerEnd = lower + 1ull * (height - 1) * step + 1ull * width * elemSize;
+
+	return (lowerEnd > upper) ? TRUE : FALSE;
+}
+
+/* ------------------------------------------------------------------------- */
+/* Copy len bytes from pSrc to pDst.
+ *
+ * memcpy is used when the two ranges are disjoint and memmove when they
+ * overlap, so overlapping copies are handled correctly.
+ *
+ * Always returns PRIMITIVES_SUCCESS.
+ */
+static pstatus_t general_copy_8u(const BYTE* pSrc, BYTE* pDst, INT32 len)
+{
+	const size_t count = (size_t)len;
+
+	if (!memory_regions_overlap_1d(pSrc, pDst, count))
+		memcpy((void*)pDst, (const void*)pSrc, count);
+	else
+		memmove((void*)pDst, (const void*)pSrc, count);
+
+	return PRIMITIVES_SUCCESS;
+}
+
+/* ------------------------------------------------------------------------- */
+/* Copy a width x height block of 32-bit pixels from one buffer to another.
+ * The addresses are assumed to have been already offset to the upper-left
+ * corners of the source and destination region of interest.
+ *
+ * pSrc, srcStep: first source row and byte offset between source rows.
+ * pDst, dstStep: first destination row and byte offset between
+ *                destination rows.
+ * width, height: region size in pixels and rows; if either is zero or
+ *                negative there is nothing to copy.
+ *
+ * Always returns PRIMITIVES_SUCCESS.
+ */
+static pstatus_t general_copy_8u_AC4r(const BYTE* pSrc, INT32 srcStep, BYTE* pDst, INT32 dstStep,
+                                      INT32 width, INT32 height)
+{
+	const BYTE* src = pSrc;
+	BYTE* dst = pDst;
+	size_t rowbytes;
+
+	/* A negative dimension would wrap rowbytes and run the row loops
+	 * (which count height down to zero) far past the buffers.
+	 */
+	if ((width <= 0) || (height <= 0))
+		return PRIMITIVES_SUCCESS;
+
+	rowbytes = (size_t)width * sizeof(UINT32);
+
+	if (memory_regions_overlap_2d(pSrc, srcStep, sizeof(UINT32), pDst, dstStep, sizeof(UINT32),
+	                              width, height))
+	{
+		do
+		{
+			/* `generic` is only assigned by primitives_init_copy_opt(), so it
+			 * can still be NULL here; memmove is the overlap-safe fallback.
+			 */
+			if (generic && generic->copy)
+				generic->copy(src, dst, (INT32)rowbytes);
+			else
+				memmove(dst, src, rowbytes);
+
+			src += srcStep;
+			dst += dstStep;
+		} while (--height);
+	}
+	else
+	{
+		/* TODO: do it in one operation when the rowdata is adjacent. */
+		do
+		{
+			/* If we find a replacement for memcpy that is consistently
+			 * faster, this could be replaced with that.
+			 */
+			memcpy(dst, src, rowbytes);
+			src += srcStep;
+			dst += dstStep;
+		} while (--height);
+	}
+
+	return PRIMITIVES_SUCCESS;
+}
+
+#ifdef WITH_IPP
+/* ------------------------------------------------------------------------- */
+/* This is just ippiCopy_8u_AC4R without the IppiSize structure parameter. */
+/* Adapter that packs width and height into an IppiSize and forwards the
+ * call to ippiCopy_8u_AC4R, returning its status cast to pstatus_t.
+ */
+static pstatus_t ippiCopy_8u_AC4r(const BYTE* pSrc, INT32 srcStep, BYTE* pDst, INT32 dstStep,
+                                  INT32 width, INT32 height)
+{
+	const IppiSize roi = { .width = width, .height = height };
+
+	return (pstatus_t)ippiCopy_8u_AC4R(pSrc, srcStep, pDst, dstStep, roi);
+}
+#endif /* WITH_IPP */
+
+/* ------------------------------------------------------------------------- */
+/* Install the portable copy routines from this file into prims. */
+void primitives_init_copy(primitives_t* prims)
+{
+	/* Byte-wise and 32-bit-pixel block copies. */
+	prims->copy_8u = general_copy_8u;
+	prims->copy_8u_AC4r = general_copy_8u_AC4r;
+
+	/* `copy` is the same routine as copy_8u, exposed with void* parameters. */
+	prims->copy = (__copy_t)(prims->copy_8u);
+}
+
+#if defined(WITH_SSE2) || defined(WITH_NEON)
+/* Install the copy routines for optimized builds: remember the table
+ * returned by primitives_get_generic(), start from the defaults set by
+ * primitives_init_copy(), then switch to the IPP versions when WITH_IPP
+ * is defined.
+ */
+void primitives_init_copy_opt(primitives_t* prims)
+{
+	generic = primitives_get_generic();
+	primitives_init_copy(prims);
+
+	/* Replace the defaults with tuned versions where available. */
+#ifdef WITH_IPP
+	prims->copy_8u = (__copy_8u_t)ippsCopy_8u;
+	prims->copy_8u_AC4r = (__copy_8u_AC4r_t)ippiCopy_8u_AC4r;
+#endif
+
+	/* No SSE2 copy is installed here: a version without prefetch gave
+	 * inconsistent results against memcpy, sometimes significantly faster
+	 * and sometimes dreadfully slower, varying a lot with block size and
+	 * processor. None is used unless one can be written that is
+	 * consistently faster than memcpy.
+	 */
+
+	/* Refresh the void* alias so it follows whichever copy_8u was chosen. */
+	prims->copy = (__copy_t)(prims->copy_8u);
+}
+#endif