Diffstat (limited to 'src/erasure-code/isa/xor_op.cc')
-rw-r--r--  src/erasure-code/isa/xor_op.cc  183
1 file changed, 183 insertions(+), 0 deletions(-)
diff --git a/src/erasure-code/isa/xor_op.cc b/src/erasure-code/isa/xor_op.cc
new file mode 100644
index 000000000..2b56e977c
--- /dev/null
+++ b/src/erasure-code/isa/xor_op.cc
@@ -0,0 +1,183 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ *
+ * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+// -----------------------------------------------------------------------------
+#include "xor_op.h"
+#include <stdio.h>
+#include <string.h>
+#include "arch/intel.h"
+
+#include "include/ceph_assert.h"
+
+// -----------------------------------------------------------------------------
+
+
+// -----------------------------------------------------------------------------
+
+void
+// -----------------------------------------------------------------------------
+byte_xor(unsigned char* cw, unsigned char* dw, unsigned char* ew)
+// -----------------------------------------------------------------------------
+{
+ while (cw < ew)
+ *dw++ ^= *cw++;
+}
+
+// -----------------------------------------------------------------------------
+
+void
+// -----------------------------------------------------------------------------
+vector_xor(vector_op_t* cw,
+ vector_op_t* dw,
+ vector_op_t* ew)
+// -----------------------------------------------------------------------------
+{
+ ceph_assert(is_aligned(cw, EC_ISA_VECTOR_OP_WORDSIZE));
+ ceph_assert(is_aligned(dw, EC_ISA_VECTOR_OP_WORDSIZE));
+ ceph_assert(is_aligned(ew, EC_ISA_VECTOR_OP_WORDSIZE));
+ while (cw < ew) {
+ *dw++ ^= *cw++;
+ }
+}
+
+
+// -----------------------------------------------------------------------------
+
+void
+// -----------------------------------------------------------------------------
+region_xor(unsigned char** src,
+ unsigned char* parity,
+ int src_size,
+ unsigned size)
+{
+ if (!size) {
+ // nothing to do
+ return;
+ }
+
+ if (!src_size) {
+ // nothing to do
+ return;
+ }
+
+ if (src_size == 1) {
+ // just copy source to parity
+ memcpy(parity, src[0], size);
+ return;
+ }
+
+ unsigned size_left = size;
+
+ // ----------------------------------------------------------
+ // region or vector XOR operations require aligned addresses
+ // ----------------------------------------------------------
+
+ bool src_aligned = true;
+ for (int i = 0; i < src_size; i++) {
+ src_aligned &= is_aligned(src[i], EC_ISA_VECTOR_OP_WORDSIZE);
+ }
+
+ if (src_aligned &&
+ is_aligned(parity, EC_ISA_VECTOR_OP_WORDSIZE)) {
+
+#ifdef __x86_64__
+ if (ceph_arch_intel_sse2) {
+ // -----------------------------
+ // use SSE2 region xor function
+ // -----------------------------
+ unsigned region_size =
+ (size / EC_ISA_VECTOR_SSE2_WORDSIZE) * EC_ISA_VECTOR_SSE2_WORDSIZE;
+
+ size_left -= region_size;
+ // 64-byte region xor
+ region_sse2_xor((char**) src, (char*) parity, src_size, region_size);
+ } else
+#endif
+ {
+ // --------------------------------------------
+ // use region xor based on vector xor operation
+ // --------------------------------------------
+ unsigned vector_words = size / EC_ISA_VECTOR_OP_WORDSIZE;
+ unsigned vector_size = vector_words * EC_ISA_VECTOR_OP_WORDSIZE;
+ memcpy(parity, src[0], vector_size);
+
+ size_left -= vector_size;
+ vector_op_t* p_vec = (vector_op_t*) parity;
+ for (int i = 1; i < src_size; i++) {
+ vector_op_t* s_vec = (vector_op_t*) src[i];
+ vector_op_t* e_vec = s_vec + vector_words;
+ vector_xor(s_vec, p_vec, e_vec);
+ }
+ }
+ }
+
+ if (size_left) {
+ // --------------------------------------------------
+    // xor the unaligned remainder with byte-wise region xor
+ // --------------------------------------------------
+ memcpy(parity + size - size_left, src[0] + size - size_left, size_left);
+ for (int i = 1; i < src_size; i++) {
+ byte_xor(src[i] + size - size_left, parity + size - size_left, src[i] + size);
+ }
+ }
+}
+
+// -----------------------------------------------------------------------------
+
+void
+// -----------------------------------------------------------------------------
+region_sse2_xor(char** src,
+ char* parity,
+ int src_size,
+ unsigned size)
+// -----------------------------------------------------------------------------
+{
+#ifdef __x86_64__
+ ceph_assert(!(size % EC_ISA_VECTOR_SSE2_WORDSIZE));
+ unsigned char* p;
+ int d, l;
+ unsigned i;
+ unsigned char* vbuf[256];
+
+ for (int v = 0; v < src_size; v++) {
+ vbuf[v] = (unsigned char*) src[v];
+ }
+
+ l = src_size;
+ p = (unsigned char*) parity;
+
+ for (i = 0; i < size; i += EC_ISA_VECTOR_SSE2_WORDSIZE) {
+ asm volatile("movdqa %0,%%xmm0" : : "m" (vbuf[0][i]));
+ asm volatile("movdqa %0,%%xmm1" : : "m" (vbuf[0][i + 16]));
+ asm volatile("movdqa %0,%%xmm2" : : "m" (vbuf[0][i + 32]));
+ asm volatile("movdqa %0,%%xmm3" : : "m" (vbuf[0][i + 48]));
+
+ for (d = 1; d < l; d++) {
+ asm volatile("movdqa %0,%%xmm4" : : "m" (vbuf[d][i]));
+ asm volatile("movdqa %0,%%xmm5" : : "m" (vbuf[d][i + 16]));
+ asm volatile("movdqa %0,%%xmm6" : : "m" (vbuf[d][i + 32]));
+ asm volatile("movdqa %0,%%xmm7" : : "m" (vbuf[d][i + 48]));
+ asm volatile("pxor %xmm4,%xmm0");
+ asm volatile("pxor %xmm5,%xmm1");
+ asm volatile("pxor %xmm6,%xmm2");
+ asm volatile("pxor %xmm7,%xmm3");
+ }
+ asm volatile("movntdq %%xmm0,%0" : "=m" (p[i]));
+ asm volatile("movntdq %%xmm1,%0" : "=m" (p[i + 16]));
+ asm volatile("movntdq %%xmm2,%0" : "=m" (p[i + 32]));
+ asm volatile("movntdq %%xmm3,%0" : "=m" (p[i + 48]));
+ }
+
+ asm volatile("sfence" : : : "memory");
+#endif // __x86_64__
+ return;
+}
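
For context (not part of the commit): a minimal caller sketch showing how region_xor() could be used to build a parity chunk. It assumes xor_op.h is reachable on the include path and declares region_xor() with the signature above; the chunk count, chunk size, and the 64-byte alignment are illustrative choices, not values taken from this change.

#include "xor_op.h"   // assumed include path; adjust to the build tree
#include <cstdlib>    // posix_memalign, free (POSIX)
#include <cstring>    // memset

int main()
{
  const unsigned chunk_size = 4096;   // illustrative chunk size
  const int k = 3;                    // illustrative number of data chunks

  unsigned char* chunks[k];
  unsigned char* parity = nullptr;

  // 64-byte aligned buffers satisfy the EC_ISA_VECTOR_OP_WORDSIZE alignment
  // check in region_xor(), so the SSE2 / vector fast path can be used.
  for (int i = 0; i < k; i++) {
    posix_memalign(reinterpret_cast<void**>(&chunks[i]), 64, chunk_size);
    memset(chunks[i], i + 1, chunk_size);
  }
  posix_memalign(reinterpret_cast<void**>(&parity), 64, chunk_size);

  // parity = chunks[0] ^ chunks[1] ^ chunks[2]
  region_xor(chunks, parity, k, chunk_size);

  for (int i = 0; i < k; i++)
    free(chunks[i]);
  free(parity);
  return 0;
}

Design note: region_sse2_xor() writes the parity with non-temporal movntdq stores, which bypass the cache; the trailing sfence orders those stores, and callers should not expect the freshly written parity to be cache-hot.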