Adding upstream version 14.2.21.upstream/14.2.21 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 18:24:20 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 18:24:20 +0000
commit: 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
tree: e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/erasure-code/isa
parent: Initial commit. (diff)
download: ceph-upstream.tar.xz
ceph-upstream.zip
10 files changed, 1522 insertions, 0 deletions
diff --git a/src/erasure-code/isa/CMakeLists.txt b/src/erasure-code/isa/CMakeLists.txt
new file mode 100644
index 00000000..cc489211
--- /dev/null
+++ b/src/erasure-code/isa/CMakeLists.txt
@@ -0,0 +1,67 @@
+# ISA-L erasure code plugin (ec_isa shared library)
+set(isal_src_dir ${CMAKE_SOURCE_DIR}/src/isa-l)
+include_directories(${isal_src_dir}/include)
+
+set(isa_srcs
+  ${isal_src_dir}/erasure_code/ec_base.c
+  ${isal_src_dir}/erasure_code/gf_2vect_dot_prod_sse.asm
+  ${isal_src_dir}/erasure_code/gf_3vect_dot_prod_sse.asm
+  ${isal_src_dir}/erasure_code/gf_4vect_dot_prod_sse.asm
+  ${isal_src_dir}/erasure_code/gf_5vect_dot_prod_sse.asm
+  ${isal_src_dir}/erasure_code/gf_6vect_dot_prod_sse.asm
+  ${isal_src_dir}/erasure_code/gf_vect_dot_prod_sse.asm
+  ${isal_src_dir}/erasure_code/gf_2vect_mad_avx2.asm
+  ${isal_src_dir}/erasure_code/gf_3vect_mad_avx2.asm
+  ${isal_src_dir}/erasure_code/gf_4vect_mad_avx2.asm
+  ${isal_src_dir}/erasure_code/gf_5vect_mad_avx2.asm
+  ${isal_src_dir}/erasure_code/gf_6vect_mad_avx2.asm
+  ${isal_src_dir}/erasure_code/gf_vect_mad_avx2.asm
+  ${isal_src_dir}/erasure_code/ec_highlevel_func.c
+  ${isal_src_dir}/erasure_code/gf_2vect_mad_avx.asm
+  ${isal_src_dir}/erasure_code/gf_3vect_mad_avx.asm
+  ${isal_src_dir}/erasure_code/gf_4vect_mad_avx.asm
+  ${isal_src_dir}/erasure_code/gf_5vect_mad_avx.asm
+  ${isal_src_dir}/erasure_code/gf_6vect_mad_avx.asm
+  ${isal_src_dir}/erasure_code/gf_vect_mad_avx.asm
+  ${isal_src_dir}/erasure_code/ec_multibinary.asm
+  ${isal_src_dir}/erasure_code/gf_2vect_mad_sse.asm
+  ${isal_src_dir}/erasure_code/gf_3vect_mad_sse.asm
+  ${isal_src_dir}/erasure_code/gf_4vect_mad_sse.asm
+  ${isal_src_dir}/erasure_code/gf_5vect_mad_sse.asm
+  ${isal_src_dir}/erasure_code/gf_6vect_mad_sse.asm
+  ${isal_src_dir}/erasure_code/gf_vect_mad_sse.asm
+  ${isal_src_dir}/erasure_code/gf_2vect_dot_prod_avx2.asm
+  ${isal_src_dir}/erasure_code/gf_3vect_dot_prod_avx2.asm
+  ${isal_src_dir}/erasure_code/gf_4vect_dot_prod_avx2.asm
+  ${isal_src_dir}/erasure_code/gf_5vect_dot_prod_avx2.asm
+  ${isal_src_dir}/erasure_code/gf_6vect_dot_prod_avx2.asm
+  ${isal_src_dir}/erasure_code/gf_vect_dot_prod_avx2.asm
+  ${isal_src_dir}/erasure_code/gf_vect_mul_avx.asm
+  ${isal_src_dir}/erasure_code/gf_2vect_dot_prod_avx.asm
+  ${isal_src_dir}/erasure_code/gf_3vect_dot_prod_avx.asm
+  ${isal_src_dir}/erasure_code/gf_4vect_dot_prod_avx.asm
+  ${isal_src_dir}/erasure_code/gf_5vect_dot_prod_avx.asm
+  ${isal_src_dir}/erasure_code/gf_6vect_dot_prod_avx.asm
+  ${isal_src_dir}/erasure_code/gf_vect_dot_prod_avx.asm
+  ${isal_src_dir}/erasure_code/gf_vect_mul_sse.asm
+  ${isal_src_dir}/erasure_code/gf_2vect_dot_prod_avx512.asm
+  ${isal_src_dir}/erasure_code/gf_2vect_mad_avx512.asm
+  ${isal_src_dir}/erasure_code/gf_3vect_dot_prod_avx512.asm
+  ${isal_src_dir}/erasure_code/gf_3vect_mad_avx512.asm
+  ${isal_src_dir}/erasure_code/gf_4vect_dot_prod_avx512.asm
+  ${isal_src_dir}/erasure_code/gf_4vect_mad_avx512.asm
+  ${isal_src_dir}/erasure_code/gf_vect_dot_prod_avx512.asm
+  ${isal_src_dir}/erasure_code/gf_vect_mad_avx512.asm
+  ErasureCodeIsa.cc
+  ErasureCodeIsaTableCache.cc
+  ErasureCodePluginIsa.cc
+  xor_op.cc
+)
+
+add_library(ec_isa SHARED
+  ${isa_srcs}
+  $<TARGET_OBJECTS:erasure_code_objs>)
+target_link_libraries(ec_isa ${EXTRALIBS})
+set_target_properties(ec_isa PROPERTIES
+  INSTALL_RPATH "")
+install(TARGETS ec_isa DESTINATION ${erasure_plugin_dir})
diff --git a/src/erasure-code/isa/ErasureCodeIsa.cc b/src/erasure-code/isa/ErasureCodeIsa.cc
new file mode 100644
index 00000000..6f2f181f
--- /dev/null
+++ b/src/erasure-code/isa/ErasureCodeIsa.cc
@@ -0,0 +1,421 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ *
+ * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+// -----------------------------------------------------------------------------
+#include <algorithm>
+#include <errno.h>
+// -----------------------------------------------------------------------------
+#include "common/debug.h"
+#include "ErasureCodeIsa.h"
+#include "xor_op.h"
+#include "include/ceph_assert.h"
+using namespace std;
+
+// -----------------------------------------------------------------------------
+extern "C" {
+#include "isa-l/include/erasure_code.h"
+}
+// -----------------------------------------------------------------------------
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_osd
+#undef dout_prefix
+#define dout_prefix _prefix(_dout)
+// -----------------------------------------------------------------------------
+
+// Debug-output hook (see dout_prefix above): tags each log line with the codec name.
+static ostream& _prefix(std::ostream* _dout)
+{
+  std::ostream& log = *_dout;
+  log << "ErasureCodeIsa: ";
+  return log;
+}
+// -----------------------------------------------------------------------------
+
+const std::string ErasureCodeIsaDefault::DEFAULT_K("7"); // fallback for profile key "k" (see parse)
+const std::string ErasureCodeIsaDefault::DEFAULT_M("3"); // fallback for profile key "m" (see parse)
+
+
+// -----------------------------------------------------------------------------
+
+int
+ErasureCodeIsa::init(ErasureCodeProfile &profile, ostream *ss)
+{
+  // parse k/m first; on failure return the error without preparing any table
+  const int err = parse(profile, ss);
+  if (err != 0)
+    return err;
+  prepare();
+  return ErasureCode::init(profile, ss);
+}
+
+// -----------------------------------------------------------------------------
+
+unsigned int
+ErasureCodeIsa::get_chunk_size(unsigned int object_size) const
+{
+  // ceil(object_size / k), rounded up to the next multiple of the alignment
+  const unsigned alignment = get_alignment();
+  const unsigned chunk_size = ( object_size + k - 1 ) / k;
+  dout(20) << "get_chunk_size: chunk_size " << chunk_size
+           << " must be modulo " << alignment << dendl;
+  const unsigned remainder = chunk_size % alignment;
+  if (remainder == 0)
+    return chunk_size;
+  const unsigned padded = chunk_size + alignment - remainder;
+  dout(10) << "get_chunk_size: " << chunk_size << " padded to " << padded << dendl;
+  return padded;
+}
+
+// -----------------------------------------------------------------------------
+
+int ErasureCodeIsa::encode_chunks(const set<int> &want_to_encode,
+                                  map<int, bufferlist> *encoded)
+{
+  // raw buffer pointers: entries [0,k) go in as data, [k,k+m) as coding
+  char *bufs[k + m];
+  for (int idx = 0; idx < k + m; ++idx) bufs[idx] = (*encoded)[idx].c_str();
+  isa_encode(bufs, bufs + k, (*encoded)[0].length());
+  return 0;
+}
+
+int ErasureCodeIsa::decode_chunks(const set<int> &want_to_read,
+                                  const map<int, bufferlist> &chunks,
+                                  map<int, bufferlist> *decoded)
+{
+  // chunk ids missing from `chunks` form the -1 terminated erasure list
+  const unsigned blocksize = chunks.begin()->second.length();
+  int erasures[k + m + 1];
+  int erasures_count = 0;
+  char *data[k];
+  char *coding[m];
+  for (int idx = 0; idx < k + m; idx++) {
+    if (chunks.count(idx) == 0)
+      erasures[erasures_count++] = idx;
+    char *buf = (*decoded)[idx].c_str();
+    if (idx < k)
+      data[idx] = buf;
+    else
+      coding[idx - k] = buf;
+  }
+  erasures[erasures_count] = -1;
+  ceph_assert(erasures_count > 0);
+  return isa_decode(erasures, data, coding, blocksize);
+}
+
+// -----------------------------------------------------------------------------
+
+void
+ErasureCodeIsaDefault::isa_encode(char **data,
+                                  char **coding,
+                                  int blocksize)
+{
+  if (m != 1) { // general case: encode through the prepared encoding tables
+    ec_encode_data(blocksize, k, m, encode_tbls,
+                   (unsigned char**) data, (unsigned char**) coding);
+    return;
+  }
+  // single parity stripe: handled by region_xor instead of the tables
+  region_xor((unsigned char**) data, (unsigned char*) coding[0], k, blocksize);
+}
+
+// -----------------------------------------------------------------------------
+
+bool
+ErasureCodeIsaDefault::erasure_contains(int *erasures, int i)
+{
+  // walk the -1 terminated list until the terminator or a match is reached
+  const int *cursor = erasures;
+  while (*cursor != -1 && *cursor != i)
+    ++cursor;
+  return *cursor != -1;
+}
+
+// -----------------------------------------------------------------------------
+
+
+
+// Rebuilds the chunks listed in the -1 terminated `erasures` array into their buffers.
+// Returns 0 on success, -1 if more than m chunks are lost or the matrix is not invertible.
+int
+ErasureCodeIsaDefault::isa_decode(int *erasures,
+                                  char **data,
+                                  char **coding,
+                                  int blocksize)
+{
+  int nerrs = 0;
+  int i, r, s;
+
+  // count the erasures (the list is terminated by -1)
+  for (int l = 0; erasures[l] != -1; l++) {
+    nerrs++;
+  }
+
+  unsigned char *recover_source[k];
+  unsigned char *recover_target[m];
+
+  memset(recover_source, 0, sizeof (recover_source));
+  memset(recover_target, 0, sizeof (recover_target));
+
+  // ---------------------------------------------
+  // Assign source and target buffers:
+  // the first k surviving chunks are sources, erased chunks are targets
+  for (i = 0, s = 0, r = 0; ((r < k) || (s < nerrs)) && (i < (k + m)); i++) {
+    if (!erasure_contains(erasures, i)) {
+      if (r < k) {
+        if (i < k) {
+          recover_source[r] = (unsigned char*) data[i];
+        } else {
+          recover_source[r] = (unsigned char*) coding[i - k];
+        }
+        r++;
+      }
+    } else {
+      if (s < m) {
+        if (i < k) {
+          recover_target[s] = (unsigned char*) data[i];
+        } else {
+          recover_target[s] = (unsigned char*) coding[i - k];
+        }
+        s++;
+      }
+    }
+  }
+
+  if (m == 1) {
+    // single parity decoding
+    ceph_assert(1 == nerrs);
+    dout(20) << "isa_decode: reconstruct using region xor [" <<
+      erasures[0] << "]" << dendl;
+    region_xor(recover_source, recover_target[0], k, blocksize);
+    return 0;
+  }
+
+
+  if ((matrixtype == kVandermonde) &&
+      (nerrs == 1) &&
+      (erasures[0] < (k + 1))) {
+    // use xor decoding if a data chunk is missing or the first coding chunk
+    dout(20) << "isa_decode: reconstruct using region xor [" <<
+      erasures[0] << "]" << dendl;
+    ceph_assert(1 == s);
+    ceph_assert(k == r);
+    region_xor(recover_source, recover_target[0], k, blocksize);
+    return 0;
+  }
+
+  unsigned char d[k * (m + k)];
+  unsigned char decode_tbls[k * (m + k)*32];
+  unsigned char *p_tbls = decode_tbls;
+
+  int decode_index[k];
+
+  if (nerrs > m) // more erasures than coding chunks: not recoverable
+    return -1;
+
+  std::string erasure_signature; // describes a matrix configuration for caching
+
+  // ---------------------------------------------
+  // Construct b by removing error rows
+  // ---------------------------------------------
+
+  for (i = 0, r = 0; i < k; i++, r++) {
+    char id[128];
+    while (erasure_contains(erasures, r))
+      r++;
+
+    decode_index[i] = r;
+
+    snprintf(id, sizeof (id), "+%d", r);
+    erasure_signature += id;
+  }
+
+  for (int p = 0; p < nerrs; p++) {
+    char id[128];
+    snprintf(id, sizeof (id), "-%d", erasures[p]);
+    erasure_signature += id;
+  }
+
+  // ---------------------------------------------
+  // Try to get an already computed matrix
+  // ---------------------------------------------
+  if (!tcache.getDecodingTableFromCache(erasure_signature, p_tbls, matrixtype, k, m)) {
+    int j;
+    unsigned char b[k * (m + k)];
+    unsigned char c[k * (m + k)];
+
+    for (i = 0; i < k; i++) {
+      r = decode_index[i];
+      for (j = 0; j < k; j++)
+        b[k * i + j] = encode_coeff[k * r + j];
+    }
+    // ---------------------------------------------
+    // Compute inverted matrix
+    // ---------------------------------------------
+
+    // --------------------------------------------------------
+    // Remark: this may fail for certain Vandermonde matrices !
+    // There is an advanced way trying to use different
+    // source chunks to get an invertible matrix, however
+    // there are also (k,m) combinations which cannot be
+    // inverted when m chunks are lost and this optimization
+    // does not help. Therefore we keep the code simpler.
+    // --------------------------------------------------------
+    if (gf_invert_matrix(b, d, k) < 0) {
+      dout(0) << "isa_decode: bad matrix" << dendl;
+      return -1;
+    }
+
+    for (int p = 0; p < nerrs; p++) {
+      if (erasures[p] < k) {
+        // decoding matrix elements for data chunks
+        for (j = 0; j < k; j++) {
+          c[k * p + j] = d[k * erasures[p] + j];
+        }
+      } else {
+        // decoding matrix element for coding chunks
+        for (i = 0; i < k; i++) {
+          int s = 0;
+          for (j = 0; j < k; j++)
+            s ^= gf_mul(d[j * k + i],
+                        encode_coeff[k * erasures[p] + j]);
+
+          c[k * p + i] = s;
+        }
+      }
+    }
+
+    // ---------------------------------------------
+    // Initialize Decoding Table
+    // ---------------------------------------------
+    ec_init_tables(k, nerrs, c, decode_tbls);
+    tcache.putDecodingTableToCache(erasure_signature, p_tbls, matrixtype, k, m);
+  }
+  // Recover the erased chunks (recover_target) from the surviving ones
+  ec_encode_data(blocksize,
+                 k, nerrs, decode_tbls, recover_source, recover_target);
+
+
+  return 0;
+}
+
+// Alignment that get_chunk_size() pads chunk sizes to.
+
+unsigned
+ErasureCodeIsaDefault::get_alignment() const
+{
+  return EC_ISA_ADDRESS_ALIGNMENT;
+}
+
+// -----------------------------------------------------------------------------
+
+int ErasureCodeIsaDefault::parse(ErasureCodeProfile &profile,
+                                 ostream *ss)
+{
+  // Reads k and m from the profile (defaults DEFAULT_K / DEFAULT_M) and
+  // validates them. Returns 0 on success or a non-zero error; for Vandermonde
+  // matrices out-of-range values are clamped, reported on *ss and -EINVAL is
+  // returned.
+  int err = ErasureCode::parse(profile, ss);
+  err |= to_int("k", profile, &k, DEFAULT_K, ss);
+  err |= to_int("m", profile, &m, DEFAULT_M, ss);
+  err |= sanity_check_k_m(k, m, ss);
+
+  if (matrixtype == kVandermonde) {
+    // these are verified safe values evaluated using the
+    // benchmarktool and 10*(combinatoric for maximum loss) random
+    // full erasures
+    if (k > 32) {
+      *ss << "Vandermonde: k=" << k
+        << " should be less/equal than 32 : revert to k=32" << std::endl;
+      k = 32;
+      err = -EINVAL;
+    }
+
+    if (m > 4) {
+      *ss << "Vandermonde: m=" << m
+        << " should be less than 5 to guarantee an MDS codec:"
+        << " revert to m=4" << std::endl;
+      m = 4;
+      err = -EINVAL;
+    }
+
+    // m is at most 4 here; only m == 4 restricts k further
+    if (m == 4 && k > 21) {
+      *ss << "Vandermonde: k=" << k
+        << " should be less than 22 to guarantee an MDS"
+        << " codec with m=4: revert to k=21" << std::endl;
+      k = 21;
+      err = -EINVAL;
+    }
+  }
+  return err;
+}
+
+// -----------------------------------------------------------------------------
+
+void
+ErasureCodeIsaDefault::prepare()
+{
+  // validate the matrix type before any coefficient or table is built from it
+  ceph_assert((matrixtype == kVandermonde) || (matrixtype == kCauchy));
+
+  // setup shared encoding table and coefficients
+  unsigned char** p_enc_table =
+    tcache.getEncodingTable(matrixtype, k, m);
+
+  unsigned char** p_enc_coeff =
+    tcache.getEncodingCoefficient(matrixtype, k, m);
+
+  if (!*p_enc_coeff) {
+    dout(10) << "[ cache tables ] creating coeff for k=" <<
+      k << " m=" << m << dendl;
+    // build encoding coefficients which need to be computed once for each (k,m)
+    encode_coeff = (unsigned char*) malloc(k * (m + k));
+
+    if (matrixtype == kVandermonde)
+      gf_gen_rs_matrix(encode_coeff, k + m, k);
+    if (matrixtype == kCauchy)
+      gf_gen_cauchy1_matrix(encode_coeff, k + m, k);
+
+    // either our new created coefficients are stored or if they have been
+    // created in the meanwhile the locally allocated coefficients will be
+    // freed by setEncodingCoefficient
+    encode_coeff = tcache.setEncodingCoefficient(matrixtype, k, m, encode_coeff);
+  } else {
+    encode_coeff = *p_enc_coeff;
+  }
+
+  if (!*p_enc_table) {
+    dout(10) << "[ cache tables ] creating tables for k=" <<
+      k << " m=" << m << dendl;
+    // build encoding table which needs to be computed once for each (k,m)
+    encode_tbls = (unsigned char*) malloc(k * (m + k)*32);
+    ec_init_tables(k, m, &encode_coeff[k * k], encode_tbls);
+
+    // either our new created table is stored or if it has been
+    // created in the meanwhile the locally allocated table will be
+    // freed by setEncodingTable
+    encode_tbls = tcache.setEncodingTable(matrixtype, k, m, encode_tbls);
+  } else {
+    encode_tbls = *p_enc_table;
+  }
+
+  unsigned memory_lru_cache =
+    k * (m + k) * 32 * tcache.decoding_tables_lru_length;
+
+  dout(10) << "[ cache memory ] = " << memory_lru_cache << " bytes" <<
+    " [ matrix ] = " <<
+    ((matrixtype == kVandermonde) ? "Vandermonde" : "Cauchy") << dendl;
+}
+// -----------------------------------------------------------------------------
diff --git a/src/erasure-code/isa/ErasureCodeIsa.h b/src/erasure-code/isa/ErasureCodeIsa.h
new file mode 100644
index 00000000..d67b918f
--- /dev/null
+++ b/src/erasure-code/isa/ErasureCodeIsa.h
@@ -0,0 +1,153 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ *
+ * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+/**
+ * @file   ErasureCodeIsa.h
+ *
+ * @brief  Erasure Code CODEC using the INTEL ISA-L library.
+ *
+ * The INTEL ISA-L library supports two pre-defined encoding matrices (reed_sol_van = default, cauchy).
+ * The default CODEC implementation using these two matrices is implemented in class ErasureCodeIsaDefault.
+ * ISA-L allows the use of custom matrices, which might be added later as implementations deriving from the base class ErasureCodeIsa.
+ */
+
+#ifndef CEPH_ERASURE_CODE_ISA_L_H
+#define CEPH_ERASURE_CODE_ISA_L_H
+
+// -----------------------------------------------------------------------------
+#include "erasure-code/ErasureCode.h"
+#include "ErasureCodeIsaTableCache.h"
+// -----------------------------------------------------------------------------
+
+class ErasureCodeIsa : public ErasureCode {
+public:
+  // encoding matrix flavours selectable for the codec
+  enum eMatrix {
+    kVandermonde = 0, kCauchy = 1
+  };
+
+  int k; // number of data chunks (see get_data_chunk_count)
+  int m; // number of coding chunks (chunk count is k + m)
+  int w; // NOTE(review): only zero-initialised here; purpose not evident - confirm
+
+  ErasureCodeIsaTableCache &tcache; // cache of encoding/decoding tables, passed in by reference
+  const char *technique; // technique name handed to the constructor
+
+  ErasureCodeIsa(const char *_technique,
+                 ErasureCodeIsaTableCache &_tcache) :
+  k(0),
+  m(0),
+  w(0),
+  tcache(_tcache),
+  technique(_technique)
+  {
+  }
+
+  
+  ~ErasureCodeIsa() override
+  {
+  }
+
+  unsigned int
+  get_chunk_count() const override
+  {
+    return k + m;
+  }
+
+  unsigned int
+  get_data_chunk_count() const override
+  {
+    return k;
+  }
+
+  unsigned int get_chunk_size(unsigned int object_size) const override;
+
+  int encode_chunks(const std::set<int> &want_to_encode,
+                            std::map<int, bufferlist> *encoded) override;
+
+  int decode_chunks(const std::set<int> &want_to_read,
+                            const std::map<int, bufferlist> &chunks,
+                            std::map<int, bufferlist> *decoded) override;
+
+  int init(ErasureCodeProfile &profile, std::ostream *ss) override;
+  // codec hooks below are implemented by the concrete codec class
+  virtual void isa_encode(char **data,
+                          char **coding,
+                          int blocksize) = 0;
+
+  // erasures is a -1 terminated list of missing chunk indices
+  virtual int isa_decode(int *erasures,
+                         char **data,
+                         char **coding,
+                         int blocksize) = 0;
+  // alignment that get_chunk_size() pads chunk sizes to
+  virtual unsigned get_alignment() const = 0;
+  // called by init() after parse() succeeded
+  virtual void prepare() = 0;
+  // parse() is the first step of init(); a non-zero result aborts init()
+ private:
+  virtual int parse(ErasureCodeProfile &profile,
+                    std::ostream *ss) = 0;
+};
+
+// -----------------------------------------------------------------------------
+
+class ErasureCodeIsaDefault : public ErasureCodeIsa {
+private:
+  int matrixtype; // kVandermonde or kCauchy (asserted in prepare())
+
+public:
+  // fallbacks used by parse() when the profile has no "k" / "m" entry
+  static const std::string DEFAULT_K;
+  static const std::string DEFAULT_M;
+
+  unsigned char* encode_coeff; // encoding coefficients, obtained from the table cache in prepare()
+  unsigned char* encode_tbls; // encoding table, obtained from the table cache in prepare()
+
+  ErasureCodeIsaDefault(ErasureCodeIsaTableCache &_tcache,
+                        int matrix = kVandermonde) :
+
+  ErasureCodeIsa("default", _tcache),
+  encode_coeff(0), encode_tbls(0)
+  {
+    matrixtype = matrix;
+  }
+
+  
+  ~ErasureCodeIsaDefault() override
+  {
+    // nothing released here; the cached buffers are freed by the table cache destructor
+  }
+
+  void isa_encode(char **data,
+                          char **coding,
+                          int blocksize) override;
+  // true if chunk index i occurs in the -1 terminated erasures list
+  virtual bool erasure_contains(int *erasures, int i);
+
+  int isa_decode(int *erasures,
+                         char **data,
+                         char **coding,
+                         int blocksize) override;
+
+  unsigned get_alignment() const override;
+
+  void prepare() override;
+
+ private:
+  int parse(ErasureCodeProfile &profile,
+                    std::ostream *ss) override;
+};
+
+#endif
diff --git a/src/erasure-code/isa/ErasureCodeIsaTableCache.cc b/src/erasure-code/isa/ErasureCodeIsaTableCache.cc
new file mode 100644
index 00000000..7b176de8
--- /dev/null
+++ b/src/erasure-code/isa/ErasureCodeIsaTableCache.cc
@@ -0,0 +1,327 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ *
+ * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+
+/**
+ * @file   ErasureCodeIsaTableCache.cc
+ *
+ * @brief  Erasure Code Isa CODEC Table Cache
+ *
+ * The INTEL ISA-L library supports two pre-defined encoding matrices (reed_sol_van = default, cauchy).
+ * The default CODEC implementation using these two matrices is implemented in class ErasureCodeIsaDefault.
+ * ISA-L allows the use of custom matrices, which might be added later as implementations deriving from the base class ErasureCodeIsa.
+ */
+
+// -----------------------------------------------------------------------------
+#include "ErasureCodeIsaTableCache.h"
+#include "common/debug.h"
+// -----------------------------------------------------------------------------
+
+// -----------------------------------------------------------------------------
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_osd
+#undef dout_prefix
+#define dout_prefix _tc_prefix(_dout)
+// -----------------------------------------------------------------------------
+
+// Debug-output hook (see dout_prefix above): tags each log line with the cache name.
+static ostream& _tc_prefix(std::ostream* _dout)
+{
+  std::ostream& log = *_dout;
+  log << "ErasureCodeIsaTableCache: ";
+  return log;
+}
+
+// -----------------------------------------------------------------------------
+
+ErasureCodeIsaTableCache::~ErasureCodeIsaTableCache()
+{
+  // Releases every cached coefficient/table buffer together with its slot,
+  // and the per-matrix-type LRU maps and lists.
+  Mutex::Locker lock(codec_tables_guard);
+  codec_technique_tables_t::const_iterator ttables_it;
+  codec_tables_t::const_iterator tables_it;
+  codec_table_t::const_iterator table_it;
+  std::map<int, lru_map_t*>::const_iterator lru_map_it;
+  std::map<int, lru_list_t*>::const_iterator lru_list_it;
+
+  // clean-up all allocated tables
+  for (ttables_it = encoding_coefficient.begin(); ttables_it != encoding_coefficient.end(); ++ttables_it) {
+    for (tables_it = ttables_it->second.begin(); tables_it != ttables_it->second.end(); ++tables_it) {
+      for (table_it = tables_it->second.begin(); table_it != tables_it->second.end(); ++table_it) {
+        if (table_it->second) {
+          // the coefficient buffers are malloc()ed by the codec and released
+          // with free() in setEncodingCoefficient: never pass them to delete
+          free(*(table_it->second));
+          delete table_it->second;
+        }
+      }
+    }
+  }
+
+  for (ttables_it = encoding_table.begin(); ttables_it != encoding_table.end(); ++ttables_it) {
+    for (tables_it = ttables_it->second.begin(); tables_it != ttables_it->second.end(); ++tables_it) {
+      for (table_it = tables_it->second.begin(); table_it != tables_it->second.end(); ++table_it) {
+        if (table_it->second) {
+          // same for the encoding tables: malloc()ed buffer, new-allocated slot;
+          // free() accepts a null pointer, so an empty slot needs no extra check
+          free(*(table_it->second));
+          delete table_it->second;
+        }
+      }
+    }
+  }
+
+  for (lru_map_it = decoding_tables.begin(); lru_map_it != decoding_tables.end(); ++lru_map_it) {
+    if (lru_map_it->second) {
+      delete lru_map_it->second;
+    }
+  }
+
+  for (lru_list_it = decoding_tables_lru.begin(); lru_list_it != decoding_tables_lru.end(); ++lru_list_it) {
+    if (lru_list_it->second) {
+      delete lru_list_it->second;
+    }
+  }
+}
+
+// -----------------------------------------------------------------------------
+
+int
+ErasureCodeIsaTableCache::getDecodingTableCacheSize(int matrixtype)
+{
+  // number of cached decoding tables for this matrix type, -1 without a map
+  Mutex::Locker lock(codec_tables_guard);
+  lru_map_t* cached = decoding_tables[matrixtype];
+  if (!cached) return -1;
+  return cached->size();
+}
+
+// -----------------------------------------------------------------------------
+
+ErasureCodeIsaTableCache::lru_map_t*
+ErasureCodeIsaTableCache::getDecodingTables(int matrix_type)
+{
+  // precondition: the caller already holds the guard mutex
+  // => Mutex::Locker lock(codec_tables_guard);
+
+  // allocate the lru map of this matrix type on first use
+  lru_map_t*& slot = decoding_tables[matrix_type];
+  if (slot == 0)
+    slot = new lru_map_t;
+  return slot;
+}
+
+// -----------------------------------------------------------------------------
+
+ErasureCodeIsaTableCache::lru_list_t*
+ErasureCodeIsaTableCache::getDecodingTablesLru(int matrix_type)
+{
+  // precondition: the caller already holds the guard mutex
+  // => Mutex::Locker lock(codec_tables_guard);
+
+  // allocate the lru list of this matrix type on first use
+  lru_list_t*& slot = decoding_tables_lru[matrix_type];
+  if (slot == 0)
+    slot = new lru_list_t;
+  return slot;
+}
+
+// Locked lookup of the encoding-table slot for (matrix, k, m).
+unsigned char**
+ErasureCodeIsaTableCache::getEncodingTable(int matrix, int k, int m)
+{
+  Mutex::Locker lock(codec_tables_guard);
+  unsigned char** slot = getEncodingTableNoLock(matrix, k, m);
+  return slot;
+}
+
+// -----------------------------------------------------------------------------
+unsigned char**
+ErasureCodeIsaTableCache::getEncodingTableNoLock(int matrix, int k, int m)
+{
+  // performs no locking itself; on first use a slot holding a null table
+  // address is created for (matrix, k, m)
+  unsigned char**& slot = encoding_table[matrix][k][m];
+  if (slot == 0) {
+    slot = new (unsigned char*);
+    *slot = 0;
+  }
+  return slot;
+}
+
+// Locked lookup of the encoding-coefficient slot for (matrix, k, m).
+unsigned char**
+ErasureCodeIsaTableCache::getEncodingCoefficient(int matrix, int k, int m)
+{
+  Mutex::Locker lock(codec_tables_guard);
+  unsigned char** slot = getEncodingCoefficientNoLock(matrix, k, m);
+  return slot;
+}
+
+// -----------------------------------------------------------------------------
+unsigned char**
+ErasureCodeIsaTableCache::getEncodingCoefficientNoLock(int matrix, int k, int m)
+{
+  // performs no locking itself; on first use a slot holding a null
+  // coefficient address is created for (matrix, k, m)
+  unsigned char**& slot = encoding_coefficient[matrix][k][m];
+  if (slot == 0) {
+    slot = new (unsigned char*);
+    *slot = 0;
+  }
+  return slot;
+}
+
+// -----------------------------------------------------------------------------
+
+unsigned char*
+ErasureCodeIsaTableCache::setEncodingTable(int matrix, int k, int m, unsigned char* ec_in_table)
+{
+  // Publishes ec_in_table as the table for (matrix, k, m) unless one is
+  // already stored; the returned pointer is the table that is cached.
+  Mutex::Locker lock(codec_tables_guard);
+  unsigned char** slot = getEncodingTableNoLock(matrix, k, m);
+  if (*slot == 0) {
+    // nothing stored yet: keep the caller's table
+    *slot = ec_in_table;
+    return ec_in_table;
+  }
+  // somebody deposited a table in the meanwhile: drop ours, return theirs
+  free (ec_in_table);
+  return *slot;
+}
+
+// -----------------------------------------------------------------------------
+
+unsigned char*
+ErasureCodeIsaTableCache::setEncodingCoefficient(int matrix, int k, int m, unsigned char* ec_in_coeff)
+{
+  // Publishes ec_in_coeff as the coefficients for (matrix, k, m) unless some
+  // are already stored; the returned pointer is the set that is cached.
+  Mutex::Locker lock(codec_tables_guard);
+  unsigned char** slot = getEncodingCoefficientNoLock(matrix, k, m);
+  if (*slot == 0) {
+    // nothing stored yet: keep the caller's coefficients
+    *slot = ec_in_coeff;
+    return ec_in_coeff;
+  }
+  // somebody deposited coefficients in the meanwhile: drop ours, return theirs
+  free (ec_in_coeff);
+  return *slot;
+}
+
+// -----------------------------------------------------------------------------
+Mutex*
+ErasureCodeIsaTableCache::getLock()
+{
+  // hands out the mutex taken by the accessors of this cache
+  return &(this->codec_tables_guard);
+}
+
+// -----------------------------------------------------------------------------
+
+bool
+ErasureCodeIsaTableCache::getDecodingTableFromCache(std::string &signature,
+                                                    unsigned char* &table,
+                                                    int matrixtype,
+                                                    int k,
+                                                    int m)
+{
+  // --------------------------------------------------------------------------
+  // LRU decoding matrix cache: on a hit the cached table is copied into
+  // `table`, the entry becomes the most recently used one and true is returned
+  // --------------------------------------------------------------------------
+
+  dout(12) << "[ get table    ] = " << signature << dendl;
+
+  // map and lru list are only touched while the guard is held
+  Mutex::Locker lock(codec_tables_guard);
+
+  lru_map_t* cache_map = getDecodingTables(matrixtype);
+  lru_list_t* cache_lru = getDecodingTablesLru(matrixtype);
+
+  // we try to fetch a decoding table from an LRU cache
+  lru_map_t::iterator hit = cache_map->find(signature);
+  if (hit == cache_map->end()) {
+    // nothing cached for this signature
+    return false;
+  }
+
+  dout(12) << "[ cached table ] = " << signature << dendl;
+  // copy the table out of the cache
+  memcpy(table, hit->second.second.c_str(), k * (m + k)*32);
+  dout(12) << "[ cache size   ] = " << cache_lru->size() << dendl;
+  // move the entry to the front of the LRU list
+  cache_lru->splice(cache_lru->begin(), *cache_lru, hit->second.first);
+
+  return true;
+}
+
+// -----------------------------------------------------------------------------
+
+void
+ErasureCodeIsaTableCache::putDecodingTableToCache(std::string &signature,
+                                                  unsigned char* &table,
+                                                  int matrixtype,
+                                                  int k,
+                                                  int m)
+{
+  // --------------------------------------------------------------------------
+  // LRU decoding matrix cache: stores a copy of `table` under `signature`
+  // --------------------------------------------------------------------------
+
+  dout(12) << "[ put table    ] = " << signature << dendl;
+
+  // we store a new table to the cache
+  // NOTE(review): an already cached signature is not checked for here - confirm callers only put after a miss
+  bufferptr cachetable;
+
+  Mutex::Locker lock(codec_tables_guard);
+
+  lru_map_t* decode_tbls_map =
+    getDecodingTables(matrixtype);
+
+  lru_list_t* decode_tbls_lru =
+    getDecodingTablesLru(matrixtype);
+
+  // if the LRU list is full, replace the entry at its back
+  if ((int) decode_tbls_lru->size() >= ErasureCodeIsaTableCache::decoding_tables_lru_length) {
+    dout(12) << "[ shrink lru   ] = " << signature << dendl;
+    // reuse the buffer of the entry at the back of the list
+    cachetable = (*decode_tbls_map)[decode_tbls_lru->back()].second;
+
+    if ((int) cachetable.length() != (k * (m + k)*32)) {
+      // we need to replace this with a different size buffer
+      cachetable = buffer::create(k * (m + k)*32);
+    }
+
+    // remove from map
+    decode_tbls_map->erase(decode_tbls_lru->back());
+    // remove from lru
+    decode_tbls_lru->pop_back();
+    // add to the head of lru
+    decode_tbls_lru->push_front(signature);
+    // add the new to the map
+    (*decode_tbls_map)[signature] = std::make_pair(decode_tbls_lru->begin(), cachetable);
+  } else {
+    dout(12) << "[ store table  ] = " << signature << dendl;
+    // allocate a new buffer
+    cachetable = buffer::create(k * (m + k)*32);
+    decode_tbls_lru->push_front(signature);
+    (*decode_tbls_map)[signature] = std::make_pair(decode_tbls_lru->begin(), cachetable);
+    dout(12) << "[ cache size   ] = " << decode_tbls_lru->size() << dendl;
+  }
+
+  // copy-in the new table (k * (m + k) * 32 bytes)
+  memcpy(cachetable.c_str(), table, k * (m + k)*32);
+}
diff --git a/src/erasure-code/isa/ErasureCodeIsaTableCache.h b/src/erasure-code/isa/ErasureCodeIsaTableCache.h
new file mode 100644
index 00000000..64aaae75
--- /dev/null
+++ b/src/erasure-code/isa/ErasureCodeIsaTableCache.h
@@ -0,0 +1,105 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ *
+ * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+/**
+ * @file   ErasureCodeIsaTableCache.h
+ *
+ * @brief  Erasure Code Isa CODEC Table Cache
+ *
+ * The INTEL ISA-L library supports two pre-defined encoding matrices (cauchy, reed_sol_van = default).
+ * The default CODEC implementation using these two matrices is implemented in class ErasureCodeIsaDefault.
+ * ISA-L allows the use of custom matrices, which might be added later as implementations deriving from the base class ErasureCodeIsa.
+ */
+
+#ifndef CEPH_ERASURE_CODE_ISA_TABLE_CACHE_H
+#define CEPH_ERASURE_CODE_ISA_TABLE_CACHE_H
+
+// -----------------------------------------------------------------------------
+#include "common/Mutex.h"
+#include "erasure-code/ErasureCodeInterface.h"
+// -----------------------------------------------------------------------------
+#include <list>
+// -----------------------------------------------------------------------------
+
+class ErasureCodeIsaTableCache {
+  // ---------------------------------------------------------------------------
+  // This class implements a table cache for encoding and decoding matrices.
+  // Encoding matrices are shared for the same (k,m) combination. It supplies
+  // a decoding matrix lru cache which is shared for identical
+  // matrix types e.g. there is one cache (lru-list + lru-map) for Cauchy and
+  // one for Vandermonde matrices!
+  // ---------------------------------------------------------------------------
+
+public:
+
+  // Maximum number of entries kept in one decoding-table LRU list; once the
+  // list has reached this length, storing a new table evicts the entry at the
+  // tail. The cache size is sufficient up to (12,4) decodings.
+  // NOTE(review): 2516 = C(16,1) + C(16,2) + C(16,3) + C(16,4), presumably the
+  // number of ways to lose 1..4 out of 16 chunks - confirm.
+
+  static const int decoding_tables_lru_length = 2516;
+
+  // one cached decoding table: (position of its signature in the lru list,
+  // buffer holding the table bytes)
+  typedef std::pair<std::list<std::string>::iterator, bufferptr> lru_entry_t;
+  // nested maps which together are indexed as table[matrix][k][m]
+  typedef std::map< int, unsigned char** > codec_table_t;
+  typedef std::map< int, codec_table_t > codec_tables_t;
+  typedef std::map< int, codec_tables_t > codec_technique_tables_t;
+
+  // decoding table signature -> cached entry
+  typedef std::map< std::string, lru_entry_t > lru_map_t;
+  // decoding table signatures; new entries are pushed to the front and
+  // evictions are taken from the back
+  typedef std::list< std::string > lru_list_t;
+
+  ErasureCodeIsaTableCache() :
+  codec_tables_guard("isa-lru-cache")
+  {
+  }
+
+  virtual ~ErasureCodeIsaTableCache();
+
+  Mutex codec_tables_guard; // mutex used to protect modifications in encoding/decoding table maps
+
+  // Look up a decoding table by signature.
+  // NOTE(review): the definition is not part of this header; presumably it
+  // returns true and fills 'table' on a cache hit - confirm in the .cc file.
+  bool getDecodingTableFromCache(std::string &signature,
+                                 unsigned char* &table,
+                                 int matrixtype,
+                                 int k,
+                                 int m);
+
+  // Store a copy of a decoding table (k * (m + k) * 32 bytes) under the given
+  // signature in the cache selected by matrixtype. Takes codec_tables_guard.
+  // When the lru list is full the entry at its tail is evicted and its buffer
+  // is reused if it has the required size.
+  void putDecodingTableToCache(std::string&,
+                               unsigned char*&,
+                               int matrixtype,
+                               int k,
+                               int m);
+
+  // Accessors for the shared encoding tables / encoding coefficients of a
+  // given (matrix, k, m) combination.
+  unsigned char** getEncodingTable(int matrix, int k, int m);
+  unsigned char** getEncodingCoefficient(int matrix, int k, int m);
+
+  // NOTE(review): presumably the same as above without taking
+  // codec_tables_guard, i.e. the caller has to hold it - confirm.
+  unsigned char** getEncodingTableNoLock(int matrix, int k, int m);
+  unsigned char** getEncodingCoefficientNoLock(int matrix, int k, int m);
+
+  unsigned char* setEncodingTable(int matrix, int k, int m, unsigned char*);
+  unsigned char* setEncodingCoefficient(int matrix, int k, int m, unsigned char*);
+
+  int getDecodingTableCacheSize(int matrixtype = 0);
+
+private:
+  codec_technique_tables_t encoding_coefficient; // encoding coefficients accessed via table[matrix][k][m]
+  codec_technique_tables_t encoding_table; // encoding tables accessed via table[matrix][k][m]
+
+  std::map<int, lru_map_t*> decoding_tables; // decoding table cache accessed via map[matrixtype]
+  std::map<int, lru_list_t*> decoding_tables_lru; // decoding table lru list accessed via list[matrixtype]
+
+  // return the decoding table map for the given matrix type
+  lru_map_t* getDecodingTables(int matrix_type);
+
+  // return the decoding table lru list for the given matrix type
+  lru_list_t* getDecodingTablesLru(int matrix_type);
+
+  Mutex* getLock();
+
+};
+
+#endif
diff --git a/src/erasure-code/isa/ErasureCodePluginIsa.cc b/src/erasure-code/isa/ErasureCodePluginIsa.cc
new file mode 100644
index 00000000..5eda591b
--- /dev/null
+++ b/src/erasure-code/isa/ErasureCodePluginIsa.cc
@@ -0,0 +1,82 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ *
+ * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+
+/**
+ * @file   ErasureCodePluginIsa.cc
+ *
+ * @brief  Erasure Code Plug-in class wrapping the INTEL ISA-L library
+ *
+ * The factory plug-in class allows to call individual encoding techniques.
+ * The INTEL ISA-L library provides two pre-defined encoding matrices
+ * (cauchy, reed_sol_van = default).
+ */
+
+// -----------------------------------------------------------------------------
+#include "ceph_ver.h"
+#include "include/buffer.h"
+#include "ErasureCodePluginIsa.h"
+#include "ErasureCodeIsa.h"
+// -----------------------------------------------------------------------------
+
+// Create an ISA erasure code instance for the technique named in the profile.
+//
+// directory    : not used by this plug-in
+// profile      : erasure code profile; when it has no "technique" entry,
+//                "reed_sol_van" is written into it
+// erasure_code : receives the initialized codec on success
+// ss           : receives a message when the technique is unknown and is
+//                passed on to the codec's init()
+//
+// Returns 0 on success, -ENOENT for an unknown technique, otherwise the
+// non-zero result of the codec's init().
+int ErasureCodePluginIsa::factory(const std::string &directory,
+                                  ErasureCodeProfile &profile,
+                                  ErasureCodeInterfaceRef *erasure_code,
+                                  std::ostream *ss)
+{
+  auto entry = profile.find("technique");
+  if (entry == profile.end()) {
+    // no technique requested: store the default in the profile
+    profile["technique"] = "reed_sol_van";
+    entry = profile.find("technique");
+  }
+  const std::string technique = entry->second;
+
+  ErasureCodeIsa *codec = nullptr;
+  if (technique == "reed_sol_van") {
+    codec = new ErasureCodeIsaDefault(tcache,
+                                      ErasureCodeIsaDefault::kVandermonde);
+  } else if (technique == "cauchy") {
+    codec = new ErasureCodeIsaDefault(tcache,
+                                      ErasureCodeIsaDefault::kCauchy);
+  } else {
+    *ss << "technique=" << technique << " is not a valid coding technique. "
+        << " Choose one of the following: "
+        << "reed_sol_van,"
+        << "cauchy" << std::endl;
+    return -ENOENT;
+  }
+
+  const int r = codec->init(profile, ss);
+  if (r != 0) {
+    // initialization failed: the codec is not handed out, so release it here
+    delete codec;
+    return r;
+  }
+
+  *erasure_code = ErasureCodeInterfaceRef(codec);
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+
+// Plug-in entry point: returns the CEPH_GIT_NICE_VER string this plug-in was
+// built with.
+// NOTE(review): presumably compared by the plug-in loader against its own
+// version - confirm against the registry code.
+const char *__erasure_code_version()
+{
+  return CEPH_GIT_NICE_VER;
+}
+
+// -----------------------------------------------------------------------------
+
+// Plug-in entry point: registers a new ErasureCodePluginIsa instance under
+// plugin_name with the ErasureCodePluginRegistry singleton and returns the
+// result of that registration. The directory argument is not used here.
+int __erasure_code_init(char *plugin_name, char *directory)
+{
+  ErasureCodePluginRegistry &instance = ErasureCodePluginRegistry::instance();
+
+  return instance.add(plugin_name, new ErasureCodePluginIsa());
+}
diff --git a/src/erasure-code/isa/ErasureCodePluginIsa.h b/src/erasure-code/isa/ErasureCodePluginIsa.h
new file mode 100644
index 00000000..fc52e10b
--- /dev/null
+++ b/src/erasure-code/isa/ErasureCodePluginIsa.h
@@ -0,0 +1,34 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph distributed storage system
+ *
+ * Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+ * Copyright (C) 2014 Red Hat <contact@redhat.com>
+ *
+ * Author: Loic Dachary <loic@dachary.org>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ * 
+ */
+
+#ifndef CEPH_ERASURE_CODE_PLUGIN_ISA_H
+#define CEPH_ERASURE_CODE_PLUGIN_ISA_H
+
+#include "erasure-code/ErasureCodePlugin.h"
+#include "ErasureCodeIsaTableCache.h"
+
+// Erasure code plug-in wrapping the ISA-L based codecs.
+class ErasureCodePluginIsa : public ErasureCodePlugin {
+public:
+  // table cache handed to every codec instance created by factory()
+  ErasureCodeIsaTableCache tcache;
+
+  // Create and initialize a codec for the "technique" entry of the profile
+  // and return it through erasure_code; error text is written to ss.
+  int factory(const std::string &directory,
+		      ErasureCodeProfile &profile,
+		      ErasureCodeInterfaceRef *erasure_code,
+		      std::ostream *ss) override;
+};
+
+#endif
diff --git a/src/erasure-code/isa/README b/src/erasure-code/isa/README
new file mode 100644
index 00000000..f5e9fe59
--- /dev/null
+++ b/src/erasure-code/isa/README
@@ -0,0 +1,63 @@
+============================================
+INTEL ISA library Erasure Coding plugin
+============================================
+
+Build Requirements
+==================
+Plug-in build compiles the included sources of ISA-L v2.10 and links them into the plugin. ISA-L implementation is portable and probes CPU features during runtime. Note that the names of the assembler source files have been renamed from *.asm to *.asm.s to be compatible with Automake.
+
+Run-time Requirements
+=====================
+None
+
+Plug-in Configuration
+=====================
+
+Used parameters are:
+k : number of data chunks
+m : number of coding chunks
+technique : cauchy, reed_sol_van
+
+The plug-in exports only two encoding techniques (cauchy, reed_sol_van) using either a Vandermonde matrix or a Cauchy matrix for coding.
+By default a Vandermonde matrix is used. Be aware that the generated Vandermonde matrix is not always invertible and not fully MDS.
+Therefore the accepted parameter space has been limited to a maximum of (21,4) and (32,3) for Vandermonde matrices.
+
+Run the Test suite
+==================
+cd ceph/src
+make unittest_erasure_code_isa
+./unittest_erasure_code_isa --gtest_filter=*.* --log-to-stderr=true --debug-osd=20
+
+Run the CEPH erasure code benchmark
+===================================
+cd ceph/src
+make ceph_erasure_code_benchmark
+
+# consult ./ceph_erasure_code_benchmark -h for help
+
+# encode performance
+./ceph_erasure_code_benchmark -p isa -P k=8 -P m=3 -S 1048576 -i 1000
+
+# decode performance one lost
+./ceph_erasure_code_benchmark -e 1 -w decode -p isa -P k=8 -P m=3 -S 1048576 -i 1000
+
+# decode performance two lost
+./ceph_erasure_code_benchmark -e 2 -w decode -p isa -P k=8 -P m=3 -S 1048576 -i 1000
+
+# decode performance three lost
+./ceph_erasure_code_benchmark -e 3 -w decode -p isa -P k=8 -P m=3 -S 1048576 -i 1000
+
+
+Developer Notes
+===============
+The plugin provides optimal performance for 32-byte aligned buffer start address and 
+k*32 byte aligned buffer length. The encoding tables are computed only once when the EC 
+object is created. Decoding Tables have to be computed for each decoding since the available 
+data/coding sources may change between calls.
+Decoding tables are cached in an LRU cache which is sufficiently large up to (12,4).
+
+For larger configurations the cache might expire the 'oldest' tables and decoding might
+slow down. The plug-in uses an optimization to use a pure region XOR to decode single disk
+failures if the erased chunk is within the first (k+1) chunks.
+
+The unittest probes all possible failure scenarios for (12,4) Vandermonde and Cauchy matrices.
diff --git a/src/erasure-code/isa/xor_op.cc b/src/erasure-code/isa/xor_op.cc
new file mode 100644
index 00000000..2b56e977
--- /dev/null
+++ b/src/erasure-code/isa/xor_op.cc
@@ -0,0 +1,183 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ *
+ * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+// -----------------------------------------------------------------------------
+#include "xor_op.h"
+#include <stdio.h>
+#include <string.h>
+#include "arch/intel.h"
+
+#include "include/ceph_assert.h"
+
+// -----------------------------------------------------------------------------
+
+
+// -----------------------------------------------------------------------------
+
+// -----------------------------------------------------------------------------
+// XOR the bytes in [cw, ew) into the region of the same length starting at dw.
+// -----------------------------------------------------------------------------
+void
+byte_xor(unsigned char* cw, unsigned char* dw, unsigned char* ew)
+{
+  for (; cw < ew; ++cw, ++dw) {
+    *dw ^= *cw;
+  }
+}
+
+// -----------------------------------------------------------------------------
+
+// -----------------------------------------------------------------------------
+// XOR the vector words in [cw, ew) into the region starting at dw.
+// All three pointers have to be aligned to EC_ISA_VECTOR_OP_WORDSIZE, which is
+// enforced with ceph_assert.
+// -----------------------------------------------------------------------------
+void
+vector_xor(vector_op_t* cw,
+           vector_op_t* dw,
+           vector_op_t* ew)
+{
+  ceph_assert(is_aligned(cw, EC_ISA_VECTOR_OP_WORDSIZE));
+  ceph_assert(is_aligned(dw, EC_ISA_VECTOR_OP_WORDSIZE));
+  ceph_assert(is_aligned(ew, EC_ISA_VECTOR_OP_WORDSIZE));
+
+  for (; cw < ew; ++cw, ++dw) {
+    *dw ^= *cw;
+  }
+}
+
+
+// -----------------------------------------------------------------------------
+
+// -----------------------------------------------------------------------------
+// Compute parity = src[0] ^ src[1] ^ ... ^ src[src_size-1] over 'size' bytes.
+//
+// src      : array of src_size source buffers, 'size' bytes are read from each
+// parity   : output buffer, 'size' bytes are written
+// src_size : number of source buffers; nothing is done if it is <= 0
+// size     : number of bytes to process; nothing is done if it is 0
+//
+// If all source pointers and the parity pointer are aligned to
+// EC_ISA_VECTOR_OP_WORDSIZE, the leading part of the region which is a
+// multiple of the vector width is processed by the SSE2 or the generic vector
+// routine. The remainder (or the whole region if any pointer is unaligned) is
+// processed byte by byte.
+// -----------------------------------------------------------------------------
+void
+// -----------------------------------------------------------------------------
+region_xor(unsigned char** src,
+           unsigned char* parity,
+           int src_size,
+           unsigned size)
+{
+  if (!size || src_size <= 0) {
+    // nothing to do; a negative source count is treated like an empty one
+    // instead of reaching the vector code without a valid src[0]
+    return;
+  }
+
+  if (src_size == 1) {
+    // just copy source to parity
+    memcpy(parity, src[0], size);
+    return;
+  }
+
+  // number of trailing bytes which are not covered by the vector code
+  unsigned size_left = size;
+
+  // ----------------------------------------------------------
+  // region or vector XOR operations require aligned addresses
+  // ----------------------------------------------------------
+
+  bool src_aligned = true;
+  for (int i = 0; i < src_size; i++) {
+    src_aligned &= is_aligned(src[i], EC_ISA_VECTOR_OP_WORDSIZE);
+  }
+
+  if (src_aligned &&
+      is_aligned(parity, EC_ISA_VECTOR_OP_WORDSIZE)) {
+
+#ifdef __x86_64__
+    if (ceph_arch_intel_sse2) {
+      // -----------------------------
+      // use SSE2 region xor function
+      // -----------------------------
+      unsigned region_size =
+        (size / EC_ISA_VECTOR_SSE2_WORDSIZE) * EC_ISA_VECTOR_SSE2_WORDSIZE;
+
+      size_left -= region_size;
+      // 64-byte region xor
+      region_sse2_xor((char**) src, (char*) parity, src_size, region_size);
+    } else
+#endif
+    {
+      // --------------------------------------------
+      // use region xor based on vector xor operation
+      // --------------------------------------------
+      unsigned vector_words = size / EC_ISA_VECTOR_OP_WORDSIZE;
+      unsigned vector_size = vector_words * EC_ISA_VECTOR_OP_WORDSIZE;
+      // seed the parity with the first source, then fold in the others
+      memcpy(parity, src[0], vector_size);
+
+      size_left -= vector_size;
+      vector_op_t* p_vec = (vector_op_t*) parity;
+      for (int i = 1; i < src_size; i++) {
+        vector_op_t* s_vec = (vector_op_t*) src[i];
+        vector_op_t* e_vec = s_vec + vector_words;
+        vector_xor(s_vec, p_vec, e_vec);
+      }
+    }
+  }
+
+  if (size_left) {
+    // --------------------------------------------------
+    // xor the not aligned part with byte-wise region xor
+    // --------------------------------------------------
+    memcpy(parity + size - size_left, src[0] + size - size_left, size_left);
+    for (int i = 1; i < src_size; i++) {
+      byte_xor(src[i] + size - size_left, parity + size - size_left, src[i] + size);
+    }
+  }
+}
+
+// -----------------------------------------------------------------------------
+
+// -----------------------------------------------------------------------------
+// Compute parity = src[0] ^ src[1] ^ ... ^ src[src_size-1] in blocks of
+// EC_ISA_VECTOR_SSE2_WORDSIZE bytes using SSE2 instructions.
+//
+// src      : array of src_size source buffers; read with movdqa, which needs
+//            16-byte aligned addresses
+// parity   : output buffer; written with movntdq, which also needs 16-byte
+//            aligned addresses
+// src_size : number of source buffers
+// size     : number of bytes to process; has to be a multiple of
+//            EC_ISA_VECTOR_SSE2_WORDSIZE (asserted)
+//
+// The source pointers are used directly, so the number of sources is not
+// limited by a fixed-size scratch array. The function does nothing when not
+// built for x86_64.
+//
+// NOTE(review): xmm0-xmm7 carry state across separate asm statements without
+// being declared as clobbers; this relies on the compiler not using these
+// registers in between.
+// -----------------------------------------------------------------------------
+void
+// -----------------------------------------------------------------------------
+region_sse2_xor(char** src,
+                char* parity,
+                int src_size,
+                unsigned size)
+// -----------------------------------------------------------------------------
+{
+#ifdef __x86_64__
+  ceph_assert(!(size % EC_ISA_VECTOR_SSE2_WORDSIZE));
+  unsigned char* p = (unsigned char*) parity;
+
+  for (unsigned i = 0; i < size; i += EC_ISA_VECTOR_SSE2_WORDSIZE) {
+    // load 64 bytes of the first source into xmm0..xmm3
+    const unsigned char* first = (const unsigned char*) src[0];
+    asm volatile("movdqa %0,%%xmm0" : : "m" (first[i]));
+    asm volatile("movdqa %0,%%xmm1" : : "m" (first[i + 16]));
+    asm volatile("movdqa %0,%%xmm2" : : "m" (first[i + 32]));
+    asm volatile("movdqa %0,%%xmm3" : : "m" (first[i + 48]));
+
+    // fold the same 64 bytes of every further source into xmm0..xmm3
+    for (int d = 1; d < src_size; d++) {
+      const unsigned char* s = (const unsigned char*) src[d];
+      asm volatile("movdqa %0,%%xmm4" : : "m" (s[i]));
+      asm volatile("movdqa %0,%%xmm5" : : "m" (s[i + 16]));
+      asm volatile("movdqa %0,%%xmm6" : : "m" (s[i + 32]));
+      asm volatile("movdqa %0,%%xmm7" : : "m" (s[i + 48]));
+      asm volatile("pxor %xmm4,%xmm0");
+      asm volatile("pxor %xmm5,%xmm1");
+      asm volatile("pxor %xmm6,%xmm2");
+      asm volatile("pxor %xmm7,%xmm3");
+    }
+
+    // write the result with non-temporal stores
+    asm volatile("movntdq %%xmm0,%0" : "=m" (p[i]));
+    asm volatile("movntdq %%xmm1,%0" : "=m" (p[i + 16]));
+    asm volatile("movntdq %%xmm2,%0" : "=m" (p[i + 32]));
+    asm volatile("movntdq %%xmm3,%0" : "=m" (p[i + 48]));
+  }
+
+  // order the non-temporal stores before any later stores
+  asm volatile("sfence" : : : "memory");
+#endif // __x86_64__
+}
diff --git a/src/erasure-code/isa/xor_op.h b/src/erasure-code/isa/xor_op.h
new file mode 100644
index 00000000..978b9a95
--- /dev/null
+++ b/src/erasure-code/isa/xor_op.h
@@ -0,0 +1,87 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ *
+ * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+#ifndef EC_ISA_XOR_OP_H
+#define EC_ISA_XOR_OP_H
+
+// -----------------------------------------------------------------------------
+#include <assert.h>
+#include <stdint.h>
+// -----------------------------------------------------------------------------
+
+// -------------------------------------------------------------------------
+// declaration of 64/128-bit vector operations depending on availability
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+
+#define EC_ISA_ADDRESS_ALIGNMENT 32u
+#define EC_ISA_VECTOR_SSE2_WORDSIZE 64u
+
+#if __GNUC__ > 4 || \
+  ( (__GNUC__ == 4) && (__GNUC_MINOR__ >= 4) ) ||\
+  (__clang__ == 1 )
+#ifdef EC_ISA_VECTOR_OP_DEBUG
+#pragma message "* using 128-bit vector operations in " __FILE__
+#endif
+
+// -------------------------------------------------------------------------
+// use 128-bit pointer
+// -------------------------------------------------------------------------
+typedef long vector_op_t __attribute__((vector_size(16)));
+#define EC_ISA_VECTOR_OP_WORDSIZE 16
+#else
+// -------------------------------------------------------------------------
+// use 64-bit pointer
+// -------------------------------------------------------------------------
+typedef unsigned long long vector_op_t;
+#define EC_ISA_VECTOR_OP_WORDSIZE 8
+#endif
+
+
+// -------------------------------------------------------------------------
+// check if a pointer is aligned to byte_count
+// -------------------------------------------------------------------------
+#define is_aligned(POINTER, BYTE_COUNT) \
+  (((uintptr_t)(const void *)(POINTER)) % (BYTE_COUNT) == 0)
+
+// -------------------------------------------------------------------------
+// compute byte-wise XOR of cw and dw block, ew contains the end address of cw
+// -------------------------------------------------------------------------
+void
+byte_xor(unsigned char* cw, unsigned char* dw, unsigned char* ew);
+
+// -------------------------------------------------------------------------
+// compute word-wise XOR of cw and dw block, ew contains the end address of cw
+// -------------------------------------------------------------------------
+void
+vector_xor(vector_op_t* cw, vector_op_t* dw, vector_op_t* ew);
+
+// -------------------------------------------------------------------------
+// compute region XOR like parity = src[0] ^ src[1] ... ^ src[src_size-1]
+// -------------------------------------------------------------------------
+void
+region_xor(unsigned char** src, unsigned char* parity, int src_size, unsigned size);
+
+// -------------------------------------------------------------------------
+// compute region XOR like parity = src[0] ^ src[1] ... ^ src[src_size-1]
+// using SSE2 64-byte operations
+// -------------------------------------------------------------------------
+void
+region_sse2_xor(char** src /* array of 64-byte aligned source pointer to xor */,
+                char* parity /* 64-byte aligned output pointer containing the parity */,
+                int src_size /* size of the source pointer array */,
+                unsigned size /* size of the region to xor */);
+
+
+#endif // EC_ISA_XOR_OP_H
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 18:24:20 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 18:24:20 +0000
commit	483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
tree	e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/erasure-code/isa
parent	Initial commit. (diff)
download	ceph-upstream.tar.xz ceph-upstream.zip