summaryrefslogtreecommitdiffstats
path: root/src/erasure-code/isa
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/erasure-code/isa/CMakeLists.txt67
-rw-r--r--src/erasure-code/isa/ErasureCodeIsa.cc421
-rw-r--r--src/erasure-code/isa/ErasureCodeIsa.h153
-rw-r--r--src/erasure-code/isa/ErasureCodeIsaTableCache.cc327
-rw-r--r--src/erasure-code/isa/ErasureCodeIsaTableCache.h105
-rw-r--r--src/erasure-code/isa/ErasureCodePluginIsa.cc82
-rw-r--r--src/erasure-code/isa/ErasureCodePluginIsa.h34
-rw-r--r--src/erasure-code/isa/README63
-rw-r--r--src/erasure-code/isa/xor_op.cc183
-rw-r--r--src/erasure-code/isa/xor_op.h87
10 files changed, 1522 insertions, 0 deletions
diff --git a/src/erasure-code/isa/CMakeLists.txt b/src/erasure-code/isa/CMakeLists.txt
new file mode 100644
index 00000000..cc489211
--- /dev/null
+++ b/src/erasure-code/isa/CMakeLists.txt
@@ -0,0 +1,67 @@
+# ISA
+# Location of the ISA-L sources inside the source tree; its include/ directory
+# is added to the header search path.
+set(isal_src_dir ${CMAKE_SOURCE_DIR}/src/isa-l)
+include_directories(${isal_src_dir}/include)
+
+# Sources that make up the plugin: the ISA-L erasure_code C files, the
+# assembly kernels (dot_prod / mad / mul; the file-name suffix names the
+# instruction-set variant: sse, avx, avx2, avx512), and the four plugin
+# sources that live in this directory.
+set(isa_srcs
+ ${isal_src_dir}/erasure_code/ec_base.c
+ ${isal_src_dir}/erasure_code/gf_2vect_dot_prod_sse.asm
+ ${isal_src_dir}/erasure_code/gf_3vect_dot_prod_sse.asm
+ ${isal_src_dir}/erasure_code/gf_4vect_dot_prod_sse.asm
+ ${isal_src_dir}/erasure_code/gf_5vect_dot_prod_sse.asm
+ ${isal_src_dir}/erasure_code/gf_6vect_dot_prod_sse.asm
+ ${isal_src_dir}/erasure_code/gf_vect_dot_prod_sse.asm
+ ${isal_src_dir}/erasure_code/gf_2vect_mad_avx2.asm
+ ${isal_src_dir}/erasure_code/gf_3vect_mad_avx2.asm
+ ${isal_src_dir}/erasure_code/gf_4vect_mad_avx2.asm
+ ${isal_src_dir}/erasure_code/gf_5vect_mad_avx2.asm
+ ${isal_src_dir}/erasure_code/gf_6vect_mad_avx2.asm
+ ${isal_src_dir}/erasure_code/gf_vect_mad_avx2.asm
+ ${isal_src_dir}/erasure_code/ec_highlevel_func.c
+ ${isal_src_dir}/erasure_code/gf_2vect_mad_avx.asm
+ ${isal_src_dir}/erasure_code/gf_3vect_mad_avx.asm
+ ${isal_src_dir}/erasure_code/gf_4vect_mad_avx.asm
+ ${isal_src_dir}/erasure_code/gf_5vect_mad_avx.asm
+ ${isal_src_dir}/erasure_code/gf_6vect_mad_avx.asm
+ ${isal_src_dir}/erasure_code/gf_vect_mad_avx.asm
+ ${isal_src_dir}/erasure_code/ec_multibinary.asm
+ ${isal_src_dir}/erasure_code/gf_2vect_mad_sse.asm
+ ${isal_src_dir}/erasure_code/gf_3vect_mad_sse.asm
+ ${isal_src_dir}/erasure_code/gf_4vect_mad_sse.asm
+ ${isal_src_dir}/erasure_code/gf_5vect_mad_sse.asm
+ ${isal_src_dir}/erasure_code/gf_6vect_mad_sse.asm
+ ${isal_src_dir}/erasure_code/gf_vect_mad_sse.asm
+ ${isal_src_dir}/erasure_code/gf_2vect_dot_prod_avx2.asm
+ ${isal_src_dir}/erasure_code/gf_3vect_dot_prod_avx2.asm
+ ${isal_src_dir}/erasure_code/gf_4vect_dot_prod_avx2.asm
+ ${isal_src_dir}/erasure_code/gf_5vect_dot_prod_avx2.asm
+ ${isal_src_dir}/erasure_code/gf_6vect_dot_prod_avx2.asm
+ ${isal_src_dir}/erasure_code/gf_vect_dot_prod_avx2.asm
+ ${isal_src_dir}/erasure_code/gf_vect_mul_avx.asm
+ ${isal_src_dir}/erasure_code/gf_2vect_dot_prod_avx.asm
+ ${isal_src_dir}/erasure_code/gf_3vect_dot_prod_avx.asm
+ ${isal_src_dir}/erasure_code/gf_4vect_dot_prod_avx.asm
+ ${isal_src_dir}/erasure_code/gf_5vect_dot_prod_avx.asm
+ ${isal_src_dir}/erasure_code/gf_6vect_dot_prod_avx.asm
+ ${isal_src_dir}/erasure_code/gf_vect_dot_prod_avx.asm
+ ${isal_src_dir}/erasure_code/gf_vect_mul_sse.asm
+ ${isal_src_dir}/erasure_code/gf_2vect_dot_prod_avx512.asm
+ ${isal_src_dir}/erasure_code/gf_2vect_mad_avx512.asm
+ ${isal_src_dir}/erasure_code/gf_3vect_dot_prod_avx512.asm
+ ${isal_src_dir}/erasure_code/gf_3vect_mad_avx512.asm
+ ${isal_src_dir}/erasure_code/gf_4vect_dot_prod_avx512.asm
+ ${isal_src_dir}/erasure_code/gf_4vect_mad_avx512.asm
+ ${isal_src_dir}/erasure_code/gf_vect_dot_prod_avx512.asm
+ ${isal_src_dir}/erasure_code/gf_vect_mad_avx512.asm
+ ErasureCodeIsa.cc
+ ErasureCodeIsaTableCache.cc
+ ErasureCodePluginIsa.cc
+ xor_op.cc
+)
+
+# Build the plugin as a shared library from the sources listed in isa_srcs
+# plus the object files of the erasure_code_objs target.
+add_library(ec_isa SHARED
+ ${isa_srcs}
+ $<TARGET_OBJECTS:erasure_code_objs>)
+target_link_libraries(ec_isa ${EXTRALIBS})
+# Install with an empty RPATH.
+set_target_properties(ec_isa PROPERTIES
+ INSTALL_RPATH "")
+# The library is installed into the directory named by erasure_plugin_dir.
+install(TARGETS ec_isa DESTINATION ${erasure_plugin_dir})
diff --git a/src/erasure-code/isa/ErasureCodeIsa.cc b/src/erasure-code/isa/ErasureCodeIsa.cc
new file mode 100644
index 00000000..6f2f181f
--- /dev/null
+++ b/src/erasure-code/isa/ErasureCodeIsa.cc
@@ -0,0 +1,421 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ *
+ * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+// -----------------------------------------------------------------------------
+#include <algorithm>
+#include <errno.h>
+// -----------------------------------------------------------------------------
+#include "common/debug.h"
+#include "ErasureCodeIsa.h"
+#include "xor_op.h"
+#include "include/ceph_assert.h"
+using namespace std;
+
+// -----------------------------------------------------------------------------
+extern "C" {
+#include "isa-l/include/erasure_code.h"
+}
+// -----------------------------------------------------------------------------
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_osd
+#undef dout_prefix
+#define dout_prefix _prefix(_dout)
+// -----------------------------------------------------------------------------
+
+// -----------------------------------------------------------------------------
+
+// Emits the "ErasureCodeIsa: " tag in front of every dout() line produced by
+// this file; it is hooked in through the dout_prefix macro.
+static ostream&
+_prefix(std::ostream* _dout)
+{
+  std::ostream& out = *_dout;
+  out << "ErasureCodeIsa: ";
+  return out;
+}
+// -----------------------------------------------------------------------------
+
+const std::string ErasureCodeIsaDefault::DEFAULT_K("7");
+const std::string ErasureCodeIsaDefault::DEFAULT_M("3");
+
+
+// -----------------------------------------------------------------------------
+
+// Initialise the codec from a profile: parse() the profile first, then call
+// prepare(), then delegate to ErasureCode::init(). A non-zero parse() result
+// is returned unchanged and prepare() is not called in that case.
+int
+ErasureCodeIsa::init(ErasureCodeProfile &profile, ostream *ss)
+{
+  const int parse_result = parse(profile, ss);
+  if (parse_result != 0) {
+    return parse_result;
+  }
+
+  prepare();
+  return ErasureCode::init(profile, ss);
+}
+
+// -----------------------------------------------------------------------------
+
+// Size of one chunk for an object of object_size bytes: the object size
+// divided by k (rounded up), then rounded up to the next multiple of
+// get_alignment().
+unsigned int
+ErasureCodeIsa::get_chunk_size(unsigned int object_size) const
+{
+  const unsigned alignment = get_alignment();
+  const unsigned chunk_size = ( object_size + k - 1 ) / k;
+  dout(20) << "get_chunk_size: chunk_size " << chunk_size
+           << " must be modulo " << alignment << dendl;
+
+  const unsigned remainder = chunk_size % alignment;
+  if (remainder == 0) {
+    // already aligned, nothing to pad
+    return chunk_size;
+  }
+
+  const unsigned padded = chunk_size + alignment - remainder;
+  dout(10) << "get_chunk_size: " << chunk_size
+           << " padded to " << padded << dendl;
+  return padded;
+}
+
+// -----------------------------------------------------------------------------
+
+// Collects the raw buffers of chunks 0 .. k+m-1 from *encoded and hands them
+// to isa_encode(): the first k pointers as data, the remaining m as coding.
+// The block size is taken from the length of chunk 0. Always returns 0;
+// want_to_encode is not consulted.
+int ErasureCodeIsa::encode_chunks(const set<int> &want_to_encode,
+                                  map<int, bufferlist> *encoded)
+{
+  map<int, bufferlist> &shards = *encoded;
+  char *buffers[k + m];
+
+  for (int idx = 0; idx < k + m; ++idx) {
+    buffers[idx] = shards[idx].c_str();
+  }
+
+  char **data_part = &buffers[0];
+  char **coding_part = &buffers[k];
+  isa_encode(data_part, coding_part, shards[0].length());
+  return 0;
+}
+
+// Builds the -1 terminated erasure list (every index in 0 .. k+m-1 that has
+// no entry in chunks), gathers the buffers of *decoded into data/coding
+// pointer arrays and forwards everything to isa_decode(). The block size is
+// the length of the first entry of chunks. Asserts that at least one chunk
+// is missing; want_to_read is not consulted.
+int ErasureCodeIsa::decode_chunks(const set<int> &want_to_read,
+                                  const map<int, bufferlist> &chunks,
+                                  map<int, bufferlist> *decoded)
+{
+  const unsigned blocksize = chunks.begin()->second.length();
+  int erasures[k + m + 1];
+  int missing = 0;
+  char *data[k];
+  char *coding[m];
+
+  for (int shard = 0; shard < k + m; ++shard) {
+    if (chunks.count(shard) == 0) {
+      erasures[missing++] = shard;
+    }
+
+    char *buf = (*decoded)[shard].c_str();
+    if (shard < k) {
+      data[shard] = buf;
+    } else {
+      coding[shard - k] = buf;
+    }
+  }
+
+  erasures[missing] = -1;
+  ceph_assert(missing > 0);
+  return isa_decode(erasures, data, coding, blocksize);
+}
+
+// -----------------------------------------------------------------------------
+
+// Computes the m coding buffers from the k data buffers. With a single
+// parity chunk (m == 1) the parity is produced by region_xor(); otherwise
+// ec_encode_data() is called with the prepared encode_tbls.
+void
+ErasureCodeIsaDefault::isa_encode(char **data,
+                                  char **coding,
+                                  int blocksize)
+{
+  unsigned char **sources = (unsigned char**) data;
+
+  if (m != 1) {
+    ec_encode_data(blocksize, k, m, encode_tbls,
+                   sources, (unsigned char**) coding);
+    return;
+  }
+
+  // single parity stripe
+  region_xor(sources, (unsigned char*) coding[0], k, blocksize);
+}
+
+// -----------------------------------------------------------------------------
+
+// Returns true when index i occurs in the -1 terminated erasures list.
+bool
+ErasureCodeIsaDefault::erasure_contains(int *erasures, int i)
+{
+  const int *cursor = erasures;
+  while (*cursor != -1) {
+    if (*cursor == i) {
+      return true;
+    }
+    ++cursor;
+  }
+  return false;
+}
+
+// -----------------------------------------------------------------------------
+
+
+
+// -----------------------------------------------------------------------------
+
+// Reconstructs the chunks listed in erasures.
+//
+// erasures  -1 terminated list of missing chunk indices (0 .. k+m-1)
+// data      k buffers, indexed by chunk index
+// coding    m buffers, indexed by (chunk index - k)
+// blocksize passed through to region_xor() / ec_encode_data()
+//
+// Returns 0 on success and -1 when more than m chunks are missing or when
+// the reduced encoding matrix cannot be inverted.
+//
+// Three paths are taken, in this order:
+//   1. m == 1: the single missing chunk is rebuilt with region_xor().
+//   2. Vandermonde matrix, exactly one erasure with index < k + 1: also
+//      rebuilt with region_xor().
+//   3. otherwise: a decoding table is fetched from the table cache, or
+//      computed (and stored in the cache) and applied with ec_encode_data().
+//
+// NOTE(review): the "nerrs > m" rejection is only reached on path 3, i.e.
+// after the m == 1 assertion and the XOR shortcut have been evaluated.
+int
+ErasureCodeIsaDefault::isa_decode(int *erasures,
+ char **data,
+ char **coding,
+ int blocksize)
+{
+ int nerrs = 0;
+ int i, r, s;
+
+ // count the errors
+ for (int l = 0; erasures[l] != -1; l++) {
+ nerrs++;
+ }
+
+ unsigned char *recover_source[k];
+ unsigned char *recover_target[m];
+
+ memset(recover_source, 0, sizeof (recover_source));
+ memset(recover_target, 0, sizeof (recover_target));
+
+ // ---------------------------------------------
+ // Assign source and target buffers
+ // ---------------------------------------------
+ // r counts surviving chunks collected as sources (at most k),
+ // s counts erased chunks collected as targets (at most m).
+ for (i = 0, s = 0, r = 0; ((r < k) || (s < nerrs)) && (i < (k + m)); i++) {
+ if (!erasure_contains(erasures, i)) {
+ if (r < k) {
+ if (i < k) {
+ recover_source[r] = (unsigned char*) data[i];
+ } else {
+ recover_source[r] = (unsigned char*) coding[i - k];
+ }
+ r++;
+ }
+ } else {
+ if (s < m) {
+ if (i < k) {
+ recover_target[s] = (unsigned char*) data[i];
+ } else {
+ recover_target[s] = (unsigned char*) coding[i - k];
+ }
+ s++;
+ }
+ }
+ }
+
+ if (m == 1) {
+ // single parity decoding
+ ceph_assert(1 == nerrs);
+ dout(20) << "isa_decode: reconstruct using region xor [" <<
+ erasures[0] << "]" << dendl;
+ region_xor(recover_source, recover_target[0], k, blocksize);
+ return 0;
+ }
+
+
+ if ((matrixtype == kVandermonde) &&
+ (nerrs == 1) &&
+ (erasures[0] < (k + 1))) {
+ // use xor decoding if a data chunk is missing or the first coding chunk
+ dout(20) << "isa_decode: reconstruct using region xor [" <<
+ erasures[0] << "]" << dendl;
+ ceph_assert(1 == s);
+ ceph_assert(k == r);
+ region_xor(recover_source, recover_target[0], k, blocksize);
+ return 0;
+ }
+
+ // d receives the inverted matrix; decode_tbls is the expanded table handed
+ // to ec_encode_data() (filled from the cache or by ec_init_tables() below).
+ unsigned char d[k * (m + k)];
+ unsigned char decode_tbls[k * (m + k)*32];
+ unsigned char *p_tbls = decode_tbls;
+
+ // decode_index[i] = index of the i-th surviving chunk used as a source row
+ int decode_index[k];
+
+ if (nerrs > m)
+ return -1;
+
+ std::string erasure_signature; // describes a matrix configuration for caching
+
+ // ---------------------------------------------
+ // Construct b by removing error rows
+ // ---------------------------------------------
+
+ // The signature is "+<index>" for each of the k rows that are kept ...
+ for (i = 0, r = 0; i < k; i++, r++) {
+ char id[128];
+ while (erasure_contains(erasures, r))
+ r++;
+
+ decode_index[i] = r;
+
+ snprintf(id, sizeof (id), "+%d", r);
+ erasure_signature += id;
+ }
+
+ // ... followed by "-<index>" for each erased chunk.
+ for (int p = 0; p < nerrs; p++) {
+ char id[128];
+ snprintf(id, sizeof (id), "-%d", erasures[p]);
+ erasure_signature += id;
+ }
+
+ // ---------------------------------------------
+ // Try to get an already computed matrix
+ // ---------------------------------------------
+ if (!tcache.getDecodingTableFromCache(erasure_signature, p_tbls, matrixtype, k, m)) {
+ int j;
+ unsigned char b[k * (m + k)];
+ unsigned char c[k * (m + k)];
+
+ // b = the rows of encode_coeff that belong to the surviving chunks
+ for (i = 0; i < k; i++) {
+ r = decode_index[i];
+ for (j = 0; j < k; j++)
+ b[k * i + j] = encode_coeff[k * r + j];
+ }
+ // ---------------------------------------------
+ // Compute inverted matrix
+ // ---------------------------------------------
+
+ // --------------------------------------------------------
+ // Remark: this may fail for certain Vandermonde matrices !
+ // There is an advanced way trying to use different
+ // source chunks to get an invertible matrix, however
+ // there are also (k,m) combinations which cannot be
+ // inverted when m chunks are lost and this optimization
+ // does not help. Therefore we keep the code simpler.
+ // --------------------------------------------------------
+ if (gf_invert_matrix(b, d, k) < 0) {
+ dout(0) << "isa_decode: bad matrix" << dendl;
+ return -1;
+ }
+
+ // c collects one row of k coefficients per erased chunk
+ for (int p = 0; p < nerrs; p++) {
+ if (erasures[p] < k) {
+ // decoding matrix elements for data chunks
+ for (j = 0; j < k; j++) {
+ c[k * p + j] = d[k * erasures[p] + j];
+ }
+ } else {
+ // decoding matrix element for coding chunks
+ for (i = 0; i < k; i++) {
+ // this local s shadows the function-level s
+ int s = 0;
+ for (j = 0; j < k; j++)
+ s ^= gf_mul(d[j * k + i],
+ encode_coeff[k * erasures[p] + j]);
+
+ c[k * p + i] = s;
+ }
+ }
+ }
+
+ // ---------------------------------------------
+ // Initialize Decoding Table
+ // ---------------------------------------------
+ ec_init_tables(k, nerrs, c, decode_tbls);
+ tcache.putDecodingTableToCache(erasure_signature, p_tbls, matrixtype, k, m);
+ }
+ // Recover data sources
+ ec_encode_data(blocksize,
+ k, nerrs, decode_tbls, recover_source, recover_target);
+
+
+ return 0;
+}
+
+// -----------------------------------------------------------------------------
+
+// Alignment that get_chunk_size() pads chunk sizes to; this codec always
+// reports EC_ISA_ADDRESS_ALIGNMENT.
+unsigned
+ErasureCodeIsaDefault::get_alignment() const
+{
+  const unsigned alignment = EC_ISA_ADDRESS_ALIGNMENT;
+  return alignment;
+}
+
+// -----------------------------------------------------------------------------
+
+// Reads k and m from the profile (falling back to DEFAULT_K / DEFAULT_M) and
+// validates them.
+//
+// profile  erasure-code profile to read "k" and "m" from
+// ss       stream that receives human-readable diagnostics
+//
+// Returns the OR-ed results of ErasureCode::parse(), to_int() and
+// sanity_check_k_m(), or -EINVAL when a Vandermonde limit is exceeded. When
+// a limit is exceeded the offending value is also clamped to the limit.
+int ErasureCodeIsaDefault::parse(ErasureCodeProfile &profile,
+                                 ostream *ss)
+{
+  int err = ErasureCode::parse(profile, ss);
+  err |= to_int("k", profile, &k, DEFAULT_K, ss);
+  err |= to_int("m", profile, &m, DEFAULT_M, ss);
+  err |= sanity_check_k_m(k, m, ss);
+
+  if (matrixtype != kVandermonde) {
+    // the limits below only apply to Vandermonde matrices
+    return err;
+  }
+
+  // these are verified safe values evaluated using the
+  // benchmarktool and 10*(combinatoric for maximum loss) random
+  // full erasures
+  if (k > 32) {
+    // report k here: this check (and the revert) is about k, not m
+    *ss << "Vandermonde: k=" << k
+        << " should be less/equal than 32 : revert to k=32" << std::endl;
+    k = 32;
+    err = -EINVAL;
+  }
+
+  if (m > 4) {
+    *ss << "Vandermonde: m=" << m
+        << " should be less than 5 to guarantee an MDS codec:"
+        << " revert to m=4" << std::endl;
+    m = 4;
+    err = -EINVAL;
+  }
+
+  // m == 4 carries a tighter bound on k
+  if (m == 4 && k > 21) {
+    *ss << "Vandermonde: k=" << k
+        << " should be less than 22 to guarantee an MDS"
+        << " codec with m=4: revert to k=21" << std::endl;
+    k = 21;
+    err = -EINVAL;
+  }
+
+  return err;
+}
+
+// -----------------------------------------------------------------------------
+
+// Sets encode_coeff and encode_tbls for the configured (matrixtype, k, m).
+// Both are shared through the table cache: if the cache already holds them
+// they are reused, otherwise they are computed here and offered to the
+// cache, which returns the instance that ends up being stored.
+void
+ErasureCodeIsaDefault::prepare()
+{
+  // Validate the matrix type before anything is generated: with an unknown
+  // type neither generator below would run and an uninitialised coefficient
+  // buffer would be published to the shared cache before the process aborts.
+  ceph_assert((matrixtype == kVandermonde) || (matrixtype == kCauchy));
+
+  // setup shared encoding table and coefficients
+  unsigned char** p_enc_table =
+    tcache.getEncodingTable(matrixtype, k, m);
+
+  unsigned char** p_enc_coeff =
+    tcache.getEncodingCoefficient(matrixtype, k, m);
+
+  if (!*p_enc_coeff) {
+    dout(10) << "[ cache tables ] creating coeff for k=" <<
+      k << " m=" << m << dendl;
+    // build encoding coefficients which need to be computed once for each (k,m)
+    encode_coeff = (unsigned char*) malloc(k * (m + k));
+
+    if (matrixtype == kVandermonde)
+      gf_gen_rs_matrix(encode_coeff, k + m, k);
+    if (matrixtype == kCauchy)
+      gf_gen_cauchy1_matrix(encode_coeff, k + m, k);
+
+    // either our new created coefficients are stored or if they have been
+    // created in the meanwhile the locally allocated coefficients will be
+    // freed by setEncodingCoefficient
+    encode_coeff = tcache.setEncodingCoefficient(matrixtype, k, m, encode_coeff);
+  } else {
+    encode_coeff = *p_enc_coeff;
+  }
+
+  if (!*p_enc_table) {
+    dout(10) << "[ cache tables ] creating tables for k=" <<
+      k << " m=" << m << dendl;
+    // build encoding table which needs to be computed once for each (k,m)
+    encode_tbls = (unsigned char*) malloc(k * (m + k)*32);
+    // the coefficient rows after the first k*k bytes feed the table
+    ec_init_tables(k, m, &encode_coeff[k * k], encode_tbls);
+
+    // either our new created table is stored or if it has been
+    // created in the meanwhile the locally allocated table will be
+    // freed by setEncodingTable
+    encode_tbls = tcache.setEncodingTable(matrixtype, k, m, encode_tbls);
+  } else {
+    encode_tbls = *p_enc_table;
+  }
+
+  // size of a full decoding-table LRU for this (k,m), for the log line only
+  unsigned memory_lru_cache =
+    k * (m + k) * 32 * tcache.decoding_tables_lru_length;
+
+  dout(10) << "[ cache memory ] = " << memory_lru_cache << " bytes" <<
+    " [ matrix ] = " <<
+    ((matrixtype == kVandermonde) ? "Vandermonde" : "Cauchy") << dendl;
+}
+// -----------------------------------------------------------------------------
diff --git a/src/erasure-code/isa/ErasureCodeIsa.h b/src/erasure-code/isa/ErasureCodeIsa.h
new file mode 100644
index 00000000..d67b918f
--- /dev/null
+++ b/src/erasure-code/isa/ErasureCodeIsa.h
@@ -0,0 +1,153 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ *
+ * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+/**
+ * @file ErasureCodeIsa.h
+ *
+ * @brief Erasure Code CODEC using the INTEL ISA-L library.
+ *
+ * The INTEL ISA-L library supports two pre-defined encoding matrices (cauchy = default, reed_sol_van = default)
+ * The default CODEC implementation using these two matrices is implemented in class ErasureCodeIsaDefault.
+ * ISA-L allows the use of custom matrices, which might be added later as implementations deriving from the base class ErasureCodeIsa.
+ */
+
+#ifndef CEPH_ERASURE_CODE_ISA_L_H
+#define CEPH_ERASURE_CODE_ISA_L_H
+
+// -----------------------------------------------------------------------------
+#include "erasure-code/ErasureCode.h"
+#include "ErasureCodeIsaTableCache.h"
+// -----------------------------------------------------------------------------
+
+// Abstract base of the ISA-L backed codecs. It implements the generic
+// ErasureCode entry points (chunk counts, chunk size, encode/decode, init)
+// and leaves the actual coding, alignment, table preparation and profile
+// parsing to the derived class.
+class ErasureCodeIsa : public ErasureCode {
+public:
+
+  // Encoding matrix families a derived codec can be configured with.
+  enum eMatrix {
+    kVandermonde = 0,
+    kCauchy = 1
+  };
+
+  int k; // number of data chunks, see get_data_chunk_count()
+  int m; // get_chunk_count() reports k + m
+  int w; // initialised to 0; not used anywhere else in this header
+
+  ErasureCodeIsaTableCache &tcache; // cache handed in by the creator
+  const char *technique;
+
+  ErasureCodeIsa(const char *_technique, ErasureCodeIsaTableCache &_tcache)
+    : k(0), m(0), w(0), tcache(_tcache), technique(_technique)
+  {}
+
+  ~ErasureCodeIsa() override {}
+
+  unsigned int get_chunk_count() const override { return k + m; }
+
+  unsigned int get_data_chunk_count() const override { return k; }
+
+  unsigned int get_chunk_size(unsigned int object_size) const override;
+
+  int encode_chunks(const std::set<int> &want_to_encode,
+                    std::map<int, bufferlist> *encoded) override;
+
+  int decode_chunks(const std::set<int> &want_to_read,
+                    const std::map<int, bufferlist> &chunks,
+                    std::map<int, bufferlist> *decoded) override;
+
+  int init(ErasureCodeProfile &profile, std::ostream *ss) override;
+
+  // Hooks implemented by the concrete codec.
+  virtual void isa_encode(char **data, char **coding, int blocksize) = 0;
+
+  virtual int isa_decode(int *erasures,
+                         char **data,
+                         char **coding,
+                         int blocksize) = 0;
+
+  virtual unsigned get_alignment() const = 0;
+
+  virtual void prepare() = 0;
+
+private:
+  virtual int parse(ErasureCodeProfile &profile, std::ostream *ss) = 0;
+};
+
+// -----------------------------------------------------------------------------
+
+// Codec built on one of the two matrix types named in eMatrix. The matrix
+// type is fixed at construction time and defaults to kVandermonde.
+class ErasureCodeIsaDefault : public ErasureCodeIsa {
+private:
+  int matrixtype;
+
+public:
+
+  static const std::string DEFAULT_K;
+  static const std::string DEFAULT_M;
+
+  unsigned char* encode_coeff; // encoding coefficient
+  unsigned char* encode_tbls; // encoding table
+
+  ErasureCodeIsaDefault(ErasureCodeIsaTableCache &_tcache,
+                        int matrix = kVandermonde)
+    : ErasureCodeIsa("default", _tcache),
+      matrixtype(matrix),
+      encode_coeff(0),
+      encode_tbls(0)
+  {}
+
+  ~ErasureCodeIsaDefault() override {}
+
+  void isa_encode(char **data, char **coding, int blocksize) override;
+
+  // true if i is listed in the -1 terminated erasures array
+  virtual bool erasure_contains(int *erasures, int i);
+
+  int isa_decode(int *erasures,
+                 char **data,
+                 char **coding,
+                 int blocksize) override;
+
+  unsigned get_alignment() const override;
+
+  void prepare() override;
+
+private:
+  int parse(ErasureCodeProfile &profile, std::ostream *ss) override;
+};
+
+#endif
diff --git a/src/erasure-code/isa/ErasureCodeIsaTableCache.cc b/src/erasure-code/isa/ErasureCodeIsaTableCache.cc
new file mode 100644
index 00000000..7b176de8
--- /dev/null
+++ b/src/erasure-code/isa/ErasureCodeIsaTableCache.cc
@@ -0,0 +1,327 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ *
+ * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+
+/**
+ * @file ErasureCodeIsaTableCache.cc
+ *
+ * @brief Erasure Code Isa CODEC Table Cache
+ *
+ * The INTEL ISA-L library supports two pre-defined encoding matrices (cauchy = default, reed_sol_van = default)
+ * The default CODEC implementation using these two matrices is implemented in class ErasureCodeIsaDefault.
+ * ISA-L allows the use of custom matrices, which might be added later as implementations deriving from the base class ErasureCodeIsa.
+ */
+
+// -----------------------------------------------------------------------------
+#include "ErasureCodeIsaTableCache.h"
+#include "common/debug.h"
+// -----------------------------------------------------------------------------
+
+// -----------------------------------------------------------------------------
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_osd
+#undef dout_prefix
+#define dout_prefix _tc_prefix(_dout)
+// -----------------------------------------------------------------------------
+
+// -----------------------------------------------------------------------------
+
+// Emits the "ErasureCodeIsaTableCache: " tag in front of every dout() line
+// produced by this file; it is hooked in through the dout_prefix macro.
+static ostream&
+_tc_prefix(std::ostream* _dout)
+{
+  std::ostream& out = *_dout;
+  out << "ErasureCodeIsaTableCache: ";
+  return out;
+}
+
+// -----------------------------------------------------------------------------
+
+// Releases everything the cache owns while holding the guard mutex: the
+// encoding coefficients and encoding tables of every (matrix, k, m)
+// combination, and the per-matrix-type decoding LRU maps and lists.
+ErasureCodeIsaTableCache::~ErasureCodeIsaTableCache()
+{
+  Mutex::Locker lock(codec_tables_guard);
+
+  // Every leaf of the nested maps is a slot created with new in
+  // get*NoLock() that holds a table pointer. The tables themselves are
+  // allocated with malloc() by the codec and are released with free() in
+  // setEncodingTable()/setEncodingCoefficient(), so they have to be
+  // released with free() here too; only the slot is deleted.
+  auto release_tables = [](codec_technique_tables_t &by_matrix) {
+    for (auto &matrix_entry : by_matrix) {
+      for (auto &k_entry : matrix_entry.second) {
+        for (auto &m_entry : k_entry.second) {
+          unsigned char **slot = m_entry.second;
+          if (!slot) {
+            continue;
+          }
+          if (*slot) {
+            free(*slot);
+          }
+          delete slot;
+        }
+      }
+    }
+  };
+
+  // clean-up all allocated tables
+  release_tables(encoding_coefficient);
+  release_tables(encoding_table);
+
+  // deleting a null pointer is a no-op, so no explicit check is needed
+  for (auto &lru_map_entry : decoding_tables) {
+    delete lru_map_entry.second;
+  }
+
+  for (auto &lru_list_entry : decoding_tables_lru) {
+    delete lru_list_entry.second;
+  }
+}
+
+// -----------------------------------------------------------------------------
+
+// Number of entries in the decoding-table map of the given matrix type, or
+// -1 if no map has been created for that type yet. Takes the guard mutex.
+int
+ErasureCodeIsaTableCache::getDecodingTableCacheSize(int matrixtype)
+{
+  Mutex::Locker lock(codec_tables_guard);
+
+  lru_map_t *tables = decoding_tables[matrixtype];
+  if (!tables) {
+    return -1;
+  }
+  return tables->size();
+}
+
+// -----------------------------------------------------------------------------
+
+// Returns the decoding-table map for matrix_type, allocating it on first
+// use.
+//
+// the caller must hold the guard mutex:
+// => Mutex::Locker lock(codec_tables_guard);
+ErasureCodeIsaTableCache::lru_map_t*
+ErasureCodeIsaTableCache::getDecodingTables(int matrix_type)
+{
+  lru_map_t *&slot = decoding_tables[matrix_type];
+  if (slot == nullptr) {
+    // create an lru_map if not yet allocated
+    slot = new lru_map_t;
+  }
+  return slot;
+}
+
+// -----------------------------------------------------------------------------
+
+// Returns the LRU list for matrix_type, allocating it on first use.
+//
+// the caller must hold the guard mutex:
+// => Mutex::Locker lock(codec_tables_guard);
+ErasureCodeIsaTableCache::lru_list_t*
+ErasureCodeIsaTableCache::getDecodingTablesLru(int matrix_type)
+{
+  lru_list_t *&slot = decoding_tables_lru[matrix_type];
+  if (slot == nullptr) {
+    // create an lru_list if not yet allocated
+    slot = new lru_list_t;
+  }
+  return slot;
+}
+
+// -----------------------------------------------------------------------------
+
+// Locked front end of getEncodingTableNoLock(): returns the slot that holds
+// the encoding table pointer for (matrix, k, m).
+unsigned char**
+ErasureCodeIsaTableCache::getEncodingTable(int matrix, int k, int m)
+{
+  Mutex::Locker lock(codec_tables_guard);
+  unsigned char **slot = getEncodingTableNoLock(matrix, k, m);
+  return slot;
+}
+
+// -----------------------------------------------------------------------------
+
+// Returns the slot that stores the encoding table address for
+// (matrix, k, m). A missing slot is created and initialised to a null table
+// pointer. No locking is done here.
+unsigned char**
+ErasureCodeIsaTableCache::getEncodingTableNoLock(int matrix, int k, int m)
+{
+  unsigned char **&slot = encoding_table[matrix][k][m];
+  if (!slot) {
+    // create a pointer to store an encoding table address
+    slot = new (unsigned char*);
+    *slot = 0;
+  }
+  return slot;
+}
+
+// -----------------------------------------------------------------------------
+
+// Locked front end of getEncodingCoefficientNoLock(): returns the slot that
+// holds the encoding coefficient pointer for (matrix, k, m).
+unsigned char**
+ErasureCodeIsaTableCache::getEncodingCoefficient(int matrix, int k, int m)
+{
+  Mutex::Locker lock(codec_tables_guard);
+  unsigned char **slot = getEncodingCoefficientNoLock(matrix, k, m);
+  return slot;
+}
+
+// -----------------------------------------------------------------------------
+
+// Returns the slot that stores the encoding coefficients address for
+// (matrix, k, m). A missing slot is created and initialised to a null
+// pointer. No locking is done here.
+unsigned char**
+ErasureCodeIsaTableCache::getEncodingCoefficientNoLock(int matrix, int k, int m)
+{
+  unsigned char **&slot = encoding_coefficient[matrix][k][m];
+  if (!slot) {
+    // create a pointer to store an encoding coefficients address
+    slot = new (unsigned char*);
+    *slot = 0;
+  }
+  return slot;
+}
+
+// -----------------------------------------------------------------------------
+
+// Offers ec_in_table as the encoding table for (matrix, k, m) and returns
+// the table that is stored afterwards. If a table is already stored, the
+// offered one is free()d and the stored one is returned.
+unsigned char*
+ErasureCodeIsaTableCache::setEncodingTable(int matrix, int k, int m, unsigned char* ec_in_table)
+{
+  Mutex::Locker lock(codec_tables_guard);
+  unsigned char **slot = getEncodingTableNoLock(matrix, k, m);
+
+  if (*slot == 0) {
+    // we store the provided input table and return this one
+    *slot = ec_in_table;
+    return ec_in_table;
+  }
+
+  // somebody might have deposited this table in the meanwhile, so clean
+  // the input table and return the stored one
+  free (ec_in_table);
+  return *slot;
+}
+
+// -----------------------------------------------------------------------------
+
+// Offers ec_in_coeff as the encoding coefficients for (matrix, k, m) and
+// returns the coefficients that are stored afterwards. If coefficients are
+// already stored, the offered ones are free()d and the stored ones returned.
+unsigned char*
+ErasureCodeIsaTableCache::setEncodingCoefficient(int matrix, int k, int m, unsigned char* ec_in_coeff)
+{
+  Mutex::Locker lock(codec_tables_guard);
+  unsigned char **slot = getEncodingCoefficientNoLock(matrix, k, m);
+
+  if (*slot == 0) {
+    // we store the provided input coefficients and return these
+    *slot = ec_in_coeff;
+    return ec_in_coeff;
+  }
+
+  // somebody might have deposited these coefficients in the meanwhile, so clean
+  // the input coefficients and return the stored ones
+  free (ec_in_coeff);
+  return *slot;
+}
+
+// -----------------------------------------------------------------------------
+
+// Gives callers access to the mutex that guards the cache tables.
+Mutex*
+ErasureCodeIsaTableCache::getLock()
+{
+  Mutex *guard = &codec_tables_guard;
+  return guard;
+}
+
+// -----------------------------------------------------------------------------
+
+// Looks signature up in the decoding-table LRU cache of matrixtype.
+//
+// On a hit, k * (m + k) * 32 bytes of the cached table are copied into the
+// buffer that table points to, the entry is moved to the front of the LRU
+// list and true is returned. On a miss nothing is copied and false is
+// returned. Takes the guard mutex.
+bool
+ErasureCodeIsaTableCache::getDecodingTableFromCache(std::string &signature,
+                                                    unsigned char* &table,
+                                                    int matrixtype,
+                                                    int k,
+                                                    int m)
+{
+  // --------------------------------------------------------------------------
+  // LRU decoding matrix cache
+  // --------------------------------------------------------------------------
+
+  dout(12) << "[ get table ] = " << signature << dendl;
+
+  Mutex::Locker lock(codec_tables_guard);
+
+  lru_map_t* decode_tbls_map = getDecodingTables(matrixtype);
+  lru_list_t* decode_tbls_lru = getDecodingTablesLru(matrixtype);
+
+  lru_map_t::iterator hit = decode_tbls_map->find(signature);
+  if (hit == decode_tbls_map->end()) {
+    return false;
+  }
+
+  dout(12) << "[ cached table ] = " << signature << dendl;
+  // copy the table out of the cache
+  memcpy(table, hit->second.second.c_str(), k * (m + k)*32);
+  // find item in LRU queue and push back
+  dout(12) << "[ cache size ] = " << decode_tbls_lru->size() << dendl;
+  decode_tbls_lru->splice(decode_tbls_lru->begin(),
+                          *decode_tbls_lru,
+                          hit->second.first);
+  return true;
+}
+
+// -----------------------------------------------------------------------------
+
+// Stores a copy of table (k * (m + k) * 32 bytes) under signature in the
+// decoding-table LRU cache of matrixtype and puts the entry at the front of
+// the LRU list. When the list has already reached
+// decoding_tables_lru_length entries, the entry at the back is evicted and
+// its buffer is reused if it has the required size. Takes the guard mutex.
+void
+ErasureCodeIsaTableCache::putDecodingTableToCache(std::string &signature,
+                                                  unsigned char* &table,
+                                                  int matrixtype,
+                                                  int k,
+                                                  int m)
+{
+  // --------------------------------------------------------------------------
+  // LRU decoding matrix cache
+  // --------------------------------------------------------------------------
+
+  dout(12) << "[ put table ] = " << signature << dendl;
+
+  // we store a new table to the cache
+  bufferptr cachetable;
+
+  Mutex::Locker lock(codec_tables_guard);
+
+  lru_map_t* decode_tbls_map = getDecodingTables(matrixtype);
+  lru_list_t* decode_tbls_lru = getDecodingTablesLru(matrixtype);
+
+  const int table_bytes = k * (m + k)*32;
+  const bool lru_is_full =
+    (int) decode_tbls_lru->size() >= ErasureCodeIsaTableCache::decoding_tables_lru_length;
+
+  if (!lru_is_full) {
+    dout(12) << "[ store table ] = " << signature << dendl;
+    // allocate a new buffer
+    cachetable = buffer::create(table_bytes);
+    decode_tbls_lru->push_front(signature);
+    (*decode_tbls_map)[signature] = std::make_pair(decode_tbls_lru->begin(), cachetable);
+    dout(12) << "[ cache size ] = " << decode_tbls_lru->size() << dendl;
+  } else {
+    // evt. shrink the LRU queue/map
+    dout(12) << "[ shrink lru ] = " << signature << dendl;
+
+    // stays valid until the pop_back() below
+    const std::string &victim = decode_tbls_lru->back();
+
+    // reuse old buffer
+    cachetable = (*decode_tbls_map)[victim].second;
+    if ((int) cachetable.length() != table_bytes) {
+      // we need to replace this with a different size buffer
+      cachetable = buffer::create(table_bytes);
+    }
+
+    // remove from map
+    decode_tbls_map->erase(victim);
+    // remove from lru
+    decode_tbls_lru->pop_back();
+    // add to the head of lru
+    decode_tbls_lru->push_front(signature);
+    // add the new to the map
+    (*decode_tbls_map)[signature] = std::make_pair(decode_tbls_lru->begin(), cachetable);
+  }
+
+  // copy-in the new table
+  memcpy(cachetable.c_str(), table, table_bytes);
+}
diff --git a/src/erasure-code/isa/ErasureCodeIsaTableCache.h b/src/erasure-code/isa/ErasureCodeIsaTableCache.h
new file mode 100644
index 00000000..64aaae75
--- /dev/null
+++ b/src/erasure-code/isa/ErasureCodeIsaTableCache.h
@@ -0,0 +1,105 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ *
+ * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+/**
+ * @file ErasureCodeIsaTableCache.h
+ *
+ * @brief Erasure Code Isa CODEC Table Cache
+ *
+ * The INTEL ISA-L library supports two pre-defined encoding matrices (cauchy, reed_sol_van = default).
+ * The default CODEC implementation using these two matrices is implemented in class ErasureCodeIsaDefault.
+ * ISA-L allows the use of custom matrices, which might be added later as implementations deriving from the base class ErasureCodeIsa.
+ */
+
+#ifndef CEPH_ERASURE_CODE_ISA_TABLE_CACHE_H
+#define CEPH_ERASURE_CODE_ISA_TABLE_CACHE_H
+
+// -----------------------------------------------------------------------------
+#include "common/Mutex.h"
+#include "erasure-code/ErasureCodeInterface.h"
+// -----------------------------------------------------------------------------
+#include <list>
+// -----------------------------------------------------------------------------
+
+class ErasureCodeIsaTableCache {
+ // ---------------------------------------------------------------------------
+ // This class implements a table cache for encoding and decoding matrices.
+ // Encoding matrices are shared for the same (k,m) combination. It supplies
+ // a decoding matrix lru cache which is shared for identical
+ // matrix types e.g. there is one cache (lru-list + lru-map) for Cauchy and
+ // one for Vandermonde matrices!
+ // ---------------------------------------------------------------------------
+
+public:
+
+ // the cache size is sufficient up to (12,4) decodings
+ // (2516 = C(16,1)+C(16,2)+C(16,3)+C(16,4); presumably one entry per loss pattern of 1..4 out of k+m = 16 chunks)
+ static const int decoding_tables_lru_length = 2516;
+ // lru_entry_t: (position of the signature in the lru list, buffer holding the cached decoding table)
+ typedef std::pair<std::list<std::string>::iterator, bufferptr> lru_entry_t;
+ typedef std::map< int, unsigned char** > codec_table_t;
+ typedef std::map< int, codec_table_t > codec_tables_t;
+ typedef std::map< int, codec_tables_t > codec_technique_tables_t;
+ // lru_map_t: signature -> lru_entry_t; lru_list_t: signatures, most recently stored entry at the front
+ typedef std::map< std::string, lru_entry_t > lru_map_t;
+ typedef std::list< std::string > lru_list_t;
+ // the constructor only names the mutex; all maps start out empty
+ ErasureCodeIsaTableCache() :
+ codec_tables_guard("isa-lru-cache")
+ {
+ }
+
+ virtual ~ErasureCodeIsaTableCache();
+
+ Mutex codec_tables_guard; // mutex used to protect modifications in encoding/decoding table maps
+ // looks up a decoding table by signature; presumably returns true on a cache hit and fills `table` - confirm in the .cc
+ bool getDecodingTableFromCache(std::string &signature,
+ unsigned char* &table,
+ int matrixtype,
+ int k,
+ int m);
+ // copies k*(m+k)*32 bytes of `table` into the cache under `signature`, evicting the oldest entry once the lru is full
+ void putDecodingTableToCache(std::string&,
+ unsigned char*&,
+ int matrixtype,
+ int k,
+ int m);
+ // accessors for the shared encoding tables/coefficients of a (matrix,k,m) combination
+ unsigned char** getEncodingTable(int matrix, int k, int m);
+ unsigned char** getEncodingCoefficient(int matrix, int k, int m);
+ // NOTE(review): the NoLock variants presumably expect the caller to hold codec_tables_guard - confirm in the .cc
+ unsigned char** getEncodingTableNoLock(int matrix, int k, int m);
+ unsigned char** getEncodingCoefficientNoLock(int matrix, int k, int m);
+
+ unsigned char* setEncodingTable(int matrix, int k, int m, unsigned char*);
+ unsigned char* setEncodingCoefficient(int matrix, int k, int m, unsigned char*);
+
+ int getDecodingTableCacheSize(int matrixtype = 0);
+
+private:
+ codec_technique_tables_t encoding_coefficient; // encoding coefficients accessed via table[matrix][k][m]
+ codec_technique_tables_t encoding_table; // encoding tables accessed via table[matrix][k][m]
+
+ std::map<int, lru_map_t*> decoding_tables; // decoding table cache accessed via map[matrixtype]
+ std::map<int, lru_list_t*> decoding_tables_lru; // decoding table lru list accessed via list[matrixtype]
+ // per-matrix-type lookup of the lru map used by the decoding table cache
+ lru_map_t* getDecodingTables(int matrix_type);
+ // per-matrix-type lookup of the lru list used by the decoding table cache
+ lru_list_t* getDecodingTablesLru(int matrix_type);
+
+ Mutex* getLock();
+
+};
+
+#endif
diff --git a/src/erasure-code/isa/ErasureCodePluginIsa.cc b/src/erasure-code/isa/ErasureCodePluginIsa.cc
new file mode 100644
index 00000000..5eda591b
--- /dev/null
+++ b/src/erasure-code/isa/ErasureCodePluginIsa.cc
@@ -0,0 +1,82 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ *
+ * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+
+/**
+ * @file ErasureCodePluginIsa.cc
+ *
+ * @brief Erasure Code Plug-in class wrapping the INTEL ISA-L library
+ *
+ * The factory plug-in class allows to call individual encoding techniques.
+ * The INTEL ISA-L library provides two pre-defined encoding matrices
+ * (cauchy, reed_sol_van = default).
+ */
+
+// -----------------------------------------------------------------------------
+#include "ceph_ver.h"
+#include "include/buffer.h"
+#include "ErasureCodePluginIsa.h"
+#include "ErasureCodeIsa.h"
+// -----------------------------------------------------------------------------
+
+int ErasureCodePluginIsa::factory(const std::string &directory,
+                                  ErasureCodeProfile &profile,
+                                  ErasureCodeInterfaceRef *erasure_code,
+                                  std::ostream *ss)
+{
+  // Creates the codec selected by profile["technique"] (written back as
+  // "reed_sol_van" when absent), runs init() on it and publishes it through
+  // *erasure_code. Returns 0, -ENOENT (unknown technique) or init()'s error.
+  if (profile.find("technique") == profile.end())
+    profile["technique"] = "reed_sol_van";
+  const std::string technique = profile.find("technique")->second;
+
+  ErasureCodeIsa *codec = 0;
+  if (technique == "reed_sol_van") {
+    codec = new ErasureCodeIsaDefault(tcache,
+                                      ErasureCodeIsaDefault::kVandermonde);
+  } else if (technique == "cauchy") {
+    codec = new ErasureCodeIsaDefault(tcache,
+                                      ErasureCodeIsaDefault::kCauchy);
+  } else {
+    *ss << "technique=" << technique << " is not a valid coding technique. "
+        << " Choose one of the following: "
+        << "reed_sol_van,"
+        << "cauchy" << std::endl;
+    return -ENOENT;
+  }
+  const int rc = codec->init(profile, ss);
+  if (rc != 0) {
+    // the codec was never handed out, so it is released here
+    delete codec;
+    return rc;
+  }
+  *erasure_code = ErasureCodeInterfaceRef(codec);
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// plugin entry point: returns the version string (CEPH_GIT_NICE_VER) this plugin was built with
+const char *__erasure_code_version()
+{
+ return CEPH_GIT_NICE_VER;
+}
+
+// -----------------------------------------------------------------------------
+// plugin entry point: registers a new ErasureCodePluginIsa under plugin_name; `directory` is not used here
+int __erasure_code_init(char *plugin_name, char *directory)
+{
+ ErasureCodePluginRegistry &instance = ErasureCodePluginRegistry::instance();
+ // the result of add() is passed straight back to the caller
+ return instance.add(plugin_name, new ErasureCodePluginIsa());
+}
diff --git a/src/erasure-code/isa/ErasureCodePluginIsa.h b/src/erasure-code/isa/ErasureCodePluginIsa.h
new file mode 100644
index 00000000..fc52e10b
--- /dev/null
+++ b/src/erasure-code/isa/ErasureCodePluginIsa.h
@@ -0,0 +1,34 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph distributed storage system
+ *
+ * Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+ * Copyright (C) 2014 Red Hat <contact@redhat.com>
+ *
+ * Author: Loic Dachary <loic@dachary.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+#ifndef CEPH_ERASURE_CODE_PLUGIN_ISA_H
+#define CEPH_ERASURE_CODE_PLUGIN_ISA_H
+
+#include "erasure-code/ErasureCodePlugin.h"
+#include "ErasureCodeIsaTableCache.h"
+
+class ErasureCodePluginIsa : public ErasureCodePlugin { // plugin object that builds ISA-L based codecs
+public:
+ ErasureCodeIsaTableCache tcache; // table cache passed to every codec that factory() creates
+ // builds the codec named by profile["technique"]; defined in ErasureCodePluginIsa.cc
+ int factory(const std::string &directory,
+ ErasureCodeProfile &profile,
+ ErasureCodeInterfaceRef *erasure_code,
+ std::ostream *ss) override;
+};
+
+#endif
diff --git a/src/erasure-code/isa/README b/src/erasure-code/isa/README
new file mode 100644
index 00000000..f5e9fe59
--- /dev/null
+++ b/src/erasure-code/isa/README
@@ -0,0 +1,63 @@
+============================================
+INTEL ISA library Erasure Coding plugin
+============================================
+
+Build Requirements
+==================
+Plug-in build compiles the included sources of ISA-L v2.10 and links them into the plugin. ISA-L implementation is portable and probes CPU features during runtime. Note that the names of the assembler source files have been renamed from *.asm to *.asm.s to be compatible with Automake.
+
+Run-time Requirements
+=====================
+None
+
+Plug-in Configuration
+=====================
+
+Used parameters are:
+k : number of data chunks
+m : number of coding chunks
+technique : cauchy, reed_sol_van
+
+The plug-in exports only two encoding techniques (cauchy, reed_sol_van), using either a Cauchy matrix or a Vandermonde matrix for coding.
+By default a Vandermonde matrix is used. Be aware that the generated Vandermonde matrix is not always invertible and therefore not always fully MDS.
+Therefore the accepted parameter space has been limited to a maximum of (21,4) and (32,3) for Vandermonde matrices.
+
+Run the Test suite
+==================
+cd ceph/src
+make unittest_erasure_code_isa
+./unittest_erasure_code_isa --gtest_filter=*.* --log-to-stderr=true --debug-osd=20
+
+Run the CEPH erasure code benchmark
+===================================
+cd ceph/src
+make ceph_erasure_code_benchmark
+
+# consult ./ceph_erasure_code_benchmark -h for help
+
+# encode performance
+./ceph_erasure_code_benchmark -p isa -P k=8 -P m=3 -S 1048576 -i 1000
+
+# decode performance one lost
+./ceph_erasure_code_benchmark -e 1 -w decode -p isa -P k=8 -P m=3 -S 1048576 -i 1000
+
+# decode performance two lost
+./ceph_erasure_code_benchmark -e 2 -w decode -p isa -P k=8 -P m=3 -S 1048576 -i 1000
+
+# decode performance three lost
+./ceph_erasure_code_benchmark -e 3 -w decode -p isa -P k=8 -P m=3 -S 1048576 -i 1000
+
+
+Developer Notes
+===============
+The plugin provides optimal performance for 32-byte aligned buffer start address and
+k*32 byte aligned buffer length. The encoding tables are computed only once when the EC
+object is created. Decoding Tables have to be computed for each decoding since the available
+data/coding sources may change between calls.
+Decoding tables are cached in an LRU cache which is sufficiently large up to (12,4).
+
+For larger configurations the cache might expire the 'oldest' tables and decoding might
+slow down. The plug-in uses an optimization to use a pure region XOR to decode single disk
+failures if the erased chunk is within the first (k+1) chunks.
+
+The unittest probes all possible failure scenarios for (12,4) Vandermonde and Cauchy matrices.
diff --git a/src/erasure-code/isa/xor_op.cc b/src/erasure-code/isa/xor_op.cc
new file mode 100644
index 00000000..2b56e977
--- /dev/null
+++ b/src/erasure-code/isa/xor_op.cc
@@ -0,0 +1,183 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+// -----------------------------------------------------------------------------
+#include "xor_op.h"
+#include <stdio.h>
+#include <string.h>
+#include "arch/intel.h"
+
+#include "include/ceph_assert.h"
+
+// -----------------------------------------------------------------------------
+
+
+// -----------------------------------------------------------------------------
+
+void
+// -----------------------------------------------------------------------------
+byte_xor(unsigned char* cw, unsigned char* dw, unsigned char* ew)
+// -----------------------------------------------------------------------------
+{
+  // dw[i] ^= cw[i] for every byte of [cw, ew); cw itself is only read
+  for (; cw < ew; ++cw, ++dw) *dw ^= *cw;
+}
+
+// -----------------------------------------------------------------------------
+
+void
+// -----------------------------------------------------------------------------
+vector_xor(vector_op_t* cw,
+           vector_op_t* dw,
+           vector_op_t* ew)
+// -----------------------------------------------------------------------------
+{
+  // source start, destination start and source end must all be vector-word aligned
+  ceph_assert(is_aligned(cw, EC_ISA_VECTOR_OP_WORDSIZE));
+  ceph_assert(is_aligned(dw, EC_ISA_VECTOR_OP_WORDSIZE));
+  ceph_assert(is_aligned(ew, EC_ISA_VECTOR_OP_WORDSIZE));
+  // dw[i] ^= cw[i] for every vector word of [cw, ew)
+  for (; cw < ew; ++cw, ++dw) *dw ^= *cw;
+}
+
+
+// -----------------------------------------------------------------------------
+
+void
+// -----------------------------------------------------------------------------
+region_xor(unsigned char** src,
+           unsigned char* parity,
+           int src_size,
+           unsigned size)
+{
+  // ----------------------------------------------------------
+  // parity[0..size) = src[0] ^ src[1] ^ ... ^ src[src_size-1]
+  // size == 0 or src_size <= 0 leaves parity untouched
+  // ----------------------------------------------------------
+  if (!size || src_size <= 0) {
+    // nothing to do; a non-positive src_size must never reach the XOR
+    // kernels below because they read src[0] unconditionally
+    return;
+  }
+
+  if (src_size == 1) {
+    // just copy source to parity
+    memcpy(parity, src[0], size);
+    return;
+  }
+
+  unsigned size_left = size; // bytes not yet covered by a wide kernel
+
+  // ----------------------------------------------------------
+  // region or vector XOR operations require aligned addresses
+  // ----------------------------------------------------------
+
+  bool src_aligned = true;
+  for (int i = 0; i < src_size; i++) {
+    src_aligned &= is_aligned(src[i], EC_ISA_VECTOR_OP_WORDSIZE);
+  }
+
+  if (src_aligned &&
+      is_aligned(parity, EC_ISA_VECTOR_OP_WORDSIZE)) {
+
+#ifdef __x86_64__
+    if (ceph_arch_intel_sse2) {
+      // -----------------------------
+      // use SSE2 region xor function
+      // -----------------------------
+      unsigned region_size =
+        (size / EC_ISA_VECTOR_SSE2_WORDSIZE) * EC_ISA_VECTOR_SSE2_WORDSIZE;
+
+      size_left -= region_size;
+      // 64-byte region xor
+      region_sse2_xor((char**) src, (char*) parity, src_size, region_size);
+    } else
+#endif
+    {
+      // --------------------------------------------
+      // use region xor based on vector xor operation
+      // --------------------------------------------
+      unsigned vector_words = size / EC_ISA_VECTOR_OP_WORDSIZE;
+      unsigned vector_size = vector_words * EC_ISA_VECTOR_OP_WORDSIZE;
+      memcpy(parity, src[0], vector_size);
+
+      size_left -= vector_size;
+      vector_op_t* p_vec = (vector_op_t*) parity;
+      for (int i = 1; i < src_size; i++) {
+        vector_op_t* s_vec = (vector_op_t*) src[i];
+        vector_op_t* e_vec = s_vec + vector_words;
+        vector_xor(s_vec, p_vec, e_vec);
+      }
+    }
+  }
+
+  if (size_left) {
+    // --------------------------------------------------
+    // xor the not aligned part with byte-wise region xor
+    // --------------------------------------------------
+    memcpy(parity + size - size_left, src[0] + size - size_left, size_left);
+    for (int i = 1; i < src_size; i++) {
+      byte_xor(src[i] + size - size_left, parity + size - size_left, src[i] + size);
+    }
+  }
+}
+
+// -----------------------------------------------------------------------------
+
+void
+// -----------------------------------------------------------------------------
+region_sse2_xor(char** src,
+                char* parity,
+                int src_size,
+                unsigned size)
+// parity[0..size) = src[0] ^ ... ^ src[src_size-1]; compiles to a no-op off x86_64
+{
+#ifdef __x86_64__
+  ceph_assert(!(size % EC_ISA_VECTOR_SSE2_WORDSIZE));
+  unsigned char* p;
+  int d, l;
+  unsigned i;
+  unsigned char* vbuf[256];
+  ceph_assert(src_size <= (int) (sizeof (vbuf) / sizeof (vbuf[0])) && (src_size > 0 || !size));
+  for (int v = 0; v < src_size; v++) {
+    vbuf[v] = (unsigned char*) src[v];
+  }
+  // movdqa/movntdq below fault on addresses that are not 16-byte aligned
+  l = src_size;
+  p = (unsigned char*) parity;
+  // one 64-byte word per iteration: xmm0..3 accumulate, xmm4..7 stage src[d]
+  for (i = 0; i < size; i += EC_ISA_VECTOR_SSE2_WORDSIZE) {
+    asm volatile("movdqa %0,%%xmm0" : : "m" (vbuf[0][i]));
+    asm volatile("movdqa %0,%%xmm1" : : "m" (vbuf[0][i + 16]));
+    asm volatile("movdqa %0,%%xmm2" : : "m" (vbuf[0][i + 32]));
+    asm volatile("movdqa %0,%%xmm3" : : "m" (vbuf[0][i + 48]));
+    // fold every remaining source into the accumulators
+    for (d = 1; d < l; d++) {
+      asm volatile("movdqa %0,%%xmm4" : : "m" (vbuf[d][i]));
+      asm volatile("movdqa %0,%%xmm5" : : "m" (vbuf[d][i + 16]));
+      asm volatile("movdqa %0,%%xmm6" : : "m" (vbuf[d][i + 32]));
+      asm volatile("movdqa %0,%%xmm7" : : "m" (vbuf[d][i + 48]));
+      asm volatile("pxor %xmm4,%xmm0");
+      asm volatile("pxor %xmm5,%xmm1");
+      asm volatile("pxor %xmm6,%xmm2");
+      asm volatile("pxor %xmm7,%xmm3");
+    }
+    asm volatile("movntdq %%xmm0,%0" : "=m" (p[i]));
+    asm volatile("movntdq %%xmm1,%0" : "=m" (p[i + 16]));
+    asm volatile("movntdq %%xmm2,%0" : "=m" (p[i + 32]));
+    asm volatile("movntdq %%xmm3,%0" : "=m" (p[i + 48]));
+  }
+  // order the non-temporal (movntdq) stores before returning
+  asm volatile("sfence" : : : "memory");
+#endif // __x86_64__
+  return;
+}
diff --git a/src/erasure-code/isa/xor_op.h b/src/erasure-code/isa/xor_op.h
new file mode 100644
index 00000000..978b9a95
--- /dev/null
+++ b/src/erasure-code/isa/xor_op.h
@@ -0,0 +1,87 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 CERN (Switzerland)
+ *
+ * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+#ifndef EC_ISA_XOR_OP_H
+#define EC_ISA_XOR_OP_H
+
+// -----------------------------------------------------------------------------
+#include <assert.h>
+#include <stdint.h>
+// -----------------------------------------------------------------------------
+
+// -------------------------------------------------------------------------
+// declaration of 64/128-bit vector operations depending on availability
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// EC_ISA_VECTOR_SSE2_WORDSIZE: bytes handled per loop iteration of region_sse2_xor
+#define EC_ISA_ADDRESS_ALIGNMENT 32u
+#define EC_ISA_VECTOR_SSE2_WORDSIZE 64u
+// GCC >= 4.4 or clang: vector_op_t is a 16-byte compiler vector; otherwise a 64-bit integer
+#if __GNUC__ > 4 || \
+ ( (__GNUC__ == 4) && (__GNUC_MINOR__ >= 4) ) ||\
+ (__clang__ == 1 )
+#ifdef EC_ISA_VECTOR_OP_DEBUG
+#pragma message "* using 128-bit vector operations in " __FILE__
+#endif
+
+// -------------------------------------------------------------------------
+// use 128-bit pointer
+// -------------------------------------------------------------------------
+typedef long vector_op_t __attribute__((vector_size(16)));
+#define EC_ISA_VECTOR_OP_WORDSIZE 16
+#else
+// -------------------------------------------------------------------------
+// use 64-bit pointer
+// -------------------------------------------------------------------------
+typedef unsigned long long vector_op_t;
+#define EC_ISA_VECTOR_OP_WORDSIZE 8
+#endif
+
+
+// -------------------------------------------------------------------------
+// check if a pointer is aligned to byte_count (address is a multiple of it)
+// -------------------------------------------------------------------------
+#define is_aligned(POINTER, BYTE_COUNT) \
+ (((uintptr_t)(const void *)(POINTER)) % (BYTE_COUNT) == 0)
+
+// -------------------------------------------------------------------------
+// byte-wise dw[i] ^= cw[i] for the bytes in [cw, ew); ew is the end address of cw
+// -------------------------------------------------------------------------
+void
+byte_xor(unsigned char* cw, unsigned char* dw, unsigned char* ew);
+
+// -------------------------------------------------------------------------
+// word-wise dw[i] ^= cw[i] for the vector words in [cw, ew); all pointers must be aligned
+// -------------------------------------------------------------------------
+void
+vector_xor(vector_op_t* cw, vector_op_t* dw, vector_op_t* ew);
+
+// -------------------------------------------------------------------------
+// compute region XOR like parity = src[0] ^ src[1] ... ^ src[src_size-1]
+// -------------------------------------------------------------------------
+void
+region_xor(unsigned char** src, unsigned char* parity, int src_size, unsigned size);
+
+// -------------------------------------------------------------------------
+// compute region XOR like parity = src[0] ^ src[1] ... ^ src[src_size-1]
+// using SSE2 64-byte operations
+// -------------------------------------------------------------------------
+void
+region_sse2_xor(char** src /* array of 64-byte aligned source pointers to xor */,
+ char* parity /* 64-byte aligned output pointer containing the parity */,
+ int src_size /* size of the source pointer array */,
+ unsigned size /* size of the region to xor; must be a multiple of 64 */);
+
+
+#endif // EC_ISA_XOR_OP_H