diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/erasure-code/isa | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/erasure-code/isa')
-rw-r--r-- | src/erasure-code/isa/CMakeLists.txt | 97 | ||||
-rw-r--r-- | src/erasure-code/isa/ErasureCodeIsa.cc | 422 | ||||
-rw-r--r-- | src/erasure-code/isa/ErasureCodeIsa.h | 153 | ||||
-rw-r--r-- | src/erasure-code/isa/ErasureCodeIsaTableCache.cc | 327 | ||||
-rw-r--r-- | src/erasure-code/isa/ErasureCodeIsaTableCache.h | 103 | ||||
-rw-r--r-- | src/erasure-code/isa/ErasureCodePluginIsa.cc | 82 | ||||
-rw-r--r-- | src/erasure-code/isa/ErasureCodePluginIsa.h | 34 | ||||
-rw-r--r-- | src/erasure-code/isa/README | 63 | ||||
-rw-r--r-- | src/erasure-code/isa/xor_op.cc | 183 | ||||
-rw-r--r-- | src/erasure-code/isa/xor_op.h | 87 |
10 files changed, 1551 insertions, 0 deletions
diff --git a/src/erasure-code/isa/CMakeLists.txt b/src/erasure-code/isa/CMakeLists.txt new file mode 100644 index 000000000..2486692b8 --- /dev/null +++ b/src/erasure-code/isa/CMakeLists.txt @@ -0,0 +1,97 @@ +# ISA +# Builds the ec_isa erasure-code plugin from the isa-l sources under +# ${isal_src_dir} plus the plugin sources in this directory. +set(isal_src_dir ${CMAKE_SOURCE_DIR}/src/isa-l) + +if(HAVE_NASM_X64_AVX2) + # pass the isa-l include directory to the assembler (-i) + set(CMAKE_ASM_FLAGS "-i ${isal_src_dir}/include/ ${CMAKE_ASM_FLAGS}") + set(isa_srcs + ${isal_src_dir}/erasure_code/ec_base.c + ${isal_src_dir}/erasure_code/gf_2vect_dot_prod_sse.asm + ${isal_src_dir}/erasure_code/gf_3vect_dot_prod_sse.asm + ${isal_src_dir}/erasure_code/gf_4vect_dot_prod_sse.asm + ${isal_src_dir}/erasure_code/gf_5vect_dot_prod_sse.asm + ${isal_src_dir}/erasure_code/gf_6vect_dot_prod_sse.asm + ${isal_src_dir}/erasure_code/gf_vect_dot_prod_sse.asm + ${isal_src_dir}/erasure_code/gf_2vect_mad_avx2.asm + ${isal_src_dir}/erasure_code/gf_3vect_mad_avx2.asm + ${isal_src_dir}/erasure_code/gf_4vect_mad_avx2.asm + ${isal_src_dir}/erasure_code/gf_5vect_mad_avx2.asm + ${isal_src_dir}/erasure_code/gf_6vect_mad_avx2.asm + ${isal_src_dir}/erasure_code/gf_vect_mad_avx2.asm + ${isal_src_dir}/erasure_code/ec_highlevel_func.c + ${isal_src_dir}/erasure_code/gf_2vect_mad_avx.asm + ${isal_src_dir}/erasure_code/gf_3vect_mad_avx.asm + ${isal_src_dir}/erasure_code/gf_4vect_mad_avx.asm + ${isal_src_dir}/erasure_code/gf_5vect_mad_avx.asm + ${isal_src_dir}/erasure_code/gf_6vect_mad_avx.asm + ${isal_src_dir}/erasure_code/gf_vect_mad_avx.asm + ${isal_src_dir}/erasure_code/ec_multibinary.asm + ${isal_src_dir}/erasure_code/gf_2vect_mad_sse.asm + ${isal_src_dir}/erasure_code/gf_3vect_mad_sse.asm + ${isal_src_dir}/erasure_code/gf_4vect_mad_sse.asm + ${isal_src_dir}/erasure_code/gf_5vect_mad_sse.asm + ${isal_src_dir}/erasure_code/gf_6vect_mad_sse.asm + ${isal_src_dir}/erasure_code/gf_vect_mad_sse.asm + ${isal_src_dir}/erasure_code/gf_2vect_dot_prod_avx2.asm + ${isal_src_dir}/erasure_code/gf_3vect_dot_prod_avx2.asm + 
${isal_src_dir}/erasure_code/gf_4vect_dot_prod_avx2.asm + ${isal_src_dir}/erasure_code/gf_5vect_dot_prod_avx2.asm + ${isal_src_dir}/erasure_code/gf_6vect_dot_prod_avx2.asm + ${isal_src_dir}/erasure_code/gf_vect_dot_prod_avx2.asm + ${isal_src_dir}/erasure_code/gf_vect_mul_avx.asm + ${isal_src_dir}/erasure_code/gf_2vect_dot_prod_avx.asm + ${isal_src_dir}/erasure_code/gf_3vect_dot_prod_avx.asm + ${isal_src_dir}/erasure_code/gf_4vect_dot_prod_avx.asm + ${isal_src_dir}/erasure_code/gf_5vect_dot_prod_avx.asm + ${isal_src_dir}/erasure_code/gf_6vect_dot_prod_avx.asm + ${isal_src_dir}/erasure_code/gf_vect_dot_prod_avx.asm + ${isal_src_dir}/erasure_code/gf_vect_mul_sse.asm + ${isal_src_dir}/erasure_code/gf_2vect_dot_prod_avx512.asm + ${isal_src_dir}/erasure_code/gf_2vect_mad_avx512.asm + ${isal_src_dir}/erasure_code/gf_3vect_dot_prod_avx512.asm + ${isal_src_dir}/erasure_code/gf_3vect_mad_avx512.asm + ${isal_src_dir}/erasure_code/gf_4vect_dot_prod_avx512.asm + ${isal_src_dir}/erasure_code/gf_4vect_mad_avx512.asm + ${isal_src_dir}/erasure_code/gf_vect_dot_prod_avx512.asm + ${isal_src_dir}/erasure_code/gf_vect_mad_avx512.asm + ErasureCodeIsa.cc + ErasureCodeIsaTableCache.cc + ErasureCodePluginIsa.cc + xor_op.cc + ) +elseif(HAVE_ARMV8_SIMD) + set(isa_srcs + ${isal_src_dir}/erasure_code/ec_base.c + ${isal_src_dir}/erasure_code/aarch64/ec_aarch64_highlevel_func.c + ${isal_src_dir}/erasure_code/aarch64/ec_aarch64_dispatcher.c + ${isal_src_dir}/erasure_code/aarch64/gf_2vect_dot_prod_neon.S + ${isal_src_dir}/erasure_code/aarch64/gf_2vect_mad_neon.S + ${isal_src_dir}/erasure_code/aarch64/gf_3vect_dot_prod_neon.S + ${isal_src_dir}/erasure_code/aarch64/gf_3vect_mad_neon.S + ${isal_src_dir}/erasure_code/aarch64/gf_4vect_dot_prod_neon.S + ${isal_src_dir}/erasure_code/aarch64/gf_4vect_mad_neon.S + ${isal_src_dir}/erasure_code/aarch64/gf_5vect_dot_prod_neon.S + ${isal_src_dir}/erasure_code/aarch64/gf_5vect_mad_neon.S + ${isal_src_dir}/erasure_code/aarch64/gf_6vect_mad_neon.S + 
${isal_src_dir}/erasure_code/aarch64/gf_vect_dot_prod_neon.S + ${isal_src_dir}/erasure_code/aarch64/gf_vect_mad_neon.S + ${isal_src_dir}/erasure_code/aarch64/gf_vect_mul_neon.S + ${isal_src_dir}/erasure_code/aarch64/ec_multibinary_arm.S + ErasureCodeIsa.cc + ErasureCodeIsaTableCache.cc + ErasureCodePluginIsa.cc + xor_op.cc + ) + # ec_multibinary_arm.S is compiled with __ASSEMBLY__ defined + set_source_files_properties( + ${isal_src_dir}/erasure_code/aarch64/ec_multibinary_arm.S + PROPERTIES COMPILE_FLAGS "-D__ASSEMBLY__" + ) +else() + # without one of the branches above isa_srcs is undefined and the plugin + # would be built without any codec or plugin sources + message(FATAL_ERROR + "ec_isa: neither HAVE_NASM_X64_AVX2 nor HAVE_ARMV8_SIMD is set, no isa-l sources to build") +endif() + +add_library(ec_isa SHARED + ${isa_srcs} + $<TARGET_OBJECTS:erasure_code_objs>) +# ec_isa is the only target defined here, so scope the isa-l headers to it +target_include_directories(ec_isa PRIVATE ${isal_src_dir}/include) +target_link_libraries(ec_isa ${EXTRALIBS}) +set_target_properties(ec_isa PROPERTIES + INSTALL_RPATH "") +install(TARGETS ec_isa DESTINATION ${erasure_plugin_dir}) diff --git a/src/erasure-code/isa/ErasureCodeIsa.cc b/src/erasure-code/isa/ErasureCodeIsa.cc new file mode 100644 index 000000000..58aff4b0e --- /dev/null +++ b/src/erasure-code/isa/ErasureCodeIsa.cc @@ -0,0 +1,422 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 CERN (Switzerland) + * + * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + */ + +// ----------------------------------------------------------------------------- +#include <algorithm> +#include <cerrno> +// ----------------------------------------------------------------------------- +#include "common/debug.h" +#include "ErasureCodeIsa.h" +#include "xor_op.h" +#include "include/ceph_assert.h" +using namespace std; +using namespace ceph; + +// ----------------------------------------------------------------------------- +extern "C" { +#include "isa-l/include/erasure_code.h" +} +// ----------------------------------------------------------------------------- +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_osd +#undef dout_prefix +#define dout_prefix _prefix(_dout) +// ----------------------------------------------------------------------------- + +// ----------------------------------------------------------------------------- + +static ostream& +_prefix(std::ostream* _dout) +{ + return *_dout << "ErasureCodeIsa: "; +} +// ----------------------------------------------------------------------------- + +const std::string ErasureCodeIsaDefault::DEFAULT_K("7"); +const std::string ErasureCodeIsaDefault::DEFAULT_M("3"); + + +// ----------------------------------------------------------------------------- + +int +ErasureCodeIsa::init(ErasureCodeProfile &profile, ostream *ss) +{ + int err = 0; + err |= parse(profile, ss); + if (err) + return err; + prepare(); + return ErasureCode::init(profile, ss); +} + +// ----------------------------------------------------------------------------- + +unsigned int +ErasureCodeIsa::get_chunk_size(unsigned int object_size) const +{ + unsigned alignment = get_alignment(); + unsigned chunk_size = ( object_size + k - 1 ) / k; + dout(20) << "get_chunk_size: chunk_size " << chunk_size + << " must be modulo " << alignment << dendl; + unsigned modulo = chunk_size % alignment; + if (modulo) { + dout(10) << "get_chunk_size: " << chunk_size + << " padded to " << chunk_size + alignment - modulo 
<< dendl; + chunk_size += alignment - modulo; + } + return chunk_size; +} + +// ----------------------------------------------------------------------------- + +int ErasureCodeIsa::encode_chunks(const set<int> &want_to_encode, + map<int, bufferlist> *encoded) +{ + char *chunks[k + m]; + for (int i = 0; i < k + m; i++) + chunks[i] = (*encoded)[i].c_str(); + isa_encode(&chunks[0], &chunks[k], (*encoded)[0].length()); + return 0; +} + +int ErasureCodeIsa::decode_chunks(const set<int> &want_to_read, + const map<int, bufferlist> &chunks, + map<int, bufferlist> *decoded) +{ + unsigned blocksize = (*chunks.begin()).second.length(); + int erasures[k + m + 1]; + int erasures_count = 0; + char *data[k]; + char *coding[m]; + for (int i = 0; i < k + m; i++) { + if (chunks.find(i) == chunks.end()) { + erasures[erasures_count] = i; + erasures_count++; + } + if (i < k) + data[i] = (*decoded)[i].c_str(); + else + coding[i - k] = (*decoded)[i].c_str(); + } + erasures[erasures_count] = -1; + ceph_assert(erasures_count > 0); + return isa_decode(erasures, data, coding, blocksize); +} + +// ----------------------------------------------------------------------------- + +void +ErasureCodeIsaDefault::isa_encode(char **data, + char **coding, + int blocksize) +{ + + if (m == 1) + // single parity stripe + region_xor((unsigned char**) data, (unsigned char*) coding[0], k, blocksize); + else + ec_encode_data(blocksize, k, m, encode_tbls, + (unsigned char**) data, (unsigned char**) coding); +} + +// ----------------------------------------------------------------------------- + +bool +ErasureCodeIsaDefault::erasure_contains(int *erasures, int i) +{ + for (int l = 0; erasures[l] != -1; l++) { + if (erasures[l] == i) + return true; + } + return false; +} + +// ----------------------------------------------------------------------------- + + + +// ----------------------------------------------------------------------------- + +int +ErasureCodeIsaDefault::isa_decode(int *erasures, + char **data, + 
char **coding, + int blocksize) +{ + int nerrs = 0; + int i, r, s; + + // count the errors + for (int l = 0; erasures[l] != -1; l++) { + nerrs++; + } + + unsigned char *recover_source[k]; + unsigned char *recover_target[m]; + + memset(recover_source, 0, sizeof (recover_source)); + memset(recover_target, 0, sizeof (recover_target)); + + // --------------------------------------------- + // Assign source and target buffers + // --------------------------------------------- + for (i = 0, s = 0, r = 0; ((r < k) || (s < nerrs)) && (i < (k + m)); i++) { + if (!erasure_contains(erasures, i)) { + if (r < k) { + if (i < k) { + recover_source[r] = (unsigned char*) data[i]; + } else { + recover_source[r] = (unsigned char*) coding[i - k]; + } + r++; + } + } else { + if (s < m) { + if (i < k) { + recover_target[s] = (unsigned char*) data[i]; + } else { + recover_target[s] = (unsigned char*) coding[i - k]; + } + s++; + } + } + } + + if (m == 1) { + // single parity decoding + ceph_assert(1 == nerrs); + dout(20) << "isa_decode: reconstruct using region xor [" << + erasures[0] << "]" << dendl; + region_xor(recover_source, recover_target[0], k, blocksize); + return 0; + } + + + if ((matrixtype == kVandermonde) && + (nerrs == 1) && + (erasures[0] < (k + 1))) { + // use xor decoding if a data chunk is missing or the first coding chunk + dout(20) << "isa_decode: reconstruct using region xor [" << + erasures[0] << "]" << dendl; + ceph_assert(1 == s); + ceph_assert(k == r); + region_xor(recover_source, recover_target[0], k, blocksize); + return 0; + } + + unsigned char d[k * (m + k)]; + unsigned char decode_tbls[k * (m + k)*32]; + unsigned char *p_tbls = decode_tbls; + + int decode_index[k]; + + if (nerrs > m) + return -1; + + std::string erasure_signature; // describes a matrix configuration for caching + + // --------------------------------------------- + // Construct b by removing error rows + // --------------------------------------------- + + for (i = 0, r = 0; i < k; i++, r++) { 
+ char id[128]; + while (erasure_contains(erasures, r)) + r++; + + decode_index[i] = r; + + snprintf(id, sizeof (id), "+%d", r); + erasure_signature += id; + } + + for (int p = 0; p < nerrs; p++) { + char id[128]; + snprintf(id, sizeof (id), "-%d", erasures[p]); + erasure_signature += id; + } + + // --------------------------------------------- + // Try to get an already computed matrix + // --------------------------------------------- + if (!tcache.getDecodingTableFromCache(erasure_signature, p_tbls, matrixtype, k, m)) { + int j; + unsigned char b[k * (m + k)]; + unsigned char c[k * (m + k)]; + + for (i = 0; i < k; i++) { + r = decode_index[i]; + for (j = 0; j < k; j++) + b[k * i + j] = encode_coeff[k * r + j]; + } + // --------------------------------------------- + // Compute inverted matrix + // --------------------------------------------- + + // -------------------------------------------------------- + // Remark: this may fail for certain Vandermonde matrices ! + // There is an advanced way trying to use different + // source chunks to get an invertible matrix, however + // there are also (k,m) combinations which cannot be + // inverted when m chunks are lost and this optimizations + // does not help. Therefor we keep the code simpler. 
+ // -------------------------------------------------------- + if (gf_invert_matrix(b, d, k) < 0) { + dout(0) << "isa_decode: bad matrix" << dendl; + return -1; + } + + for (int p = 0; p < nerrs; p++) { + if (erasures[p] < k) { + // decoding matrix elements for data chunks + for (j = 0; j < k; j++) { + c[k * p + j] = d[k * erasures[p] + j]; + } + } else { + // decoding matrix element for coding chunks + for (i = 0; i < k; i++) { + int s = 0; + for (j = 0; j < k; j++) + s ^= gf_mul(d[j * k + i], + encode_coeff[k * erasures[p] + j]); + + c[k * p + i] = s; + } + } + } + + // --------------------------------------------- + // Initialize Decoding Table + // --------------------------------------------- + ec_init_tables(k, nerrs, c, decode_tbls); + tcache.putDecodingTableToCache(erasure_signature, p_tbls, matrixtype, k, m); + } + // Recover data sources + ec_encode_data(blocksize, + k, nerrs, decode_tbls, recover_source, recover_target); + + + return 0; +} + +// ----------------------------------------------------------------------------- + +unsigned +ErasureCodeIsaDefault::get_alignment() const +{ + return EC_ISA_ADDRESS_ALIGNMENT; +} + +// ----------------------------------------------------------------------------- + +int ErasureCodeIsaDefault::parse(ErasureCodeProfile &profile, + ostream *ss) +{ + int err = ErasureCode::parse(profile, ss); + err |= to_int("k", profile, &k, DEFAULT_K, ss); + err |= to_int("m", profile, &m, DEFAULT_M, ss); + err |= sanity_check_k_m(k, m, ss); + + if (matrixtype == kVandermonde) { + // these are verified safe values evaluated using the + // benchmarktool and 10*(combinatoric for maximum loss) random + // full erasures + if (k > 32) { + *ss << "Vandermonde: m=" << m + << " should be less/equal than 32 : revert to k=32" << std::endl; + k = 32; + err = -EINVAL; + } + + if (m > 4) { + *ss << "Vandermonde: m=" << m + << " should be less than 5 to guarantee an MDS codec:" + << " revert to m=4" << std::endl; + m = 4; + err = -EINVAL; + } + 
switch (m) { + case 4: + if (k > 21) { + *ss << "Vandermonde: k=" << k + << " should be less than 22 to guarantee an MDS" + << " codec with m=4: revert to k=21" << std::endl; + k = 21; + err = -EINVAL; + } + break; + default: + ; + } + } + return err; +} + +// ----------------------------------------------------------------------------- + +void +ErasureCodeIsaDefault::prepare() +{ + // setup shared encoding table and coefficients + unsigned char** p_enc_table = + tcache.getEncodingTable(matrixtype, k, m); + + unsigned char** p_enc_coeff = + tcache.getEncodingCoefficient(matrixtype, k, m); + + if (!*p_enc_coeff) { + dout(10) << "[ cache tables ] creating coeff for k=" << + k << " m=" << m << dendl; + // build encoding coefficients which need to be computed once for each (k,m) + encode_coeff = (unsigned char*) malloc(k * (m + k)); + + if (matrixtype == kVandermonde) + gf_gen_rs_matrix(encode_coeff, k + m, k); + if (matrixtype == kCauchy) + gf_gen_cauchy1_matrix(encode_coeff, k + m, k); + + // either our new created coefficients are stored or if they have been + // created in the meanwhile the locally allocated coefficients will be + // freed by setEncodingCoefficient + encode_coeff = tcache.setEncodingCoefficient(matrixtype, k, m, encode_coeff); + } else { + encode_coeff = *p_enc_coeff; + } + + if (!*p_enc_table) { + dout(10) << "[ cache tables ] creating tables for k=" << + k << " m=" << m << dendl; + // build encoding table which needs to be computed once for each (k,m) + encode_tbls = (unsigned char*) malloc(k * (m + k)*32); + ec_init_tables(k, m, &encode_coeff[k * k], encode_tbls); + + // either our new created table is stored or if it has been + // created in the meanwhile the locally allocated table will be + // freed by setEncodingTable + encode_tbls = tcache.setEncodingTable(matrixtype, k, m, encode_tbls); + } else { + encode_tbls = *p_enc_table; + } + + unsigned memory_lru_cache = + k * (m + k) * 32 * tcache.decoding_tables_lru_length; + + dout(10) << "[ 
cache memory ] = " << memory_lru_cache << " bytes" << + " [ matrix ] = " << + ((matrixtype == kVandermonde) ? "Vandermonde" : "Cauchy") << dendl; + + ceph_assert((matrixtype == kVandermonde) || (matrixtype == kCauchy)); + +} +// ----------------------------------------------------------------------------- diff --git a/src/erasure-code/isa/ErasureCodeIsa.h b/src/erasure-code/isa/ErasureCodeIsa.h new file mode 100644 index 000000000..705a1723a --- /dev/null +++ b/src/erasure-code/isa/ErasureCodeIsa.h @@ -0,0 +1,153 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 CERN (Switzerland) + * + * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +/** + * @file ErasureCodeIsa.h + * + * @brief Erasure Code CODEC using the INTEL ISA-L library. + * + * The INTEL ISA-L library supports two pre-defined encoding matrices (cauchy, reed_sol_van = default). + * The default CODEC implementation using these two matrices is implemented in class ErasureCodeIsaDefault. + * ISA-L allows the use of custom matrices, which might be added later as implementations deriving from the base class ErasureCodeIsa. 
+ */ + +#ifndef CEPH_ERASURE_CODE_ISA_L_H +#define CEPH_ERASURE_CODE_ISA_L_H + +// ----------------------------------------------------------------------------- +#include "erasure-code/ErasureCode.h" +#include "ErasureCodeIsaTableCache.h" +// ----------------------------------------------------------------------------- + +class ErasureCodeIsa : public ceph::ErasureCode { +public: + + enum eMatrix { + kVandermonde = 0, kCauchy = 1 + }; + + int k; + int m; + int w; + + ErasureCodeIsaTableCache &tcache; + const char *technique; + + ErasureCodeIsa(const char *_technique, + ErasureCodeIsaTableCache &_tcache) : + k(0), + m(0), + w(0), + tcache(_tcache), + technique(_technique) + { + } + + + ~ErasureCodeIsa() override + { + } + + unsigned int + get_chunk_count() const override + { + return k + m; + } + + unsigned int + get_data_chunk_count() const override + { + return k; + } + + unsigned int get_chunk_size(unsigned int object_size) const override; + + int encode_chunks(const std::set<int> &want_to_encode, + std::map<int, ceph::buffer::list> *encoded) override; + + int decode_chunks(const std::set<int> &want_to_read, + const std::map<int, ceph::buffer::list> &chunks, + std::map<int, ceph::buffer::list> *decoded) override; + + int init(ceph::ErasureCodeProfile &profile, std::ostream *ss) override; + + virtual void isa_encode(char **data, + char **coding, + int blocksize) = 0; + + + virtual int isa_decode(int *erasures, + char **data, + char **coding, + int blocksize) = 0; + + virtual unsigned get_alignment() const = 0; + + virtual void prepare() = 0; + + private: + virtual int parse(ceph::ErasureCodeProfile &profile, + std::ostream *ss) = 0; +}; + +// ----------------------------------------------------------------------------- + +class ErasureCodeIsaDefault : public ErasureCodeIsa { +private: + int matrixtype; + +public: + + static const std::string DEFAULT_K; + static const std::string DEFAULT_M; + + unsigned char* encode_coeff; // encoding coefficient + unsigned char* 
encode_tbls; // encoding table + + ErasureCodeIsaDefault(ErasureCodeIsaTableCache &_tcache, + int matrix = kVandermonde) : + + ErasureCodeIsa("default", _tcache), + encode_coeff(0), encode_tbls(0) + { + matrixtype = matrix; + } + + + ~ErasureCodeIsaDefault() override + { + + } + + void isa_encode(char **data, + char **coding, + int blocksize) override; + + virtual bool erasure_contains(int *erasures, int i); + + int isa_decode(int *erasures, + char **data, + char **coding, + int blocksize) override; + + unsigned get_alignment() const override; + + void prepare() override; + + private: + int parse(ceph::ErasureCodeProfile &profile, + std::ostream *ss) override; +}; + +#endif diff --git a/src/erasure-code/isa/ErasureCodeIsaTableCache.cc b/src/erasure-code/isa/ErasureCodeIsaTableCache.cc new file mode 100644 index 000000000..8a3318aa1 --- /dev/null +++ b/src/erasure-code/isa/ErasureCodeIsaTableCache.cc @@ -0,0 +1,327 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 CERN (Switzerland) + * + * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + + +/** + * @file ErasureCodeIsaTableCache.cc + * + * @brief Erasure Code Isa CODEC Table Cache + * + * The INTEL ISA-L library supports two pre-defined encoding matrices (cauchy, reed_sol_van = default). + * The default CODEC implementation using these two matrices is implemented in class ErasureCodeIsaDefault. + * ISA-L allows the use of custom matrices, which might be added later as implementations deriving from the base class ErasureCodeIsa. 
+ */ + +// ----------------------------------------------------------------------------- +#include "ErasureCodeIsaTableCache.h" +#include "common/debug.h" +// ----------------------------------------------------------------------------- + +// ----------------------------------------------------------------------------- +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_osd +#undef dout_prefix +#define dout_prefix _tc_prefix(_dout) +// ----------------------------------------------------------------------------- + +// ----------------------------------------------------------------------------- + +static std::ostream& +_tc_prefix(std::ostream* _dout) +{ + return *_dout << "ErasureCodeIsaTableCache: "; +} + +// ----------------------------------------------------------------------------- + +// Releases every cached encoding coefficient buffer, encoding table and the +// per-matrix-type decoding LRU map/list while holding codec_tables_guard. +ErasureCodeIsaTableCache::~ErasureCodeIsaTableCache() +{ + std::lock_guard lock{codec_tables_guard}; + + codec_technique_tables_t::const_iterator ttables_it; + codec_tables_t::const_iterator tables_it; + codec_table_t::const_iterator table_it; + + std::map<int, lru_map_t*>::const_iterator lru_map_it; + std::map<int, lru_list_t*>::const_iterator lru_list_it; + + // clean-up all allocated tables + for (ttables_it = encoding_coefficient.begin(); ttables_it != encoding_coefficient.end(); ++ttables_it) { + for (tables_it = ttables_it->second.begin(); tables_it != ttables_it->second.end(); ++tables_it) { + for (table_it = tables_it->second.begin(); table_it != tables_it->second.end(); ++table_it) { + if (table_it->second) { + if (*(table_it->second)) { + // stored buffers are handed over via setEncodingCoefficient(), which + // treats them as malloc()ed memory (it free()s duplicates) + free(*(table_it->second)); + } + // the slot itself was created with new (unsigned char*) + delete table_it->second; + } + } + } + } + + for (ttables_it = encoding_table.begin(); ttables_it != encoding_table.end(); ++ttables_it) { + for (tables_it = ttables_it->second.begin(); tables_it != ttables_it->second.end(); ++tables_it) { + for (table_it = tables_it->second.begin(); table_it != tables_it->second.end(); ++table_it) { + if (table_it->second) { + if 
(*(table_it->second)) { + // same ownership rule as above: buffer is free()d, slot is deleted + free(*(table_it->second)); + } + delete table_it->second; + } + } + } + } + + for (lru_map_it = decoding_tables.begin(); lru_map_it != decoding_tables.end(); ++lru_map_it) { + if (lru_map_it->second) { + delete lru_map_it->second; + } + } + + for (lru_list_it = decoding_tables_lru.begin(); lru_list_it != decoding_tables_lru.end(); ++lru_list_it) { + if (lru_list_it->second) { + delete lru_list_it->second; + } + } +} + +// ----------------------------------------------------------------------------- + +int +ErasureCodeIsaTableCache::getDecodingTableCacheSize(int matrixtype) +{ + std::lock_guard lock{codec_tables_guard}; + if (decoding_tables[matrixtype]) + return decoding_tables[matrixtype]->size(); + else + return -1; +} + +// ----------------------------------------------------------------------------- + +ErasureCodeIsaTableCache::lru_map_t* +ErasureCodeIsaTableCache::getDecodingTables(int matrix_type) +{ + // the caller must hold the guard mutex: + // => std::lock_guard lock{codec_tables_guard}; + + // create an lru_map if not yet allocated + if (!decoding_tables[matrix_type]) { + decoding_tables[matrix_type] = new lru_map_t; + } + return decoding_tables[matrix_type]; +} + +// ----------------------------------------------------------------------------- + +ErasureCodeIsaTableCache::lru_list_t* +ErasureCodeIsaTableCache::getDecodingTablesLru(int matrix_type) +{ + // the caller must hold the guard mutex: + // => std::lock_guard lock{codec_tables_guard}; + + // create an lru_list if not yet allocated + if (!decoding_tables_lru[matrix_type]) { + decoding_tables_lru[matrix_type] = new lru_list_t; + } + return decoding_tables_lru[matrix_type]; +} + +// ----------------------------------------------------------------------------- + +unsigned char** +ErasureCodeIsaTableCache::getEncodingTable(int matrix, int k, int m) +{ + std::lock_guard lock{codec_tables_guard}; + return getEncodingTableNoLock(matrix,k,m); +} + +// 
----------------------------------------------------------------------------- + +unsigned char** +ErasureCodeIsaTableCache::getEncodingTableNoLock(int matrix, int k, int m) +{ + // create a pointer to store an encoding table address + if (!encoding_table[matrix][k][m]) { + encoding_table[matrix][k][m] = new (unsigned char*); + *encoding_table[matrix][k][m] = 0; + } + return encoding_table[matrix][k][m]; +} + +// ----------------------------------------------------------------------------- + +unsigned char** +ErasureCodeIsaTableCache::getEncodingCoefficient(int matrix, int k, int m) +{ + std::lock_guard lock{codec_tables_guard}; + return getEncodingCoefficientNoLock(matrix,k,m); +} + +// ----------------------------------------------------------------------------- + +unsigned char** +ErasureCodeIsaTableCache::getEncodingCoefficientNoLock(int matrix, int k, int m) +{ + // create a pointer to store an encoding coefficients address + if (!encoding_coefficient[matrix][k][m]) { + encoding_coefficient[matrix][k][m] = new (unsigned char*); + *encoding_coefficient[matrix][k][m] = 0; + } + return encoding_coefficient[matrix][k][m]; +} + +// ----------------------------------------------------------------------------- + +unsigned char* +ErasureCodeIsaTableCache::setEncodingTable(int matrix, int k, int m, unsigned char* ec_in_table) +{ + std::lock_guard lock{codec_tables_guard}; + unsigned char** ec_out_table = getEncodingTableNoLock(matrix, k, m); + if (*ec_out_table) { + // somebody might have deposited this table in the meanwhile, so clean + // the input table and return the stored one + free (ec_in_table); + return *ec_out_table; + } else { + // we store the provided input table and return this one + *encoding_table[matrix][k][m] = ec_in_table; + return ec_in_table; + } +} + +// ----------------------------------------------------------------------------- + +unsigned char* +ErasureCodeIsaTableCache::setEncodingCoefficient(int matrix, int k, int m, unsigned char* 
ec_in_coeff) +{ + std::lock_guard lock{codec_tables_guard}; + unsigned char** ec_out_coeff = getEncodingCoefficientNoLock(matrix, k, m); + if (*ec_out_coeff) { + // somebody might have deposited these coefficients in the meanwhile, so clean + // the input coefficients and return the stored ones + free (ec_in_coeff); + return *ec_out_coeff; + } else { + // we store the provided input coefficients and return these + *encoding_coefficient[matrix][k][m] = ec_in_coeff; + return ec_in_coeff; + } +} + +// ----------------------------------------------------------------------------- + +ceph::mutex* +ErasureCodeIsaTableCache::getLock() +{ + return &codec_tables_guard; +} + +// ----------------------------------------------------------------------------- + +bool +ErasureCodeIsaTableCache::getDecodingTableFromCache(std::string &signature, + unsigned char* &table, + int matrixtype, + int k, + int m) +{ + // -------------------------------------------------------------------------- + // LRU decoding matrix cache + // -------------------------------------------------------------------------- + + dout(12) << "[ get table ] = " << signature << dendl; + + // we try to fetch a decoding table from an LRU cache + bool found = false; + + std::lock_guard lock{codec_tables_guard}; + + lru_map_t* decode_tbls_map = + getDecodingTables(matrixtype); + + lru_list_t* decode_tbls_lru = + getDecodingTablesLru(matrixtype); + + if (decode_tbls_map->count(signature)) { + dout(12) << "[ cached table ] = " << signature << dendl; + // copy the table out of the cache + memcpy(table, (*decode_tbls_map)[signature].second.c_str(), k * (m + k)*32); + // find item in LRU queue and push back + dout(12) << "[ cache size ] = " << decode_tbls_lru->size() << dendl; + decode_tbls_lru->splice( (decode_tbls_lru->begin()), *decode_tbls_lru, (*decode_tbls_map)[signature].first); + found = true; + } + + return found; +} + +// ----------------------------------------------------------------------------- + +void 
+ErasureCodeIsaTableCache::putDecodingTableToCache(std::string &signature, + unsigned char* &table, + int matrixtype, + int k, + int m) +{ + // -------------------------------------------------------------------------- + // LRU decoding matrix cache + // -------------------------------------------------------------------------- + + dout(12) << "[ put table ] = " << signature << dendl; + + // we store a new table to the cache + + ceph::buffer::ptr cachetable; + + std::lock_guard lock{codec_tables_guard}; + + lru_map_t* decode_tbls_map = + getDecodingTables(matrixtype); + + lru_list_t* decode_tbls_lru = + getDecodingTablesLru(matrixtype); + + // evt. shrink the LRU queue/map + if ((int) decode_tbls_lru->size() >= ErasureCodeIsaTableCache::decoding_tables_lru_length) { + dout(12) << "[ shrink lru ] = " << signature << dendl; + // reuse old buffer + cachetable = (*decode_tbls_map)[decode_tbls_lru->back()].second; + + if ((int) cachetable.length() != (k * (m + k)*32)) { + // we need to replace this with a different size buffer + cachetable = ceph::buffer::create(k * (m + k)*32); + } + + // remove from map + decode_tbls_map->erase(decode_tbls_lru->back()); + // remove from lru + decode_tbls_lru->pop_back(); + // add to the head of lru + decode_tbls_lru->push_front(signature); + // add the new to the map + (*decode_tbls_map)[signature] = std::make_pair(decode_tbls_lru->begin(), cachetable); + } else { + dout(12) << "[ store table ] = " << signature << dendl; + // allocate a new buffer + cachetable = ceph::buffer::create(k * (m + k)*32); + decode_tbls_lru->push_front(signature); + (*decode_tbls_map)[signature] = std::make_pair(decode_tbls_lru->begin(), cachetable); + dout(12) << "[ cache size ] = " << decode_tbls_lru->size() << dendl; + } + + // copy-in the new table + memcpy(cachetable.c_str(), table, k * (m + k)*32); +} diff --git a/src/erasure-code/isa/ErasureCodeIsaTableCache.h b/src/erasure-code/isa/ErasureCodeIsaTableCache.h new file mode 100644 index 
000000000..8110a4660 --- /dev/null +++ b/src/erasure-code/isa/ErasureCodeIsaTableCache.h @@ -0,0 +1,103 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 CERN (Switzerland) + * + * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +/** + * @file ErasureCodeIsaTableCache.h + * + * @brief Erasure Code Isa CODEC Table Cache + * + * The INTEL ISA-L library supports two pre-defined encoding matrices (cauchy = default, reed_sol_van = default) + * The default CODEC implementation using these two matrices is implemented in class ErasureCodeIsaDefault. + * ISA-L allows to use custom matrices which might be added later as implementations deriving from the base class ErasoreCodeIsa. + */ + +#ifndef CEPH_ERASURE_CODE_ISA_TABLE_CACHE_H +#define CEPH_ERASURE_CODE_ISA_TABLE_CACHE_H + +// ----------------------------------------------------------------------------- +#include "common/ceph_mutex.h" +#include "erasure-code/ErasureCodeInterface.h" +// ----------------------------------------------------------------------------- +#include <list> +// ----------------------------------------------------------------------------- + +class ErasureCodeIsaTableCache { + // --------------------------------------------------------------------------- + // This class implements a table cache for encoding and decoding matrices. + // Encoding matrices are shared for the same (k,m) combination. It supplies + // a decoding matrix lru cache which is shared for identical + // matrix types e.g. there is one cache (lru-list + lru-map) for Cauchy and + // one for Vandermonde matrices! 
+ // --------------------------------------------------------------------------- + +public: + + // the cache size is sufficient up to (12,4) decodings + + static const int decoding_tables_lru_length = 2516; + + typedef std::pair<std::list<std::string>::iterator, ceph::buffer::ptr> lru_entry_t; + typedef std::map< int, unsigned char** > codec_table_t; + typedef std::map< int, codec_table_t > codec_tables_t; + typedef std::map< int, codec_tables_t > codec_technique_tables_t; + + typedef std::map< std::string, lru_entry_t > lru_map_t; + typedef std::list< std::string > lru_list_t; + + ErasureCodeIsaTableCache() = default; + + virtual ~ErasureCodeIsaTableCache(); + + // mutex used to protect modifications in encoding/decoding table maps + ceph::mutex codec_tables_guard = ceph::make_mutex("isa-lru-cache"); + + bool getDecodingTableFromCache(std::string &signature, + unsigned char* &table, + int matrixtype, + int k, + int m); + + void putDecodingTableToCache(std::string&, + unsigned char*&, + int matrixtype, + int k, + int m); + + unsigned char** getEncodingTable(int matrix, int k, int m); + unsigned char** getEncodingCoefficient(int matrix, int k, int m); + + unsigned char** getEncodingTableNoLock(int matrix, int k, int m); + unsigned char** getEncodingCoefficientNoLock(int matrix, int k, int m); + + unsigned char* setEncodingTable(int matrix, int k, int m, unsigned char*); + unsigned char* setEncodingCoefficient(int matrix, int k, int m, unsigned char*); + + int getDecodingTableCacheSize(int matrixtype = 0); + +private: + codec_technique_tables_t encoding_coefficient; // encoding coefficients accessed via table[matrix][k][m] + codec_technique_tables_t encoding_table; // encoding coefficients accessed via table[matrix][k][m] + + std::map<int, lru_map_t*> decoding_tables; // decoding table cache accessed via map[matrixtype] + std::map<int, lru_list_t*> decoding_tables_lru; // decoding table lru list accessed via list[matrixtype] + + lru_map_t* getDecodingTables(int 
matrix_type); + + lru_list_t* getDecodingTablesLru(int matrix_type); + + ceph::mutex* getLock(); + +}; + +#endif diff --git a/src/erasure-code/isa/ErasureCodePluginIsa.cc b/src/erasure-code/isa/ErasureCodePluginIsa.cc new file mode 100644 index 000000000..ba54feb64 --- /dev/null +++ b/src/erasure-code/isa/ErasureCodePluginIsa.cc @@ -0,0 +1,82 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 CERN (Switzerland) + * + * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + + +/** + * @file ErasureCodePluginIsa.cc + * + * @brief Erasure Code Plug-in class wrapping the INTEL ISA-L library + * + * The factory plug-in class allows to call individual encoding techniques. + * The INTEL ISA-L library provides two pre-defined encoding matrices + * (cauchy, reed_sol_van = default). 
+ */ + +// ----------------------------------------------------------------------------- +#include "ceph_ver.h" +#include "include/buffer.h" +#include "ErasureCodePluginIsa.h" +#include "ErasureCodeIsa.h" +// ----------------------------------------------------------------------------- + +int ErasureCodePluginIsa::factory(const std::string &directory, + ceph::ErasureCodeProfile &profile, + ceph::ErasureCodeInterfaceRef *erasure_code, + std::ostream *ss) +{ + ErasureCodeIsa *interface; + std::string t; + if (profile.find("technique") == profile.end()) + profile["technique"] = "reed_sol_van"; + t = profile.find("technique")->second; + if ((t == "reed_sol_van")) { + interface = new ErasureCodeIsaDefault(tcache, + ErasureCodeIsaDefault::kVandermonde); + } else { + if ((t == "cauchy")) { + interface = new ErasureCodeIsaDefault(tcache, + ErasureCodeIsaDefault::kCauchy); + } else { + *ss << "technique=" << t << " is not a valid coding technique. " + << " Choose one of the following: " + << "reed_sol_van," + << "cauchy" << std::endl; + return -ENOENT; + } + } + + int r = interface->init(profile, ss); + if (r) { + delete interface; + return r; + } + *erasure_code = ceph::ErasureCodeInterfaceRef(interface); + return 0; +} + +// ----------------------------------------------------------------------------- + +const char *__erasure_code_version() +{ + return CEPH_GIT_NICE_VER; +} + +// ----------------------------------------------------------------------------- + +int __erasure_code_init(char *plugin_name, char *directory) +{ + auto& instance = ceph::ErasureCodePluginRegistry::instance(); + + return instance.add(plugin_name, new ErasureCodePluginIsa()); +} diff --git a/src/erasure-code/isa/ErasureCodePluginIsa.h b/src/erasure-code/isa/ErasureCodePluginIsa.h new file mode 100644 index 000000000..117e17a44 --- /dev/null +++ b/src/erasure-code/isa/ErasureCodePluginIsa.h @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 
/*
 * Ceph distributed storage system
 *
 * Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
 * Copyright (C) 2014 Red Hat <contact@redhat.com>
 *
 * Author: Loic Dachary <loic@dachary.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 */

#ifndef CEPH_ERASURE_CODE_PLUGIN_ISA_H
#define CEPH_ERASURE_CODE_PLUGIN_ISA_H

#include "erasure-code/ErasureCodePlugin.h"
#include "ErasureCodeIsaTableCache.h"

// Erasure code plugin wrapping the INTEL ISA-L library.
class ErasureCodePluginIsa : public ceph::ErasureCodePlugin {
public:
  // Table cache owned by the plugin; factory() passes it to every codec
  // object it creates, so those objects share the same cache.
  ErasureCodeIsaTableCache tcache;

  // Create an erasure code object for the technique named in `profile`
  // ("reed_sol_van" when the profile has no "technique" key, or "cauchy").
  // Returns 0 and sets *erasure_code on success, -ENOENT for an unknown
  // technique, or the result of the object's init() when that fails.
  // Diagnostics are written to *ss.
  int factory(const std::string &directory,
              ceph::ErasureCodeProfile &profile,
              ceph::ErasureCodeInterfaceRef *erasure_code,
              std::ostream *ss) override;
};

#endif
Be aware that the generated Vandermonde matrix is not always invertible and therefore not fully MDS. +Therefore the accepted parameter space has been limited to a maximum of (21,4) and (32,3) for Vandermonde matrices. + +Run the Test suite +================== +cd ceph/src +make unittest_erasure_code_isa +./unittest_erasure_code_isa --gtest_filter=*.* --log-to-stderr=true --debug-osd=20 + +Run the CEPH erasure code benchmark +=================================== +cd ceph/src +make ceph_erasure_code_benchmark + +# consult ./ceph_erasure_code_benchmark -h for help + +# encode performance +./ceph_erasure_code_benchmark -p isa -P k=8 -P m=3 -S 1048576 -i 1000 + +# decode performance one lost +./ceph_erasure_code_benchmark -e 1 -w decode -p isa -P k=8 -P m=3 -S 1048576 -i 1000 + +# decode performance two lost +./ceph_erasure_code_benchmark -e 2 -w decode -p isa -P k=8 -P m=3 -S 1048576 -i 1000 + +# decode performance three lost +./ceph_erasure_code_benchmark -e 3 -w decode -p isa -P k=8 -P m=3 -S 1048576 -i 1000 + + +Developer Notes +=============== +The plugin provides optimal performance for a 32-byte aligned buffer start address and a +buffer length that is a multiple of k*32 bytes. The encoding tables are computed only once when the EC +object is created. Decoding tables have to be computed for each decoding since the available +data/coding sources may change between calls. +Decoding tables are cached in an LRU cache which is sufficiently large for configurations up to (12,4). + +For larger configurations the cache might expire the 'oldest' tables and decoding might +slow down. The plug-in uses an optimization to use a pure region XOR to decode single disk +failures if the erased chunk is within the first (k+1) chunks. + +The unit test probes all possible failure scenarios for (12,4) Vandermonde and Cauchy matrices.
diff --git a/src/erasure-code/isa/xor_op.cc b/src/erasure-code/isa/xor_op.cc new file mode 100644 index 000000000..2b56e977c --- /dev/null +++ b/src/erasure-code/isa/xor_op.cc @@ -0,0 +1,183 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 CERN (Switzerland) + * * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch> * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +// ----------------------------------------------------------------------------- +#include "xor_op.h" +#include <stdio.h> +#include <string.h> +#include "arch/intel.h" + +#include "include/ceph_assert.h" + +// ----------------------------------------------------------------------------- + + +// ----------------------------------------------------------------------------- + +void +// ----------------------------------------------------------------------------- +byte_xor(unsigned char* cw, unsigned char* dw, unsigned char* ew) +// ----------------------------------------------------------------------------- +{ + while (cw < ew) + *dw++ ^= *cw++; +} + +// ----------------------------------------------------------------------------- + +void +// ----------------------------------------------------------------------------- +vector_xor(vector_op_t* cw, + vector_op_t* dw, + vector_op_t* ew) +// ----------------------------------------------------------------------------- +{ + ceph_assert(is_aligned(cw, EC_ISA_VECTOR_OP_WORDSIZE)); + ceph_assert(is_aligned(dw, EC_ISA_VECTOR_OP_WORDSIZE)); + ceph_assert(is_aligned(ew, EC_ISA_VECTOR_OP_WORDSIZE)); + while (cw < ew) { + *dw++ ^= *cw++; + } +} + + +// ----------------------------------------------------------------------------- + +void +// 
----------------------------------------------------------------------------- +region_xor(unsigned char** src, + unsigned char* parity, + int src_size, + unsigned size) +{ + if (!size) { + // nothing to do + return; + } + + if (!src_size) { + // nothing to do + return; + } + + if (src_size == 1) { + // just copy source to parity + memcpy(parity, src[0], size); + return; + } + + unsigned size_left = size; + + // ---------------------------------------------------------- + // region or vector XOR operations require aligned addresses + // ---------------------------------------------------------- + + bool src_aligned = true; + for (int i = 0; i < src_size; i++) { + src_aligned &= is_aligned(src[i], EC_ISA_VECTOR_OP_WORDSIZE); + } + + if (src_aligned && + is_aligned(parity, EC_ISA_VECTOR_OP_WORDSIZE)) { + +#ifdef __x86_64__ + if (ceph_arch_intel_sse2) { + // ----------------------------- + // use SSE2 region xor function + // ----------------------------- + unsigned region_size = + (size / EC_ISA_VECTOR_SSE2_WORDSIZE) * EC_ISA_VECTOR_SSE2_WORDSIZE; + + size_left -= region_size; + // 64-byte region xor + region_sse2_xor((char**) src, (char*) parity, src_size, region_size); + } else +#endif + { + // -------------------------------------------- + // use region xor based on vector xor operation + // -------------------------------------------- + unsigned vector_words = size / EC_ISA_VECTOR_OP_WORDSIZE; + unsigned vector_size = vector_words * EC_ISA_VECTOR_OP_WORDSIZE; + memcpy(parity, src[0], vector_size); + + size_left -= vector_size; + vector_op_t* p_vec = (vector_op_t*) parity; + for (int i = 1; i < src_size; i++) { + vector_op_t* s_vec = (vector_op_t*) src[i]; + vector_op_t* e_vec = s_vec + vector_words; + vector_xor(s_vec, p_vec, e_vec); + } + } + } + + if (size_left) { + // -------------------------------------------------- + // xor the not aligned part with byte-wise region xor + // -------------------------------------------------- + memcpy(parity + size - 
size_left, src[0] + size - size_left, size_left); + for (int i = 1; i < src_size; i++) { + byte_xor(src[i] + size - size_left, parity + size - size_left, src[i] + size); + } + } +} + +// ----------------------------------------------------------------------------- + +void +// ----------------------------------------------------------------------------- +region_sse2_xor(char** src, + char* parity, + int src_size, + unsigned size) +// ----------------------------------------------------------------------------- +{ +#ifdef __x86_64__ + ceph_assert(!(size % EC_ISA_VECTOR_SSE2_WORDSIZE)); + unsigned char* p; + int d, l; + unsigned i; + unsigned char* vbuf[256]; + + for (int v = 0; v < src_size; v++) { + vbuf[v] = (unsigned char*) src[v]; + } + + l = src_size; + p = (unsigned char*) parity; + + for (i = 0; i < size; i += EC_ISA_VECTOR_SSE2_WORDSIZE) { + asm volatile("movdqa %0,%%xmm0" : : "m" (vbuf[0][i])); + asm volatile("movdqa %0,%%xmm1" : : "m" (vbuf[0][i + 16])); + asm volatile("movdqa %0,%%xmm2" : : "m" (vbuf[0][i + 32])); + asm volatile("movdqa %0,%%xmm3" : : "m" (vbuf[0][i + 48])); + + for (d = 1; d < l; d++) { + asm volatile("movdqa %0,%%xmm4" : : "m" (vbuf[d][i])); + asm volatile("movdqa %0,%%xmm5" : : "m" (vbuf[d][i + 16])); + asm volatile("movdqa %0,%%xmm6" : : "m" (vbuf[d][i + 32])); + asm volatile("movdqa %0,%%xmm7" : : "m" (vbuf[d][i + 48])); + asm volatile("pxor %xmm4,%xmm0"); + asm volatile("pxor %xmm5,%xmm1"); + asm volatile("pxor %xmm6,%xmm2"); + asm volatile("pxor %xmm7,%xmm3"); + } + asm volatile("movntdq %%xmm0,%0" : "=m" (p[i])); + asm volatile("movntdq %%xmm1,%0" : "=m" (p[i + 16])); + asm volatile("movntdq %%xmm2,%0" : "=m" (p[i + 32])); + asm volatile("movntdq %%xmm3,%0" : "=m" (p[i + 48])); + } + + asm volatile("sfence" : : : "memory"); +#endif // __x86_64__ + return; +} diff --git a/src/erasure-code/isa/xor_op.h b/src/erasure-code/isa/xor_op.h new file mode 100644 index 000000000..978b9a953 --- /dev/null +++ b/src/erasure-code/isa/xor_op.h @@ 
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2014 CERN (Switzerland)
 *
 * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 */

#ifndef EC_ISA_XOR_OP_H
#define EC_ISA_XOR_OP_H

// -----------------------------------------------------------------------------
#include <assert.h>
#include <stdint.h>
// -----------------------------------------------------------------------------

// -------------------------------------------------------------------------
// declaration of 64/128-bit vector operations depending on availability
// -------------------------------------------------------------------------

// NOTE(review): by its name the buffer address alignment in bytes; it is not
// referenced by the XOR routines themselves - confirm where it is used.
#define EC_ISA_ADDRESS_ALIGNMENT 32u
// number of bytes region_sse2_xor processes per loop iteration
#define EC_ISA_VECTOR_SSE2_WORDSIZE 64u

// GCC >= 4.4 and clang provide the vector_size attribute used below
#if __GNUC__ > 4 || \
  ( (__GNUC__ == 4) && (__GNUC_MINOR__ >= 4) ) ||\
  (__clang__ == 1 )
#ifdef EC_ISA_VECTOR_OP_DEBUG
#pragma message "* using 128-bit vector operations in " __FILE__
#endif

// -------------------------------------------------------------------------
// use 128-bit pointer
// -------------------------------------------------------------------------
typedef long vector_op_t __attribute__((vector_size(16)));
#define EC_ISA_VECTOR_OP_WORDSIZE 16
#else
// -------------------------------------------------------------------------
// use 64-bit pointer
// -------------------------------------------------------------------------
typedef unsigned long long vector_op_t;
#define EC_ISA_VECTOR_OP_WORDSIZE 8
#endif


// -------------------------------------------------------------------------
// check if a pointer is aligned to byte_count
// (evaluates to true when the address is a multiple of BYTE_COUNT)
// -------------------------------------------------------------------------
#define is_aligned(POINTER, BYTE_COUNT) \
  (((uintptr_t)(const void *)(POINTER)) % (BYTE_COUNT) == 0)

// -------------------------------------------------------------------------
// compute byte-wise XOR of cw and dw block, ew contains the end address of cw
// (the result is written to dw: dw[i] ^= cw[i])
// -------------------------------------------------------------------------
void
byte_xor(unsigned char* cw, unsigned char* dw, unsigned char* ew);

// -------------------------------------------------------------------------
// compute word-wise XOR of cw and dw block, ew contains the end address of cw
// (the result is written to dw; all pointers must be aligned to
// EC_ISA_VECTOR_OP_WORDSIZE)
// -------------------------------------------------------------------------
void
vector_xor(vector_op_t* cw, vector_op_t* dw, vector_op_t* ew);

// -------------------------------------------------------------------------
// compute region XOR like parity = src[0] ^ src[1] ... ^ src[src_size-1]
// over `size` bytes; src_size is the number of source buffers
// -------------------------------------------------------------------------
void
region_xor(unsigned char** src, unsigned char* parity, int src_size, unsigned size);

// -------------------------------------------------------------------------
// compute region XOR like parity = src[0] ^ src[1] ... ^ src[src_size-1]
// using SSE2 64-byte operations
// (size must be a multiple of EC_ISA_VECTOR_SSE2_WORDSIZE)
// -------------------------------------------------------------------------
void
region_sse2_xor(char** src /* array of 64-byte aligned source pointer to xor */,
                char* parity /* 64-byte aligned output pointer containing the parity */,
                int src_size /* size of the source pointer array */,
                unsigned size /* size of the region to xor */);


#endif // EC_ISA_XOR_OP_H