From 19fcec84d8d7d21e796c7624e521b60d28ee21ed Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 20:45:59 +0200 Subject: Adding upstream version 16.2.11+ds. Signed-off-by: Daniel Baumann --- src/erasure-code/shec/CMakeLists.txt | 33 + src/erasure-code/shec/ErasureCodePluginShec.cc | 82 +++ src/erasure-code/shec/ErasureCodePluginShec.h | 34 + src/erasure-code/shec/ErasureCodeShec.cc | 815 +++++++++++++++++++++ src/erasure-code/shec/ErasureCodeShec.h | 147 ++++ src/erasure-code/shec/ErasureCodeShecTableCache.cc | 318 ++++++++ src/erasure-code/shec/ErasureCodeShecTableCache.h | 120 +++ src/erasure-code/shec/determinant.c | 94 +++ 8 files changed, 1643 insertions(+) create mode 100644 src/erasure-code/shec/CMakeLists.txt create mode 100644 src/erasure-code/shec/ErasureCodePluginShec.cc create mode 100644 src/erasure-code/shec/ErasureCodePluginShec.h create mode 100644 src/erasure-code/shec/ErasureCodeShec.cc create mode 100644 src/erasure-code/shec/ErasureCodeShec.h create mode 100644 src/erasure-code/shec/ErasureCodeShecTableCache.cc create mode 100644 src/erasure-code/shec/ErasureCodeShecTableCache.h create mode 100644 src/erasure-code/shec/determinant.c (limited to 'src/erasure-code/shec') diff --git a/src/erasure-code/shec/CMakeLists.txt b/src/erasure-code/shec/CMakeLists.txt new file mode 100644 index 000000000..0e699203d --- /dev/null +++ b/src/erasure-code/shec/CMakeLists.txt @@ -0,0 +1,33 @@ +#shec plugin + +include_directories(.) + +set(shec_utils_srcs + ${CMAKE_SOURCE_DIR}/src/erasure-code/ErasureCode.cc + ErasureCodePluginShec.cc + ErasureCodeShec.cc + ErasureCodeShecTableCache.cc + determinant.c) + +add_library(shec_utils OBJECT ${shec_utils_srcs}) + +set(ec_shec_objs + $ + $ + $) + +add_library(ec_shec SHARED ${ec_shec_objs}) +set_target_properties(ec_shec PROPERTIES + INSTALL_RPATH "") +target_link_libraries(ec_shec ${EXTRALIBS}) +install(TARGETS ec_shec DESTINATION ${erasure_plugin_dir}) + +# legacy libraries +foreach(flavor ${jerasure_legacy_flavors}) + set(plugin_name "ec_shec_${flavor}") + add_library(${plugin_name} SHARED ${ec_shec_objs}) + set_target_properties(${plugin_name} PROPERTIES + INSTALL_RPATH "") + install(TARGETS ${plugin_name} DESTINATION ${erasure_plugin_dir}) + add_dependencies(ec_shec ${plugin_name}) +endforeach() diff --git a/src/erasure-code/shec/ErasureCodePluginShec.cc b/src/erasure-code/shec/ErasureCodePluginShec.cc new file mode 100644 index 000000000..3eab48f90 --- /dev/null +++ b/src/erasure-code/shec/ErasureCodePluginShec.cc @@ -0,0 +1,82 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 FUJITSU LIMITED + * Copyright (C) 2013,2014 Cloudwatt + * Copyright (C) 2014 Red Hat + * + * Author: Takanori Nakao + * Author: Takeshi Miyamae + * Author: Loic Dachary + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +#include "ceph_ver.h" +#include "common/debug.h" +#include "ErasureCodePluginShec.h" +#include "ErasureCodeShecTableCache.h" +#include "ErasureCodeShec.h" +#include "jerasure_init.h" + +#define dout_context g_ceph_context + +#define dout_subsys ceph_subsys_osd +#undef dout_prefix +#define dout_prefix _prefix(_dout) + +static std::ostream& _prefix(std::ostream* _dout) +{ + return *_dout << "ErasureCodePluginShec: "; +} + +int ErasureCodePluginShec::factory(const std::string &directory, + ceph::ErasureCodeProfile &profile, + ceph::ErasureCodeInterfaceRef *erasure_code, + std::ostream *ss) { + ErasureCodeShec *interface; + + if (profile.find("technique") == profile.end()) + profile["technique"] = "multiple"; + std::string t = profile.find("technique")->second; + + if (t == "single"){ + interface = new ErasureCodeShecReedSolomonVandermonde(tcache, ErasureCodeShec::SINGLE); + } else if (t == "multiple"){ + interface = new ErasureCodeShecReedSolomonVandermonde(tcache, ErasureCodeShec::MULTIPLE); + } else { + *ss << "technique=" << t << " is not a valid coding technique. " + << "Choose one of the following: " + << "single, multiple "; + return -ENOENT; + } + int r = interface->init(profile, ss); + if (r) { + delete interface; + return r; + } + *erasure_code = ceph::ErasureCodeInterfaceRef(interface); + + dout(10) << "ErasureCodePluginShec: factory() completed" << dendl; + + return 0; +} + +const char *__erasure_code_version() { return CEPH_GIT_NICE_VER; } + +int __erasure_code_init(char *plugin_name, char *directory = (char *)"") +{ + auto& instance = ceph::ErasureCodePluginRegistry::instance(); + int w[] = { 8, 16, 32 }; + int r = jerasure_init(3, w); + if (r) { + return -r; + } + return instance.add(plugin_name, new ErasureCodePluginShec()); +} diff --git a/src/erasure-code/shec/ErasureCodePluginShec.h b/src/erasure-code/shec/ErasureCodePluginShec.h new file mode 100644 index 000000000..b73e49b46 --- /dev/null +++ b/src/erasure-code/shec/ErasureCodePluginShec.h @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph distributed storage system + * + * Copyright (C) 2014 Cloudwatt + * Copyright (C) 2014 Red Hat + * + * Author: Loic Dachary + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +#ifndef CEPH_ERASURE_CODE_PLUGIN_SHEC_H +#define CEPH_ERASURE_CODE_PLUGIN_SHEC_H + +#include "ErasureCodeShecTableCache.h" +#include "erasure-code/ErasureCodePlugin.h" + +class ErasureCodePluginShec : public ceph::ErasureCodePlugin { +public: + ErasureCodeShecTableCache tcache; + + int factory(const std::string &directory, + ceph::ErasureCodeProfile &profile, + ceph::ErasureCodeInterfaceRef *erasure_code, + std::ostream *ss) override; +}; + +#endif diff --git a/src/erasure-code/shec/ErasureCodeShec.cc b/src/erasure-code/shec/ErasureCodeShec.cc new file mode 100644 index 000000000..3634be2fb --- /dev/null +++ b/src/erasure-code/shec/ErasureCodeShec.cc @@ -0,0 +1,815 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 FUJITSU LIMITED + * Copyright (C) 2013,2014 Cloudwatt + * Copyright (C) 2014 Red Hat + * + * Author: Takanori Nakao + * Author: Takeshi Miyamae + * Author: Loic Dachary + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include "common/debug.h" +#include "ErasureCodeShec.h" +extern "C" { +#include "jerasure/include/jerasure.h" +#include "jerasure/include/galois.h" + +extern int calc_determinant(int *matrix, int dim); +extern int* reed_sol_vandermonde_coding_matrix(int k, int m, int w); +} + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_osd +#undef dout_prefix +#define dout_prefix _prefix(_dout) + +using namespace std; +using namespace ceph; + + +static ostream& _prefix(std::ostream* _dout) +{ + return *_dout << "ErasureCodeShec: "; +} + +int ErasureCodeShec::init(ErasureCodeProfile &profile, + ostream *ss) +{ + int err = 0; + err |= parse(profile); + if (err) + return err; + prepare(); + return ErasureCode::init(profile, ss); +} + +unsigned int ErasureCodeShec::get_chunk_size(unsigned int object_size) const +{ + unsigned alignment = get_alignment(); + unsigned tail = object_size % alignment; + unsigned padded_length = object_size + ( tail ? ( alignment - tail ) : 0 ); + + ceph_assert(padded_length % k == 0); + return padded_length / k; +} + +int ErasureCodeShec::_minimum_to_decode(const set &want_to_read, + const set &available_chunks, + set *minimum_chunks) +{ + if (!minimum_chunks) return -EINVAL; + + for (set::iterator it = available_chunks.begin(); it != available_chunks.end(); ++it){ + if (*it < 0 || k+m <= *it) return -EINVAL; + } + + for (set::iterator it = want_to_read.begin(); it != want_to_read.end(); ++it){ + if (*it < 0 || k+m <= *it) return -EINVAL; + } + + int want[k + m]; + int avails[k + m]; + int minimum[k + m]; + + memset(want, 0, sizeof(want)); + memset(avails, 0, sizeof(avails)); + memset(minimum, 0, sizeof(minimum)); + (*minimum_chunks).clear(); + + for (set::const_iterator i = want_to_read.begin(); + i != want_to_read.end(); + ++i) { + want[*i] = 1; + } + + for (set::const_iterator i = available_chunks.begin(); + i != available_chunks.end(); + ++i) { + avails[*i] = 1; + } + + { + int decoding_matrix[k*k]; + int dm_row[k]; + int dm_column[k]; + memset(decoding_matrix, 0, sizeof(decoding_matrix)); + memset(dm_row, 0, sizeof(dm_row)); + memset(dm_column, 0, sizeof(dm_column)); + if (shec_make_decoding_matrix(true, want, avails, decoding_matrix, dm_row, dm_column, minimum) < 0) { + return -EIO; + } + } + + for (int i = 0; i < k + m; i++) { + if (minimum[i] == 1) minimum_chunks->insert(i); + } + + return 0; +} + +int ErasureCodeShec::minimum_to_decode_with_cost(const set &want_to_read, + const map &available, + set *minimum_chunks) +{ + set available_chunks; + + for (map::const_iterator i = available.begin(); + i != available.end(); + ++i) + available_chunks.insert(i->first); + + return _minimum_to_decode(want_to_read, available_chunks, minimum_chunks); +} + +int ErasureCodeShec::encode(const set &want_to_encode, + const bufferlist &in, + map *encoded) +{ + unsigned int k = get_data_chunk_count(); + unsigned int m = get_chunk_count() - k; + bufferlist out; + + if (!encoded || !encoded->empty()){ + return -EINVAL; + } + + int err = encode_prepare(in, *encoded); + if (err) + return err; + encode_chunks(want_to_encode, encoded); + for (unsigned int i = 0; i < k + m; i++) { + if (want_to_encode.count(i) == 0) + encoded->erase(i); + } + return 0; +} + +int ErasureCodeShec::encode_chunks(const set &want_to_encode, + map *encoded) +{ + char *chunks[k + m]; + for (int i = 0; i < k + m; i++){ + chunks[i] = (*encoded)[i].c_str(); + } + shec_encode(&chunks[0], &chunks[k], (*encoded)[0].length()); + return 0; +} + +int ErasureCodeShec::_decode(const set &want_to_read, + const map &chunks, + map *decoded) +{ + vector have; + + if (!decoded || !decoded->empty()){ + return -EINVAL; + } + if (!want_to_read.empty() && chunks.empty()) { + // i need to get the blocksize from the first element of chunks + return -1; + } + + have.reserve(chunks.size()); + for (map::const_iterator i = chunks.begin(); + i != chunks.end(); + ++i) { + have.push_back(i->first); + } + if (includes( + have.begin(), have.end(), want_to_read.begin(), want_to_read.end())) { + for (set::iterator i = want_to_read.begin(); + i != want_to_read.end(); + ++i) { + (*decoded)[*i] = chunks.find(*i)->second; + } + return 0; + } + unsigned int k = get_data_chunk_count(); + unsigned int m = get_chunk_count() - k; + unsigned blocksize = (*chunks.begin()).second.length(); + for (unsigned int i = 0; i < k + m; i++) { + if (chunks.find(i) == chunks.end()) { + bufferlist tmp; + bufferptr ptr(buffer::create_aligned(blocksize, SIMD_ALIGN)); + tmp.push_back(ptr); + tmp.claim_append((*decoded)[i]); + (*decoded)[i].swap(tmp); + } else { + (*decoded)[i] = chunks.find(i)->second; + (*decoded)[i].rebuild_aligned(SIMD_ALIGN); + } + } + return decode_chunks(want_to_read, chunks, decoded); +} + +int ErasureCodeShec::decode_chunks(const set &want_to_read, + const map &chunks, + map *decoded) +{ + unsigned blocksize = (*chunks.begin()).second.length(); + int erased[k + m]; + int erased_count = 0; + int avails[k + m]; + char *data[k]; + char *coding[m]; + + for (int i = 0; i < k + m; i++) { + erased[i] = 0; + if (chunks.find(i) == chunks.end()) { + if (want_to_read.count(i) > 0) { + erased[i] = 1; + erased_count++; + } + avails[i] = 0; + } else { + avails[i] = 1; + } + if (i < k) + data[i] = (*decoded)[i].c_str(); + else + coding[i - k] = (*decoded)[i].c_str(); + } + + if (erased_count > 0) { + return shec_decode(erased, avails, data, coding, blocksize); + } else { + return 0; + } +} + +// +// ErasureCodeShecReedSolomonVandermonde +// + +void ErasureCodeShecReedSolomonVandermonde::shec_encode(char **data, + char **coding, + int blocksize) +{ + jerasure_matrix_encode(k, m, w, matrix, data, coding, blocksize); +} + +int ErasureCodeShecReedSolomonVandermonde::shec_decode(int *erased, + int *avails, + char **data, + char **coding, + int blocksize) +{ + return shec_matrix_decode(erased, avails, data, coding, blocksize); +} + +unsigned ErasureCodeShecReedSolomonVandermonde::get_alignment() const +{ + return k*w*sizeof(int); +} + +int ErasureCodeShecReedSolomonVandermonde::parse(const ErasureCodeProfile &profile) +{ + int err = 0; + // k, m, c + if (profile.find("k") == profile.end() && + profile.find("m") == profile.end() && + profile.find("c") == profile.end()){ + dout(10) << "(k, m, c) default to " << "(" << DEFAULT_K + << ", " << DEFAULT_M << ", " << DEFAULT_C << ")" << dendl; + k = DEFAULT_K; m = DEFAULT_M; c = DEFAULT_C; + } else if (profile.find("k") == profile.end() || + profile.find("m") == profile.end() || + profile.find("c") == profile.end()){ + dout(10) << "(k, m, c) must be chosen" << dendl; + err = -EINVAL; + } else { + std::string err_k, err_m, err_c, value_k, value_m, value_c; + value_k = profile.find("k")->second; + value_m = profile.find("m")->second; + value_c = profile.find("c")->second; + k = strict_strtol(value_k.c_str(), 10, &err_k); + m = strict_strtol(value_m.c_str(), 10, &err_m); + c = strict_strtol(value_c.c_str(), 10, &err_c); + + if (!err_k.empty() || !err_m.empty() || !err_c.empty()){ + if (!err_k.empty()){ + derr << "could not convert k=" << value_k << "to int" << dendl; + } else if (!err_m.empty()){ + derr << "could not convert m=" << value_m << "to int" << dendl; + } else if (!err_c.empty()){ + derr << "could not convert c=" << value_c << "to int" << dendl; + } + err = -EINVAL; + } else if (k <= 0){ + derr << "k=" << k + << " must be a positive number" << dendl; + err = -EINVAL; + } else if (m <= 0){ + derr << "m=" << m + << " must be a positive number" << dendl; + err = -EINVAL; + } else if (c <= 0){ + derr << "c=" << c + << " must be a positive number" << dendl; + err = -EINVAL; + } else if (m < c){ + derr << "c=" << c + << " must be less than or equal to m=" << m << dendl; + err = -EINVAL; + } else if (k > 12){ + derr << "k=" << k + << " must be less than or equal to 12" << dendl; + err = -EINVAL; + } else if (k+m > 20){ + derr << "k+m=" << k+m + << " must be less than or equal to 20" << dendl; + err = -EINVAL; + } else if (ksecond; + w = strict_strtol(value_w.c_str(), 10, &err_w); + + if (!err_w.empty()){ + derr << "could not convert w=" << value_w << "to int" << dendl; + dout(10) << "w default to " << DEFAULT_W << dendl; + w = DEFAULT_W; + + } else if (w != 8 && w != 16 && w != 32) { + derr << "w=" << w + << " must be one of {8, 16, 32}" << dendl; + dout(10) << "w default to " << DEFAULT_W << dendl; + w = DEFAULT_W; + + } else { + dout(10) << "w set to " << w << dendl; + } + } + return 0; +} + +void ErasureCodeShecReedSolomonVandermonde::prepare() +{ + // setup shared encoding table + int** p_enc_table = + tcache.getEncodingTable(technique, k, m, c, w); + + if (!*p_enc_table) { + dout(10) << "[ cache tables ] creating coeff for k=" << + k << " m=" << m << " c=" << c << " w=" << w << dendl; + + matrix = shec_reedsolomon_coding_matrix(technique); + + // either our new created table is stored or if it has been + // created in the meanwhile the locally allocated table will be + // freed by setEncodingTable + matrix = tcache.setEncodingTable(technique, k, m, c, w, matrix); + + dout(10) << "matrix = " << dendl; + for (int i=0; i 0) { + mat[j] = '1'; + } else { + mat[j] = '0'; + } + } + mat[k] = '\0'; + dout(10) << mat << dendl; + } + } else { + matrix = *p_enc_table; + } + + dout(10) << " [ technique ] = " << + ((technique == MULTIPLE) ? "multiple" : "single") << dendl; + + ceph_assert((technique == SINGLE) || (technique == MULTIPLE)); + +} + +// ErasureCodeShec:: +// Mearged from shec.cc. + +double ErasureCodeShec::shec_calc_recovery_efficiency1(int k, int m1, int m2, int c1, int c2){ + int r_eff_k[k]; + double r_e1; + int i, rr, cc, start, end; + int first_flag; + + if (m1 < c1 || m2 < c2) return -1; + if ((m1 == 0 && c1 != 0) || (m2 == 0 && c2 != 0)) return -1; + + for (i=0; i std::numeric_limits::epsilon() && + r_e1 < min_r_e1) { + min_r_e1 = r_e1; + c1_best = c1; + m1_best = m1; + } + } + } + } + m1 = m1_best; + c1 = c1_best; + m2 = m - m1_best; + c2 = c - c1_best; + } else { + m1 = 0; + c1 = 0; + m2 = m; + c2 = c; + } + + // create matrix + matrix = reed_sol_vandermonde_coding_matrix(k, m, w); + + for (rr=0; rr 0) { + want[j] = 1; + } + } + } + } + + if (tcache.getDecodingTableFromCache(decoding_matrix, + dm_row, dm_column, minimum, + technique, + k, m, c, w, + want, avails)) { + return 0; + } + + for (unsigned long long pp = 0; pp < (1ull << m); ++pp) { + + // select parity chunks + int ek = 0; + int p[m]; + for (int i=0; i < m; ++i) { + if (pp & (1ull << i)) { + p[ek++] = i; + } + } + if (ek > minp) { + continue; + } + + // Are selected parity chunks avail? + bool ok = true; + for (int i = 0; i < ek && ok; i++) { + if (!avails[k+p[i]]) { + ok = false; + break; + } + } + + if (!ok) { + continue; + } + + int tmprow[k + m]; + int tmpcolumn[k]; + for (int i = 0; i < k + m; i++) { + tmprow[i] = 0; + } + for (int i = 0; i < k; i++) { + tmpcolumn[i] = 0; + } + + for (int i=0; i < k; i++) { + if (want[i] && !avails[i]) { + tmpcolumn[i] = 1; + } + } + + // Parity chunks which are used to recovery erased data chunks, are added to tmprow. + for (int i = 0; i < ek; i++) { + tmprow[k + p[i]] = 1; + for (int j = 0; j < k; j++) { + int element = matrix[(p[i]) * k + j]; + if (element != 0) { + tmpcolumn[j] = 1; + } + if (element != 0 && avails[j] == 1) { + tmprow[j] = 1; + } + } + } + + int dup_row = 0, dup_column = 0, dup = 0; + for (int i = 0; i < k + m; i++) { + if (tmprow[i]) { + dup_row++; + } + } + + for (int i = 0; i < k; i++) { + if (tmpcolumn[i]) { + dup_column++; + } + } + + if (dup_row != dup_column) { + continue; + } + dup = dup_row; + if (dup == 0) { + mindup = dup; + for (int i = 0; i < k; i++) { + dm_row[i] = -1; + } + for (int i = 0; i < k; i++) { + dm_column[i] = -1; + } + break; + } + + // minimum is updated. + if (dup < mindup) { + int tmpmat[dup * dup]; + { + for (int i = 0, row = 0; i < k + m; i++) { + if (tmprow[i]) { + for (int j = 0, column = 0; j < k; j++) { + if (tmpcolumn[j]) { + if (i < k) { + tmpmat[row * dup + column] = (i == j ? 1 : 0); + } else { + tmpmat[row * dup + column] = matrix[(i - k) * k + j]; + } + column++; + } + } + row++; + } + } + } + int det = calc_determinant(tmpmat, dup); + + if (det != 0) { + int row_id = 0; + int column_id = 0; + for (int i = 0; i < k; i++) { + dm_row[i] = -1; + } + for (int i = 0; i < k; i++) { + dm_column[i] = -1; + } + + mindup = dup; + for (int i=0; i < k + m; i++) { + if (tmprow[i]) { + dm_row[row_id++] = i; + } + } + for (int i=0; i < k; i++) { + if (tmpcolumn[i]) { + dm_column[column_id++] = i; + } + } + minp = ek; + } + } + } + + + if (mindup == k+1) { + dout(10) << __func__ << ": can't find recover matrix." << dendl; + return -1; + } + + for (int i = 0; i < k + m; i++) { + minimum[i] = 0; + } + + for (int i=0; i < k && dm_row[i] != -1; i++) { + minimum[dm_row[i]] = 1; + } + + for (int i = 0; i < k; ++i) { + if (want[i] && avails[i]) { + minimum[i] = 1; + } + } + + for (int i = 0; i < m; ++i) { + if (want[k + i] && avails[k + i] && !minimum[k + i]) { + for (int j = 0; j < k; ++j) { + if (matrix[i * k + j] > 0 && !want[j]) { + minimum[k + i] = 1; + break; + } + } + } + } + + if (mindup == 0) { + return 0; + } + + int tmpmat[mindup * mindup]; + for (int i=0; i < mindup; i++) { + for (int j=0; j < mindup; j++) { + if (dm_row[i] < k) { + tmpmat[i * mindup + j] = (dm_row[i] == dm_column[j] ? 1 : 0); + } else { + tmpmat[i * mindup + j] = matrix[(dm_row[i] - k) * k + dm_column[j]]; + } + } + if (dm_row[i] < k) { + for (int j = 0; j < mindup; j++) { + if (dm_row[i] == dm_column[j]) { + dm_row[i] = j; + } + } + } else { + dm_row[i] -= (k - mindup); + } + } + + if (prepare) { + return 0; + } + + int ret = jerasure_invert_matrix(tmpmat, decoding_matrix, mindup, w); + + tcache.putDecodingTableToCache(decoding_matrix, dm_row, dm_column, minimum, technique, + k, m, c, w, want, avails); + + return ret; +} + +int ErasureCodeShec::shec_matrix_decode(int *want, int *avails, char **data_ptrs, + char **coding_ptrs, int size) +{ + int decoding_matrix[k*k]; + int dm_row[k], dm_column[k]; + int minimum[k + m]; + + memset(decoding_matrix, 0, sizeof(decoding_matrix)); + memset(dm_row, -1, sizeof(dm_row)); + memset(dm_column, -1, sizeof(dm_column)); + memset(minimum, -1, sizeof(minimum)); + + if (w != 8 && w != 16 && w != 32) return -1; + + if (shec_make_decoding_matrix(false, want, avails, decoding_matrix, + dm_row, dm_column, minimum) < 0) { + return -1; + } + + // Get decoding matrix size + int dm_size = 0; + for (int i = 0; i < k; i++) { + if (dm_row[i] == -1) { + break; + } + dm_size++; + } + + char *dm_data_ptrs[dm_size]; + for (int i = 0; i < dm_size; i++) { + dm_data_ptrs[i] = data_ptrs[dm_column[i]]; + } + + // Decode the data drives + for (int i = 0; i < dm_size; i++) { + if (!avails[dm_column[i]]) { + jerasure_matrix_dotprod(dm_size, w, decoding_matrix + (i * dm_size), + dm_row, i, dm_data_ptrs, coding_ptrs, size); + } + } + + // Re-encode any erased coding devices + for (int i = 0; i < m; i++) { + if (want[k+i] && !avails[k+i]) { + jerasure_matrix_dotprod(k, w, matrix + (i * k), NULL, i+k, + data_ptrs, coding_ptrs, size); + } + } + + return 0; +} diff --git a/src/erasure-code/shec/ErasureCodeShec.h b/src/erasure-code/shec/ErasureCodeShec.h new file mode 100644 index 000000000..44a077885 --- /dev/null +++ b/src/erasure-code/shec/ErasureCodeShec.h @@ -0,0 +1,147 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 FUJITSU LIMITED + * Copyright (C) 2013, 2014 Cloudwatt + * Copyright (C) 2014 Red Hat + * + * Author: Takanori Nakao + * Author: Takeshi Miyamae + * Author: Loic Dachary + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +#ifndef CEPH_ERASURE_CODE_SHEC_H +#define CEPH_ERASURE_CODE_SHEC_H + +#include "erasure-code/ErasureCode.h" +#include "ErasureCodeShecTableCache.h" + +class ErasureCodeShec : public ceph::ErasureCode { + +public: + enum { + MULTIPLE = 0, + SINGLE = 1 + }; + + ErasureCodeShecTableCache &tcache; + int k; + int DEFAULT_K; + int m; + int DEFAULT_M; + int c; + int DEFAULT_C; + int w; + int DEFAULT_W; + int technique; + int *matrix; + + ErasureCodeShec(const int _technique, + ErasureCodeShecTableCache &_tcache) : + tcache(_tcache), + k(0), + DEFAULT_K(4), + m(0), + DEFAULT_M(3), + c(0), + DEFAULT_C(2), + w(0), + DEFAULT_W(8), + technique(_technique), + matrix(0) + {} + + ~ErasureCodeShec() override {} + + unsigned int get_chunk_count() const override { + return k + m; + } + + unsigned int get_data_chunk_count() const override { + return k; + } + + unsigned int get_chunk_size(unsigned int object_size) const override; + + int _minimum_to_decode(const std::set &want_to_read, + const std::set &available_chunks, + std::set *minimum); + + int minimum_to_decode_with_cost(const std::set &want_to_read, + const std::map &available, + std::set *minimum) override; + + int encode(const std::set &want_to_encode, + const ceph::buffer::list &in, + std::map *encoded) override; + int encode_chunks(const std::set &want_to_encode, + std::map *encoded) override; + + int _decode(const std::set &want_to_read, + const std::map &chunks, + std::map *decoded) override; + int decode_chunks(const std::set &want_to_read, + const std::map &chunks, + std::map *decoded) override; + + int init(ceph::ErasureCodeProfile &profile, std::ostream *ss) override; + virtual void shec_encode(char **data, + char **coding, + int blocksize) = 0; + virtual int shec_decode(int *erasures, + int *avails, + char **data, + char **coding, + int blocksize) = 0; + virtual unsigned get_alignment() const = 0; + virtual void prepare() = 0; + + virtual int shec_matrix_decode(int *erased, int *avails, + char **data_ptrs, char **coding_ptrs, int size); + virtual int* shec_reedsolomon_coding_matrix(int is_single); + +private: + virtual int parse(const ceph::ErasureCodeProfile &profile) = 0; + + virtual double shec_calc_recovery_efficiency1(int k, int m1, int m2, int c1, int c2); + virtual int shec_make_decoding_matrix(bool prepare, + int *want, int *avails, + int *decoding_matrix, + int *dm_row, int *dm_column, + int *minimum); +}; + +class ErasureCodeShecReedSolomonVandermonde final : public ErasureCodeShec { +public: + + ErasureCodeShecReedSolomonVandermonde(ErasureCodeShecTableCache &_tcache, + int technique = MULTIPLE) : + ErasureCodeShec(technique, _tcache) + {} + + ~ErasureCodeShecReedSolomonVandermonde() override { + } + + void shec_encode(char **data, + char **coding, + int blocksize) override; + int shec_decode(int *erasures, + int *avails, + char **data, + char **coding, + int blocksize) override; + unsigned get_alignment() const override; + void prepare() override; +private: + int parse(const ceph::ErasureCodeProfile &profile) override; +}; + +#endif diff --git a/src/erasure-code/shec/ErasureCodeShecTableCache.cc b/src/erasure-code/shec/ErasureCodeShecTableCache.cc new file mode 100644 index 000000000..2f9a60b62 --- /dev/null +++ b/src/erasure-code/shec/ErasureCodeShecTableCache.cc @@ -0,0 +1,318 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 FUJITSU LIMITED + * Copyright (C) 2014 CERN (Switzerland) + * + * Author: Takanori Nakao + * Author: Takeshi Miyamae + * Author: Andreas-Joachim Peters + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +// ----------------------------------------------------------------------------- +#include "ErasureCodeShecTableCache.h" +#include "common/debug.h" +// ----------------------------------------------------------------------------- +using namespace std; + +// ----------------------------------------------------------------------------- +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_osd +#undef dout_prefix +#define dout_prefix _tc_prefix(_dout) +// ----------------------------------------------------------------------------- + +// ----------------------------------------------------------------------------- + +static ostream& +_tc_prefix(std::ostream* _dout) { + return *_dout << "ErasureCodeShecTableCache: "; +} + +// ----------------------------------------------------------------------------- + +ErasureCodeShecTableCache::~ErasureCodeShecTableCache() +{ + std::lock_guard lock{codec_tables_guard}; + + // clean-up all allocated tables + { + codec_technique_tables_t::const_iterator ttables_it; + codec_tables_t::const_iterator tables_it; + codec_tables_t_::const_iterator tables_it_; + codec_tables_t__::const_iterator tables_it__; + codec_table_t::const_iterator table_it; + + for (ttables_it = encoding_table.begin(); ttables_it != encoding_table.end(); ++ttables_it) { + for (tables_it = ttables_it->second.begin(); tables_it != ttables_it->second.end(); ++tables_it) { + for (tables_it_ = tables_it->second.begin(); tables_it_ != tables_it->second.end(); ++tables_it_) { + for (tables_it__ = tables_it_->second.begin(); tables_it__ != tables_it_->second.end(); ++tables_it__) { + for (table_it = tables_it__->second.begin(); table_it != tables_it__->second.end(); ++table_it) { + if (table_it->second) { + if (*(table_it->second)) { + delete *(table_it->second); + } + delete table_it->second; + } + } + } + } + } + } + } + + { + std::map::const_iterator lru_map_it; + std::map::const_iterator lru_list_it; + + for (lru_map_it = decoding_tables.begin(); + lru_map_it != decoding_tables.end(); + ++lru_map_it) { + if (lru_map_it->second) { + delete lru_map_it->second; + } + } + + for (lru_list_it = decoding_tables_lru.begin(); + lru_list_it != decoding_tables_lru.end(); + ++lru_list_it) { + if (lru_list_it->second) { + delete lru_list_it->second; + } + } + } +} + +ErasureCodeShecTableCache::lru_map_t* +ErasureCodeShecTableCache::getDecodingTables(int technique) { + // the caller must hold the guard mutex: + // => std::lock_guard lock{codec_tables_guard}; + + // create an lru_map if not yet allocated + if (!decoding_tables[technique]) { + decoding_tables[technique] = new lru_map_t; + } + return decoding_tables[technique]; +} + +ErasureCodeShecTableCache::lru_list_t* +ErasureCodeShecTableCache::getDecodingTablesLru(int technique) { + // the caller must hold the guard mutex: + // => std::lock_guard lock{codec_tables_guard}; + + // create an lru_list if not yet allocated + if (!decoding_tables_lru[technique]) { + decoding_tables_lru[technique] = new lru_list_t; + } + return decoding_tables_lru[technique]; +} + +int** +ErasureCodeShecTableCache::getEncodingTable(int technique, int k, int m, int c, int w) +{ + std::lock_guard lock{codec_tables_guard}; + return getEncodingTableNoLock(technique,k,m,c,w); +} + +// ----------------------------------------------------------------------------- + +int** +ErasureCodeShecTableCache::getEncodingTableNoLock(int technique, int k, int m, int c, int w) +{ + // create a pointer to store an encoding table address + if (!encoding_table[technique][k][m][c][w]) { + encoding_table[technique][k][m][c][w] = new (int*); + *encoding_table[technique][k][m][c][w] = 0; + } + return encoding_table[technique][k][m][c][w]; +} + +int* +ErasureCodeShecTableCache::setEncodingTable(int technique, int k, int m, int c, int w, int* ec_in_table) +{ + std::lock_guard lock{codec_tables_guard}; + int** ec_out_table = getEncodingTableNoLock(technique, k, m, c, w); + if (*ec_out_table) { + // somebody might have deposited this table in the meanwhile, so clean + // the input table and return the stored one + free (ec_in_table); + return *ec_out_table; + } else { + // we store the provided input table and return this one + *encoding_table[technique][k][m][c][w] = ec_in_table; + return ec_in_table; + } +} + +ceph::mutex* +ErasureCodeShecTableCache::getLock() +{ + return &codec_tables_guard; +} + +uint64_t +ErasureCodeShecTableCache::getDecodingCacheSignature(int k, int m, int c, int w, + int *erased, int *avails) { + uint64_t signature = 0; + signature = (uint64_t)k; + signature |= ((uint64_t)m << 6); + signature |= ((uint64_t)c << 12); + signature |= ((uint64_t)w << 18); + + for (int i=0; i < k+m; i++) { + signature |= ((uint64_t)(avails[i] ? 1 : 0) << (24+i)); + } + for (int i=0; i < k+m; i++) { + signature |= ((uint64_t)(erased[i] ? 1 : 0) << (44+i)); + } + return signature; +} + +bool +ErasureCodeShecTableCache::getDecodingTableFromCache(int* decoding_matrix, + int* dm_row, + int* dm_column, + int* minimum, + int technique, + int k, + int m, + int c, + int w, + int* erased, + int* avails) { + // -------------------------------------------------------------------------- + // LRU decoding matrix cache + // -------------------------------------------------------------------------- + + uint64_t signature = getDecodingCacheSignature(k, m, c, w, erased, avails); + std::lock_guard lock{codec_tables_guard}; + + dout(20) << "[ get table ] = " << signature << dendl; + + // we try to fetch a decoding table from an LRU cache + lru_map_t* decode_tbls_map = + getDecodingTables(technique); + + lru_list_t* decode_tbls_lru = + getDecodingTablesLru(technique); + + lru_map_t::iterator decode_tbls_map_it = decode_tbls_map->find(signature); + if (decode_tbls_map_it == decode_tbls_map->end()) { + return false; + } + + dout(20) << "[ cached table ] = " << signature << dendl; + // copy parameters out of the cache + + memcpy(decoding_matrix, + decode_tbls_map_it->second.second.decoding_matrix, + k * k * sizeof(int)); + memcpy(dm_row, + decode_tbls_map_it->second.second.dm_row, + k * sizeof(int)); + memcpy(dm_column, + decode_tbls_map_it->second.second.dm_column, + k * sizeof(int)); + memcpy(minimum, + decode_tbls_map_it->second.second.minimum, + (k+m) * sizeof(int)); + + // find item in LRU queue and push back + decode_tbls_lru->splice(decode_tbls_lru->end(), + *decode_tbls_lru, + decode_tbls_map_it->second.first); + return true; +} + +void +ErasureCodeShecTableCache::putDecodingTableToCache(int* decoding_matrix, + int* dm_row, + int* dm_column, + int* minimum, + int technique, + int k, + int m, + int c, + int w, + int* erased, + int* avails) { + // -------------------------------------------------------------------------- + // LRU decoding matrix cache + // -------------------------------------------------------------------------- + + std::lock_guard lock{codec_tables_guard}; + + uint64_t signature = getDecodingCacheSignature(k, m, c, w, erased, avails); + dout(20) << "[ put table ] = " << signature << dendl; + + // we store a new table to the cache + + // bufferptr cachetable; + + lru_map_t* decode_tbls_map = + getDecodingTables(technique); + + lru_list_t* decode_tbls_lru = + getDecodingTablesLru(technique); + + if (decode_tbls_map->count(signature)) { + dout(20) << "[ already on table ] = " << signature << dendl; + + // find item in LRU queue and push back + decode_tbls_lru->splice(decode_tbls_lru->end(), + *decode_tbls_lru, + (*decode_tbls_map)[signature].first); + return; + } + + // evt. shrink the LRU queue/map + if ((int)decode_tbls_lru->size() >= + ErasureCodeShecTableCache::decoding_tables_lru_length) { + dout(20) << "[ shrink lru ] = " << signature << dendl; + // remove from map + decode_tbls_map->erase(decode_tbls_lru->front()); + // remove from lru + decode_tbls_lru->pop_front(); + } + + { + dout(20) << "[ store table ] = " << signature << dendl; + + decode_tbls_lru->push_back(signature); + + // allocate a new buffer + lru_list_t::iterator it_end = decode_tbls_lru->end(); + --it_end; + + lru_entry_t &map_value = + (*decode_tbls_map)[signature] = + std::make_pair(it_end, DecodingCacheParameter()); + map_value.second.decoding_matrix = new int[k*k]; + map_value.second.dm_row = new int[k]; + map_value.second.dm_column = new int[k]; + map_value.second.minimum = new int[k+m]; + + memcpy(map_value.second.decoding_matrix, + decoding_matrix, + k * k * sizeof(int)); + memcpy(map_value.second.dm_row, + dm_row, + k * sizeof(int)); + memcpy(map_value.second.dm_column, + dm_column, + k * sizeof(int)); + memcpy(map_value.second.minimum, + minimum, + (k+m) * sizeof(int)); + + dout(20) << "[ cache size ] = " << decode_tbls_lru->size() << dendl; + } +} diff --git a/src/erasure-code/shec/ErasureCodeShecTableCache.h b/src/erasure-code/shec/ErasureCodeShecTableCache.h new file mode 100644 index 000000000..bed7fa018 --- /dev/null +++ b/src/erasure-code/shec/ErasureCodeShecTableCache.h @@ -0,0 +1,120 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 FUJITSU LIMITED + * Copyright (C) 2014 CERN (Switzerland) + * + * Author: Takanori Nakao + * Author: Takeshi Miyamae + * Author: Andreas-Joachim Peters + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +#ifndef CEPH_ERASURE_CODE_SHEC_TABLE_CACHE_H +#define CEPH_ERASURE_CODE_SHEC_TABLE_CACHE_H + +// ----------------------------------------------------------------------------- +#include "common/ceph_mutex.h" +#include "erasure-code/ErasureCodeInterface.h" +// ----------------------------------------------------------------------------- +#include +// ----------------------------------------------------------------------------- + +class ErasureCodeShecTableCache { + // --------------------------------------------------------------------------- + // This class implements a table cache for encoding and decoding matrices. + // Encoding matrices are shared for the same (k,m,c,w) combination. + // It supplies a decoding matrix lru cache which is shared for identical + // matrix types e.g. there is one cache (lru-list + lru-map) + // --------------------------------------------------------------------------- + + class DecodingCacheParameter { + public: + int* decoding_matrix; // size: k*k + int* dm_row; // size: k + int* dm_column; // size: k + int* minimum; // size: k+m + DecodingCacheParameter() { + decoding_matrix = 0; + dm_row = 0; + dm_column = 0; + minimum = 0; + } + ~DecodingCacheParameter() { + if (decoding_matrix) { + delete[] decoding_matrix; + } + if (dm_row) { + delete[] dm_row; + } + if (dm_column) { + delete[] dm_column; + } + if (minimum) { + delete[] minimum; + } + } + }; + + public: + + static const int decoding_tables_lru_length = 10000; + typedef std::pair::iterator, + DecodingCacheParameter> lru_entry_t; + typedef std::map< int, int** > codec_table_t; + typedef std::map< int, codec_table_t > codec_tables_t__; + typedef std::map< int, codec_tables_t__ > codec_tables_t_; + typedef std::map< int, codec_tables_t_ > codec_tables_t; + typedef std::map< int, codec_tables_t > codec_technique_tables_t; + // int** matrix = codec_technique_tables_t[technique][k][m][c][w] + + typedef std::map< uint64_t, lru_entry_t > lru_map_t; + typedef std::list< uint64_t > lru_list_t; + + ErasureCodeShecTableCache() = default; + virtual ~ErasureCodeShecTableCache(); + // mutex used to protect modifications in encoding/decoding table maps + ceph::mutex codec_tables_guard = ceph::make_mutex("shec-lru-cache"); + + bool getDecodingTableFromCache(int* matrix, + int* dm_row, int* dm_column, + int* minimum, + int technique, + int k, int m, int c, int w, + int* want, int* avails); + + void putDecodingTableToCache(int* matrix, + int* dm_row, int* dm_column, + int* minimum, + int technique, + int k, int m, int c, int w, + int* want, int* avails); + + int** getEncodingTable(int technique, int k, int m, int c, int w); + int** getEncodingTableNoLock(int technique, int k, int m, int c, int w); + int* setEncodingTable(int technique, int k, int m, int c, int w, int*); + + private: + // encoding table accessed via table[matrix][k][m][c][w] + // decoding table cache accessed via map[matrixtype] + // decoding table lru list accessed via list[matrixtype] + codec_technique_tables_t encoding_table; + std::map decoding_tables; + std::map decoding_tables_lru; + + lru_map_t* getDecodingTables(int technique); + lru_list_t* getDecodingTablesLru(int technique); + uint64_t getDecodingCacheSignature(int k, int m, int c, int w, + int *want, int *avails); + + ceph::mutex* getLock(); +}; + +#endif diff --git a/src/erasure-code/shec/determinant.c b/src/erasure-code/shec/determinant.c new file mode 100644 index 000000000..15b62c9ef --- /dev/null +++ b/src/erasure-code/shec/determinant.c @@ -0,0 +1,94 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 Fujitsu Laboratories + * + * Author: Takanori Nakao + * Author: Takeshi Miyamae + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include + +#include "jerasure/include/galois.h" + +void print_matrix(int *mat, int dim) +{ + int i, j; + + for (i=0; i