summary | refs | log | tree | commit | diff | stats
path: root/src/erasure-code/ErasureCode.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/erasure-code/ErasureCode.cc')
-rw-r--r-- src/erasure-code/ErasureCode.cc 349
1 files changed, 349 insertions, 0 deletions
diff --git a/src/erasure-code/ErasureCode.cc b/src/erasure-code/ErasureCode.cc
new file mode 100644
index 000000000..0cebf9a98
--- /dev/null
+++ b/src/erasure-code/ErasureCode.cc
@@ -0,0 +1,349 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph distributed storage system
+ *
+ * Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+ * Copyright (C) 2014 Red Hat <contact@redhat.com>
+ *
+ * Author: Loic Dachary <loic@dachary.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <algorithm>
+#include <cerrno>
+
+#include "ErasureCode.h"
+
+#include "common/strtol.h"
+#include "include/buffer.h"
+#include "crush/CrushWrapper.h"
+#include "osd/osd_types.h"
+
+#define DEFAULT_RULE_ROOT "default"
+#define DEFAULT_RULE_FAILURE_DOMAIN "host"
+
+using std::make_pair;
+using std::map;
+using std::ostream;
+using std::pair;
+using std::set;
+using std::string;
+using std::vector;
+
+using ceph::bufferlist;
+
+namespace ceph {
+const unsigned ErasureCode::SIMD_ALIGN = 32;  // alignment handed to the aligned-buffer helpers used by encode_prepare() and _decode()
+
+/**
+ * Pull the CRUSH rule settings out of @p profile and remember the profile.
+ *
+ * The keys "crush-root", "crush-failure-domain" and "crush-device-class"
+ * are read through to_string(), which writes the given default back into
+ * @p profile whenever a key is missing or empty.
+ *
+ * @param profile erasure code profile; may be completed with defaults
+ * @param ss stream forwarded to to_string()
+ * @return 0 when every lookup succeeded, otherwise the bitwise OR of the
+ *         to_string() results (in which case the profile is not stored)
+ */
+int ErasureCode::init(
+  ErasureCodeProfile &profile,
+  std::ostream *ss)
+{
+  int err = to_string("crush-root", profile,
+                      &rule_root,
+                      DEFAULT_RULE_ROOT, ss);
+  err |= to_string("crush-failure-domain", profile,
+                   &rule_failure_domain,
+                   DEFAULT_RULE_FAILURE_DOMAIN, ss);
+  err |= to_string("crush-device-class", profile,
+                   &rule_device_class,
+                   "", ss);
+  if (err == 0) {
+    // Only keep the profile once all settings were read successfully.
+    _profile = profile;
+  }
+  return err;
+}
+
+/**
+ * Create a simple "indep" erasure rule named @p name in @p crush, using
+ * the root, failure domain and device class stored on this object.
+ *
+ * On success the rule's maximum size mask is set to get_chunk_count().
+ *
+ * @param name name of the rule to add
+ * @param crush CRUSH wrapper that receives the rule
+ * @param ss stream forwarded to CrushWrapper::add_simple_rule()
+ * @return the value returned by add_simple_rule(): the rule id when it is
+ *         non-negative, otherwise the negative value unchanged
+ */
+int ErasureCode::create_rule(
+  const std::string &name,
+  CrushWrapper &crush,
+  std::ostream *ss) const
+{
+  const int rule_id = crush.add_simple_rule(name,
+                                            rule_root,
+                                            rule_failure_domain,
+                                            rule_device_class,
+                                            "indep",
+                                            pg_pool_t::TYPE_ERASURE,
+                                            ss);
+  if (rule_id >= 0) {
+    crush.set_rule_mask_max_size(rule_id, get_chunk_count());
+  }
+  return rule_id;
+}
+
+/**
+ * Validate the data (k) and coding (m) chunk counts.
+ *
+ * k is checked first; only the first violated constraint is reported.
+ *
+ * @param k number of data chunks, must be >= 2
+ * @param m number of coding chunks, must be >= 1
+ * @param ss receives the explanation on failure; it is dereferenced
+ *           without a null check on the failure paths
+ * @return 0 when both values are acceptable, -EINVAL otherwise
+ */
+int ErasureCode::sanity_check_k_m(int k, int m, ostream *ss)
+{
+  int ret = 0;
+  if (k < 2) {
+    *ss << "k=" << k << " must be >= 2" << std::endl;
+    ret = -EINVAL;
+  } else if (m < 1) {
+    *ss << "m=" << m << " must be >= 1" << std::endl;
+    ret = -EINVAL;
+  }
+  return ret;
+}
+
+/**
+ * Translate position @p i through chunk_mapping.
+ *
+ * @param i position to translate
+ * @return chunk_mapping[i] when the mapping has an entry for @p i,
+ *         otherwise @p i itself
+ */
+int ErasureCode::chunk_index(unsigned int i) const
+{
+  if (i < chunk_mapping.size())
+    return chunk_mapping[i];
+  // No entry for this position: identity mapping.
+  return i;
+}
+
+/**
+ * Choose which chunks to fetch in order to obtain @p want_to_read.
+ *
+ * If every wanted chunk is available, @p minimum is overwritten with
+ * @p want_to_read. Otherwise the first get_data_chunk_count() entries of
+ * @p available_chunks (in set order) are inserted into @p minimum.
+ *
+ * @param want_to_read chunk ids the caller needs
+ * @param available_chunks chunk ids that can be read
+ * @param minimum output set of chunk ids to read
+ * @return 0 on success, -EIO when fewer than get_data_chunk_count()
+ *         chunks are available
+ */
+int ErasureCode::_minimum_to_decode(const set<int> &want_to_read,
+                                    const set<int> &available_chunks,
+                                    set<int> *minimum)
+{
+  const bool all_wanted_available =
+    std::includes(available_chunks.begin(), available_chunks.end(),
+                  want_to_read.begin(), want_to_read.end());
+  if (all_wanted_available) {
+    *minimum = want_to_read;
+    return 0;
+  }
+
+  const unsigned int needed = get_data_chunk_count();
+  if (available_chunks.size() < needed)
+    return -EIO;
+
+  set<int>::const_iterator it = available_chunks.begin();
+  for (unsigned int taken = 0; taken < needed; ++taken, ++it)
+    minimum->insert(*it);
+  return 0;
+}
+
+/**
+ * Same selection as _minimum_to_decode(), reported per shard together
+ * with the sub-chunk range to read.
+ *
+ * Every selected shard is associated with the single range
+ * (0, get_sub_chunk_count()).
+ *
+ * @param want_to_read chunk ids the caller needs
+ * @param available_chunks chunk ids that can be read
+ * @param minimum output map: shard id -> list of (first, second) pairs
+ * @return 0 on success, otherwise the error from _minimum_to_decode()
+ */
+int ErasureCode::minimum_to_decode(const set<int> &want_to_read,
+                                   const set<int> &available_chunks,
+                                   map<int, vector<pair<int, int>>> *minimum)
+{
+  set<int> shard_ids;
+  const int r = _minimum_to_decode(want_to_read, available_chunks, &shard_ids);
+  if (r != 0)
+    return r;
+
+  vector<pair<int, int>> whole_chunk;
+  whole_chunk.emplace_back(0, get_sub_chunk_count());
+  for (const int shard : shard_ids) {
+    // emplace, like insert, leaves an already-present entry untouched.
+    minimum->emplace(shard, whole_chunk);
+  }
+  return 0;
+}
+
+/**
+ * Variant of _minimum_to_decode() taking a chunk -> cost map.
+ *
+ * Only the keys of @p available are used; the mapped cost values are
+ * ignored by this implementation.
+ *
+ * @param want_to_read chunk ids the caller needs
+ * @param available map whose keys are the available chunk ids
+ * @param minimum output set of chunk ids to read
+ * @return the result of _minimum_to_decode()
+ */
+int ErasureCode::minimum_to_decode_with_cost(const set<int> &want_to_read,
+                                             const map<int, int> &available,
+                                             set<int> *minimum)
+{
+  set<int> available_chunks;
+  for (const auto &chunk_and_cost : available)
+    available_chunks.insert(chunk_and_cost.first);
+  return _minimum_to_decode(want_to_read, available_chunks, minimum);
+}
+
+/**
+ * Split @p raw into k data chunks of get_chunk_size(raw.length()) each and
+ * allocate the m coding chunks, all stored in @p encoded under
+ * chunk_index(position).
+ *
+ * Data chunks fully covered by @p raw are taken from the input and rebuilt
+ * aligned to SIMD_ALIGN. The first chunk that is not fully covered gets the
+ * remaining input followed by zeros; any further data chunks are all zero.
+ * Coding chunks are allocated but their content is not initialised here.
+ *
+ * @param raw input data
+ * @param encoded output map, chunk index -> buffer
+ * @return always 0
+ */
+int ErasureCode::encode_prepare(const bufferlist &raw,
+                                map<int, bufferlist> &encoded) const
+{
+  const unsigned int k = get_data_chunk_count();
+  const unsigned int m = get_chunk_count() - k;
+  const unsigned blocksize = get_chunk_size(raw.length());
+  const unsigned padded_chunks = k - raw.length() / blocksize;
+  // Number of data chunks that are completely filled by the input.
+  const unsigned full_chunks = k - padded_chunks;
+  bufferlist prepared = raw;
+
+  for (unsigned int pos = 0; pos < full_chunks; ++pos) {
+    bufferlist &chunk = encoded[chunk_index(pos)];
+    chunk.substr_of(prepared, pos * blocksize, blocksize);
+    chunk.rebuild_aligned_size_and_memory(blocksize, SIMD_ALIGN);
+    ceph_assert(chunk.is_contiguous());
+  }
+
+  if (padded_chunks != 0) {
+    // Partially filled chunk: tail of the input, then zero padding.
+    const unsigned tail_offset = full_chunks * blocksize;
+    const unsigned remainder = raw.length() - tail_offset;
+    bufferptr partial(buffer::create_aligned(blocksize, SIMD_ALIGN));
+
+    raw.begin(tail_offset).copy(remainder, partial.c_str());
+    partial.zero(remainder, blocksize - remainder);
+    encoded[chunk_index(full_chunks)].push_back(std::move(partial));
+
+    // Remaining data chunks carry no input at all.
+    for (unsigned int pos = full_chunks + 1; pos < k; ++pos) {
+      bufferptr zeros(buffer::create_aligned(blocksize, SIMD_ALIGN));
+      zeros.zero();
+      encoded[chunk_index(pos)].push_back(std::move(zeros));
+    }
+  }
+
+  // Space for the coding chunks.
+  for (unsigned int pos = k; pos < k + m; ++pos) {
+    encoded[chunk_index(pos)].push_back(
+      buffer::create_aligned(blocksize, SIMD_ALIGN));
+  }
+
+  return 0;
+}
+
+/**
+ * Encode @p in and leave only the chunks listed in @p want_to_encode in
+ * @p encoded.
+ *
+ * encode_prepare() lays out all data and coding chunks, encode_chunks()
+ * fills them in, and every index in [0, k + m) that was not requested is
+ * erased from @p encoded afterwards.
+ *
+ * @param want_to_encode chunk indexes the caller wants back
+ * @param in data to encode
+ * @param encoded output map, chunk index -> buffer
+ * @return 0 on success, otherwise the error from encode_prepare()
+ */
+int ErasureCode::encode(const set<int> &want_to_encode,
+                        const bufferlist &in,
+                        map<int, bufferlist> *encoded)
+{
+  unsigned int k = get_data_chunk_count();
+  unsigned int m = get_chunk_count() - k;
+  int err = encode_prepare(in, *encoded);
+  if (err)
+    return err;
+  // NOTE(review): whatever encode_chunks() returns is discarded here, as
+  // before; confirm whether it can fail and should be propagated.
+  encode_chunks(want_to_encode, encoded);
+  for (unsigned int i = 0; i < k + m; i++) {
+    if (want_to_encode.count(i) == 0)
+      encoded->erase(i);
+  }
+  return 0;
+}
+
+/**
+ * Produce the chunks in @p want_to_read from the available @p chunks.
+ *
+ * When every wanted chunk is already present in @p chunks, the wanted
+ * entries are copied into @p decoded and nothing else happens. Otherwise
+ * @p decoded is filled for every index in [0, k + m): present chunks are
+ * copied and rebuilt aligned to SIMD_ALIGN, missing ones get a freshly
+ * allocated aligned buffer of the same length as the first available
+ * chunk, and decode_chunks() is called to do the reconstruction.
+ *
+ * @param want_to_read chunk indexes the caller needs
+ * @param chunks available chunks, index -> buffer
+ * @param decoded output map, index -> buffer
+ * @return 0 on the fast path, -EIO when reconstruction is needed but no
+ *         chunk is available, otherwise the result of decode_chunks()
+ */
+int ErasureCode::_decode(const set<int> &want_to_read,
+                         const map<int, bufferlist> &chunks,
+                         map<int, bufferlist> *decoded)
+{
+  // Keys of a std::map come out sorted, as std::includes requires.
+  vector<int> have;
+  have.reserve(chunks.size());
+  for (const auto &chunk : chunks)
+    have.push_back(chunk.first);
+
+  if (std::includes(have.begin(), have.end(),
+                    want_to_read.begin(), want_to_read.end())) {
+    for (const int id : want_to_read)
+      (*decoded)[id] = chunks.find(id)->second;
+    return 0;
+  }
+
+  // Reconstruction sizes the missing buffers from the first available
+  // chunk; with no chunk at all there is nothing to dereference or decode.
+  if (chunks.empty())
+    return -EIO;
+
+  const unsigned int k = get_data_chunk_count();
+  const unsigned int m = get_chunk_count() - k;
+  const unsigned blocksize = chunks.begin()->second.length();
+  for (unsigned int i = 0; i < k + m; i++) {
+    bufferlist &out = (*decoded)[i];
+    const auto found = chunks.find(i);
+    if (found == chunks.end()) {
+      // Missing chunk: put an aligned buffer in front of whatever the
+      // caller already had in this slot.
+      bufferlist tmp;
+      bufferptr ptr(buffer::create_aligned(blocksize, SIMD_ALIGN));
+      tmp.push_back(ptr);
+      tmp.claim_append(out);
+      out.swap(tmp);
+    } else {
+      out = found->second;
+      out.rebuild_aligned(SIMD_ALIGN);
+    }
+  }
+  return decode_chunks(want_to_read, chunks, decoded);
+}
+
+/**
+ * Public decode entry point; forwards to _decode().
+ *
+ * @param want_to_read chunk indexes the caller needs
+ * @param chunks available chunks, index -> buffer
+ * @param decoded output map, index -> buffer
+ * @param chunk_size accepted for the interface but not used here
+ * @return the result of _decode()
+ */
+int ErasureCode::decode(const set<int> &want_to_read,
+                        const map<int, bufferlist> &chunks,
+                        map<int, bufferlist> *decoded, int chunk_size)
+{
+  const int result = _decode(want_to_read, chunks, decoded);
+  return result;
+}
+
+/**
+ * Parse the parts of @p profile handled by this class, which here is
+ * only the chunk mapping (see to_mapping()).
+ *
+ * @param profile erasure code profile to read
+ * @param ss stream forwarded to to_mapping()
+ * @return the result of to_mapping()
+ */
+int ErasureCode::parse(const ErasureCodeProfile &profile,
+                       ostream *ss)
+{
+  const int result = to_mapping(profile, ss);
+  return result;
+}
+
+/// Read-only access to the chunk mapping filled in by to_mapping().
+const vector<int> &ErasureCode::get_chunk_mapping() const
+{
+  return this->chunk_mapping;
+}
+
+/**
+ * Build chunk_mapping from the "mapping" entry of @p profile, if present.
+ *
+ * Each character of the mapping string stands for one position: positions
+ * holding 'D' are appended to chunk_mapping first, in order, followed by
+ * the positions of all other characters, in order. Entries are appended;
+ * chunk_mapping is not cleared beforehand.
+ *
+ * @param profile erasure code profile to read
+ * @param ss not used by this implementation
+ * @return always 0
+ */
+int ErasureCode::to_mapping(const ErasureCodeProfile &profile,
+                            ostream *ss)
+{
+  const auto entry = profile.find("mapping");
+  if (entry == profile.end())
+    return 0;
+
+  const std::string &mapping = entry->second;
+  vector<int> coding_positions;
+  int position = 0;
+  for (const char c : mapping) {
+    if (c == 'D')
+      chunk_mapping.push_back(position);
+    else
+      coding_positions.push_back(position);
+    ++position;
+  }
+  // Non-'D' positions go after all the 'D' positions.
+  chunk_mapping.insert(chunk_mapping.end(),
+                       coding_positions.begin(),
+                       coding_positions.end());
+  return 0;
+}
+
+/**
+ * Read profile[name] as a base-10 integer.
+ *
+ * A missing or empty entry is first replaced by @p default_value in
+ * @p profile. If the resulting string cannot be converted, the reason is
+ * written to @p ss, @p value receives the converted @p default_value and
+ * -EINVAL is returned; the profile entry itself is left as it was.
+ *
+ * @param name profile key
+ * @param profile erasure code profile; may gain the default for @p name
+ * @param value output integer
+ * @param default_value textual default
+ * @param ss receives the error message; dereferenced without a null
+ *           check on the failure path
+ * @return 0 on success, -EINVAL when the conversion failed
+ */
+int ErasureCode::to_int(const std::string &name,
+                        ErasureCodeProfile &profile,
+                        int *value,
+                        const std::string &default_value,
+                        ostream *ss)
+{
+  // operator[] creates an empty entry when the key is absent, so one
+  // emptiness test covers both "missing" and "empty".
+  std::string &slot = profile[name];
+  if (slot.empty())
+    slot = default_value;
+
+  const std::string p = slot;
+  std::string err;
+  const int parsed = strict_strtol(p.c_str(), 10, &err);
+  if (err.empty()) {
+    *value = parsed;
+    return 0;
+  }
+
+  *ss << "could not convert " << name << "=" << p
+      << " to int because " << err
+      << ", set to default " << default_value << std::endl;
+  *value = strict_strtol(default_value.c_str(), 10, &err);
+  return -EINVAL;
+}
+
+/**
+ * Read profile[name] as a boolean.
+ *
+ * A missing or empty entry is first replaced by @p default_value in
+ * @p profile. The result is true only for the exact strings "yes" and
+ * "true"; anything else yields false.
+ *
+ * @param name profile key
+ * @param profile erasure code profile; may gain the default for @p name
+ * @param value output boolean
+ * @param default_value textual default
+ * @param ss not used by this implementation
+ * @return always 0
+ */
+int ErasureCode::to_bool(const std::string &name,
+                         ErasureCodeProfile &profile,
+                         bool *value,
+                         const std::string &default_value,
+                         ostream *ss)
+{
+  // operator[] creates an empty entry when the key is absent, so one
+  // emptiness test covers both "missing" and "empty".
+  std::string &slot = profile[name];
+  if (slot.empty())
+    slot = default_value;
+
+  *value = (slot == "yes" || slot == "true");
+  return 0;
+}
+
+/**
+ * Read profile[name] as a string.
+ *
+ * A missing or empty entry is first replaced by @p default_value in
+ * @p profile; the (possibly defaulted) entry is then copied to @p value.
+ *
+ * @param name profile key
+ * @param profile erasure code profile; may gain the default for @p name
+ * @param value output string
+ * @param default_value value used when the entry is missing or empty
+ * @param ss not used by this implementation
+ * @return always 0
+ */
+int ErasureCode::to_string(const std::string &name,
+                           ErasureCodeProfile &profile,
+                           std::string *value,
+                           const std::string &default_value,
+                           ostream *ss)
+{
+  // operator[] creates an empty entry when the key is absent, so one
+  // emptiness test covers both "missing" and "empty".
+  std::string &slot = profile[name];
+  if (slot.empty())
+    slot = default_value;
+
+  *value = slot;
+  return 0;
+}
+
+/**
+ * Decode all data chunks and append them, in data-chunk order, to
+ * @p decoded.
+ *
+ * The wanted set is chunk_index(i) for every i below
+ * get_data_chunk_count(). Nothing is appended when decoding fails.
+ *
+ * @param chunks available chunks, index -> buffer
+ * @param decoded buffer that receives the concatenated data chunks
+ * @return 0 on success, otherwise the error from _decode()
+ */
+int ErasureCode::decode_concat(const map<int, bufferlist> &chunks,
+                               bufferlist *decoded)
+{
+  set<int> want_to_read;
+  for (unsigned int pos = 0; pos < get_data_chunk_count(); ++pos)
+    want_to_read.insert(chunk_index(pos));
+
+  map<int, bufferlist> decoded_map;
+  const int r = _decode(want_to_read, chunks, &decoded_map);
+  if (r != 0)
+    return r;
+
+  for (unsigned int pos = 0; pos < get_data_chunk_count(); ++pos)
+    decoded->claim_append(decoded_map[chunk_index(pos)]);
+  return 0;
+}
+}