diff options
Diffstat (limited to 'src/os/filestore/HashIndex.h')
-rw-r--r-- | src/os/filestore/HashIndex.h | 462 |
1 files changed, 462 insertions, 0 deletions
diff --git a/src/os/filestore/HashIndex.h b/src/os/filestore/HashIndex.h new file mode 100644 index 00000000..7e34d155 --- /dev/null +++ b/src/os/filestore/HashIndex.h @@ -0,0 +1,462 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_HASHINDEX_H +#define CEPH_HASHINDEX_H + +#include "include/buffer_fwd.h" +#include "include/encoding.h" +#include "LFNIndex.h" + +extern string reverse_hexdigit_bits_string(string l); + +/** + * Implements collection prehashing. + * + * @verbatim + * (root) - 0 - 0 + * - 1 + * - E + * - 1 + * - 2 - D - 0 + * . + * . + * . + * - F - 0 + * @endverbatim + * + * A file is located at the longest existing directory from the root + * given by the hex characters in the hash beginning with the least + * significant. + * + * ex: ghobject_t("object", CEPH_NO_SNAP, 0xA4CEE0D2) + * would be located in (root)/2/D/0/ + * + * Subdirectories are created when the number of objects in a + * directory exceed 16 * (abs(merge_threshhold) * split_multiplier + + * split_rand_factor). The number of objects in a directory is encoded + * as subdir_info_s in an xattr on the directory. + */ +class HashIndex : public LFNIndex { +private: + /// Attribute name for storing subdir info @see subdir_info_s + static const string SUBDIR_ATTR; + /// Attribute name for storing index-wide settings + static const string SETTINGS_ATTR; + /// Attribute name for storing in progress op tag + static const string IN_PROGRESS_OP_TAG; + /// Size (bits) in object hash + static const int PATH_HASH_LEN = 32; + /// Max length of hashed path + static const int MAX_HASH_LEVEL = (PATH_HASH_LEN/4); + + /** + * Merges occur when the number of object drops below + * merge_threshold and splits occur when the number of objects + * exceeds: + * + * 16 * (abs(merge_threshold) * split_multiplier + split_rand_factor) + * + * Please note if merge_threshold is less than zero, it will never + * do merging + */ + int merge_threshold; + int split_multiplier; + + /// Encodes current subdir state for determining when to split/merge. + struct subdir_info_s { + uint64_t objs; ///< Objects in subdir. + uint32_t subdirs; ///< Subdirs in subdir. + uint32_t hash_level; ///< Hashlevel of subdir. + + subdir_info_s() : objs(0), subdirs(0), hash_level(0) {} + + void encode(bufferlist &bl) const + { + using ceph::encode; + __u8 v = 1; + encode(v, bl); + encode(objs, bl); + encode(subdirs, bl); + encode(hash_level, bl); + } + + void decode(bufferlist::const_iterator &bl) + { + using ceph::decode; + __u8 v; + decode(v, bl); + ceph_assert(v == 1); + decode(objs, bl); + decode(subdirs, bl); + decode(hash_level, bl); + } + }; + + struct settings_s { + uint32_t split_rand_factor; ///< random factor added to split threshold (only on root of collection) + settings_s() : split_rand_factor(0) {} + void encode(bufferlist &bl) const + { + using ceph::encode; + __u8 v = 1; + encode(v, bl); + encode(split_rand_factor, bl); + } + void decode(bufferlist::const_iterator &bl) + { + using ceph::decode; + __u8 v; + decode(v, bl); + decode(split_rand_factor, bl); + } + } settings; + + /// Encodes in progress split or merge + struct InProgressOp { + static const int SPLIT = 0; + static const int MERGE = 1; + static const int COL_SPLIT = 2; + int op; + vector<string> path; + + InProgressOp(int op, const vector<string> &path) + : op(op), path(path) {} + + explicit InProgressOp(bufferlist::const_iterator &bl) { + decode(bl); + } + + bool is_split() const { return op == SPLIT; } + bool is_col_split() const { return op == COL_SPLIT; } + bool is_merge() const { return op == MERGE; } + + void encode(bufferlist &bl) const { + using ceph::encode; + __u8 v = 1; + encode(v, bl); + encode(op, bl); + encode(path, bl); + } + + void decode(bufferlist::const_iterator &bl) { + using ceph::decode; + __u8 v; + decode(v, bl); + ceph_assert(v == 1); + decode(op, bl); + decode(path, bl); + } + }; + + +public: + /// Constructor. + HashIndex( + CephContext* cct, + coll_t collection, ///< [in] Collection + const char *base_path, ///< [in] Path to the index root. + int merge_at, ///< [in] Merge threshold. + int split_multiple, ///< [in] Split threshold. + uint32_t index_version,///< [in] Index version + double retry_probability=0) ///< [in] retry probability + : LFNIndex(cct, collection, base_path, index_version, retry_probability), + merge_threshold(merge_at), + split_multiplier(split_multiple) + {} + + int read_settings() override; + + /// @see CollectionIndex + uint32_t collection_version() override { return index_version; } + + /// @see CollectionIndex + int cleanup() override; + + /// @see CollectionIndex + int prep_delete() override; + + /// @see CollectionIndex + int _split( + uint32_t match, + uint32_t bits, + CollectionIndex* dest + ) override; + + /// @see CollectionIndex + int _merge( + uint32_t bits, + CollectionIndex* dest + ) override; + + int _merge_dirs( + HashIndex& from, + HashIndex& to, + const vector<string>& path); + + /// @see CollectionIndex + int apply_layout_settings(int target_level) override; + +protected: + int _init() override; + + int _created( + const vector<string> &path, + const ghobject_t &oid, + const string &mangled_name + ) override; + int _remove( + const vector<string> &path, + const ghobject_t &oid, + const string &mangled_name + ) override; + int _lookup( + const ghobject_t &oid, + vector<string> *path, + string *mangled_name, + int *hardlink + ) override; + + /** + * Pre-hash the collection to create folders according to the expected number + * of objects in this collection. + */ + int _pre_hash_collection( + uint32_t pg_num, + uint64_t expected_num_objs + ) override; + + int _collection_list_partial( + const ghobject_t &start, + const ghobject_t &end, + int max_count, + vector<ghobject_t> *ls, + ghobject_t *next + ) override; +private: + /// Internal recursively remove path and its subdirs + int _recursive_remove( + const vector<string> &path, ///< [in] path to remove + bool top ///< [in] internal tracking of first caller + ); /// @return Error Code, 0 on success + /// Recursively remove path and its subdirs + int recursive_remove( + const vector<string> &path ///< [in] path to remove + ); /// @return Error Code, 0 on success + /// Tag root directory at beginning of col_split + int start_col_split( + const vector<string> &path ///< [in] path to split + ); ///< @return Error Code, 0 on success + /// Tag root directory at beginning of split + int start_split( + const vector<string> &path ///< [in] path to split + ); ///< @return Error Code, 0 on success + /// Tag root directory at beginning of split + int start_merge( + const vector<string> &path ///< [in] path to merge + ); ///< @return Error Code, 0 on success + /// Remove tag at end of split or merge + int end_split_or_merge( + const vector<string> &path ///< [in] path to split or merged + ); ///< @return Error Code, 0 on success + /// Gets info from the xattr on the subdir represented by path + int get_info( + const vector<string> &path, ///< [in] Path from which to read attribute. + subdir_info_s *info ///< [out] Attribute value + ); /// @return Error Code, 0 on success + + /// Sets info to the xattr on the subdir represented by path + int set_info( + const vector<string> &path, ///< [in] Path on which to set attribute. + const subdir_info_s &info ///< [in] Value to set + ); /// @return Error Code, 0 on success + + /// Encapsulates logic for when to split. + bool must_merge( + const subdir_info_s &info ///< [in] Info to check + ); /// @return True if info must be merged, False otherwise + + /// Encapsulates logic for when to merge. + bool must_split( + const subdir_info_s &info, ///< [in] Info to check + int target_level = 0 + ); /// @return True if info must be split, False otherwise + + /// Initiates merge + int initiate_merge( + const vector<string> &path, ///< [in] Subdir to merge + subdir_info_s info ///< [in] Info attached to path + ); /// @return Error Code, 0 on success + + /// Completes merge + int complete_merge( + const vector<string> &path, ///< [in] Subdir to merge + subdir_info_s info ///< [in] Info attached to path + ); /// @return Error Code, 0 on success + + /// Resets attr to match actual subdir contents + int reset_attr( + const vector<string> &path ///< [in] path to cleanup + ); + + /// Initiate Split + int initiate_split( + const vector<string> &path, ///< [in] Subdir to split + subdir_info_s info ///< [in] Info attached to path + ); /// @return Error Code, 0 on success + + /// Completes Split + int complete_split( + const vector<string> &path, ///< [in] Subdir to split + subdir_info_s info ///< [in] Info attached to path + ); /// @return Error Code, 0 on success + + /// Determine path components from hoid hash + void get_path_components( + const ghobject_t &oid, ///< [in] Object for which to get path components + vector<string> *path ///< [out] Path components for hoid. + ); + + /// Pre-hash and split folders to avoid runtime splitting + /// according to the given expected object number. + int pre_split_folder(uint32_t pg_num, uint64_t expected_num_objs); + + /// Initialize the folder (dir info) with the given hash + /// level and number of its subdirs. + int init_split_folder(vector<string> &path, uint32_t hash_level); + + /// do collection split for path + static int col_split_level( + HashIndex &from, ///< [in] from index + HashIndex &dest, ///< [in] to index + const vector<string> &path, ///< [in] path to split + uint32_t bits, ///< [in] num bits to match + uint32_t match, ///< [in] bits to match + unsigned *mkdirred ///< [in,out] path[:mkdirred] has been mkdirred + ); + + + /** + * Get string representation of ghobject_t/hash + * + * e.g: 0x01234567 -> "76543210" + */ + static string get_path_str( + const ghobject_t &oid ///< [in] Object to get hash string for + ); ///< @return Hash string for hoid. + + /// Get string from hash, @see get_path_str + static string get_hash_str( + uint32_t hash ///< [in] Hash to convert to a string. + ); ///< @return String representation of hash + + /// Get hash from hash prefix string e.g. "FFFFAB" -> 0xFFFFAB00 + static uint32_t hash_prefix_to_hash( + string prefix ///< [in] string to convert + ); ///< @return Hash + + /// Get hash mod from path + static void path_to_hobject_hash_prefix( + const vector<string> &path,///< [in] path to convert + uint32_t *bits, ///< [out] bits + uint32_t *hash ///< [out] hash + ) { + string hash_str; + for (vector<string>::const_iterator i = path.begin(); + i != path.end(); + ++i) { + hash_str.push_back(*i->begin()); + } + uint32_t rev_hash = hash_prefix_to_hash(hash_str); + if (hash) + *hash = rev_hash; + if (bits) + *bits = path.size() * 4; + } + + /// Calculate the number of bits. + static int calc_num_bits(uint64_t n) { + int ret = 0; + while (n > 0) { + n = n >> 1; + ret++; + } + return ret; + } + + /// Convert a number to hex string (upper case). + static string to_hex(int n) { + ceph_assert(n >= 0 && n < 16); + char c = (n <= 9 ? ('0' + n) : ('A' + n - 10)); + string str; + str.append(1, c); + return str; + } + + struct CmpPairBitwise { + bool operator()(const pair<string, ghobject_t>& l, + const pair<string, ghobject_t>& r) const + { + if (l.first < r.first) + return true; + if (l.first > r.first) + return false; + if (cmp(l.second, r.second) < 0) + return true; + return false; + } + }; + + struct CmpHexdigitStringBitwise { + bool operator()(const string& l, const string& r) const { + return reverse_hexdigit_bits_string(l) < reverse_hexdigit_bits_string(r); + } + }; + + /// Get path contents by hash + int get_path_contents_by_hash_bitwise( + const vector<string> &path, /// [in] Path to list + const ghobject_t *next_object, /// [in] list > *next_object + set<string, CmpHexdigitStringBitwise> *hash_prefixes, /// [out] prefixes in dir + set<pair<string, ghobject_t>, CmpPairBitwise> *objects /// [out] objects + ); + + /// List objects in collection in ghobject_t order + int list_by_hash( + const vector<string> &path, /// [in] Path to list + const ghobject_t &end, /// [in] List only objects < end + int max_count, /// [in] List at most max_count + ghobject_t *next, /// [in,out] List objects >= *next + vector<ghobject_t> *out /// [out] Listed objects + ); ///< @return Error Code, 0 on success + /// List objects in collection in ghobject_t order + int list_by_hash_bitwise( + const vector<string> &path, /// [in] Path to list + const ghobject_t &end, /// [in] List only objects < end + int max_count, /// [in] List at most max_count + ghobject_t *next, /// [in,out] List objects >= *next + vector<ghobject_t> *out /// [out] Listed objects + ); ///< @return Error Code, 0 on success + + /// Create the given levels of sub directories from the given root. + /// The contents of *path* is not changed after calling this function. + int recursive_create_path(vector<string>& path, int level); + + /// split each dir below the given path + int split_dirs(const vector<string> &path, int target_level = 0); + + int write_settings(); +}; + +#endif |