天天看点

Object#yyds干货盘点#

1、简介

对象 Object是默认4MB大小的数据块;

一个对象就对应本地文件系统中的一个文件;

在代码实现中,有 object、sobject、hobject、ghobject等不同的类名;

2、object

object.h 文件中定义;

name 就是对象名;

2.1 结构体

struct object_t { string name;

object_t() {} // cppcheck-suppress noExplicitConstructor object_t(const char *s) : name(s) {} // cppcheck-suppress noExplicitConstructor object_t(const string& s) : name(s) {}

void swap(object_t& o) { name.swap(o.name); } void clear() { name.clear(); } void encode(bufferlist &bl) const { ::encode(name, bl); } void decode(bufferlist::iterator &bl) { ::decode(name, bl); }};

2.2 代码解析

3、sobject

sobject.h 文件中定义;

sobject_t 在 object_t 之上增加了 snapshot信息,用于标识是否是快照对象;

数据成员 snap 为快照对象对应的序列号;

如果一个对象不是快照对象(也就是head对象),那么snap字段就会被设置为 CEPH_NOSNAP值;

3.1 结构体

struct sobject_t { object_t oid; snapid_t snap;

sobject_t() : snap(0) {} sobject_t(object_t o, snapid_t s) : oid(o), snap(s) {}

void swap(sobject_t& o) { oid.swap(o.oid); snapid_t t = snap; snap = o.snap; o.snap = t; }

void encode(bufferlist& bl) const { ::encode(oid, bl); ::encode(snap, bl); } void decode(bufferlist::iterator& bl) { ::decode(oid, bl); ::decode(snap, bl); }};

3.2 代码分析

4、hobject

hobject.h 文件中定义;

hobject_t 是 hash object的缩写;

4.1 结构体

struct hobject_t { object_t oid; snapid_t snap;private: uint32_t hash; //hash值一般设置为就是 pg的id值 bool max; uint32_t nibblewise_key_cache; uint32_t hash_reverse_bits; static const int64_t POOL_META = -1; static const int64_t POOL_TEMP_START = -2; // and then negative friend class spg_t; // for POOL_TEMP_STARTpublic: int64_t pool; //所在 pool的id string nspace; //nspace 一般为空,它用于标识特殊的对象

private: string key; //对象的特殊标记

class hobject_t_max {};

public: const string &get_key() const { return key; }

void set_key(const std::string &key_) { if (key_ == oid.name) key.clear(); else key = key_; }

string to_str() const; uint32_t get_hash() const { return hash; } void set_hash(uint32_t value) { hash = value; build_hash_cache(); }

static bool match_hash(uint32_t to_check, uint32_t bits, uint32_t match) { return (match & ~((~0)<<bits)) == (to_check & ~((~0)<<bits)); } bool match(uint32_t bits, uint32_t match) const { return match_hash(hash, bits, match); }

bool is_temp() const { return pool <= POOL_TEMP_START && pool != INT64_MIN; } bool is_meta() const { return pool == POOL_META; }

hobject_t() : snap(0), hash(0), max(false), pool(INT64_MIN) { build_hash_cache(); }

hobject_t(const hobject_t &rhs) = default; hobject_t(hobject_t &&rhs) = default; hobject_t(hobject_t_max &&singleon) : hobject_t() { max = true; } hobject_t &operator=(const hobject_t &rhs) = default; hobject_t &operator=(hobject_t &&rhs) = default; hobject_t &operator=(hobject_t_max &&singleton) { *this = hobject_t(); max = true; return *this; }

// maximum sorted value. static hobject_t_max get_max() { return hobject_t_max(); }

hobject_t(object_t oid, const string& key, snapid_t snap, uint32_t hash, int64_t pool, string nspace) : oid(oid), snap(snap), hash(hash), max(false), pool(pool), nspace(nspace), key(oid.name == key ? string() : key) { build_hash_cache(); }

hobject_t(const sobject_t &soid, const string &key, uint32_t hash, int64_t pool, string nspace) : oid(soid.oid), snap(soid.snap), hash(hash), max(false), pool(pool), nspace(nspace), key(soid.oid.name == key ? string() : key) { build_hash_cache(); }

/// @return min hobject_t ret s.t. ret.hash == this->hash hobject_t get_boundary() const { if (is_max()) return *this; hobject_t ret; ret.set_hash(hash); ret.pool = pool; return ret; }

hobject_t get_object_boundary() const { if (is_max()) return *this; hobject_t ret = *this; ret.snap = 0; return ret; }

/// @return head version of this hobject_t hobject_t get_head() const { hobject_t ret(*this); ret.snap = CEPH_NOSNAP; return ret; }

/// @return snapdir version of this hobject_t hobject_t get_snapdir() const { hobject_t ret(*this); ret.snap = CEPH_SNAPDIR; return ret; }

/// @return true if object is snapdir bool is_snapdir() const { return snap == CEPH_SNAPDIR; }

/// @return true if object is head bool is_head() const { return snap == CEPH_NOSNAP; }

/// @return true if object is neither head nor snapdir nor max bool is_snap() const { return !is_max() && !is_head() && !is_snapdir(); }

/// @return true iff the object should have a snapset in it's attrs bool has_snapset() const { return is_head() || is_snapdir(); }

/* Do not use when a particular hash function is needed */ explicit hobject_t(const sobject_t &o) : oid(o.oid), snap(o.snap), max(false), pool(POOL_META) { set_hash(std::hash<sobject_t>()(o)); }

bool is_max() const { return max; } bool is_min() const { // this needs to match how it's constructed return snap == 0 && hash == 0 && !max && pool == INT64_MIN; }

static uint32_t _reverse_bits(uint32_t v) { if (v == 0) return v; // reverse bits // swap odd and even bits v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1); // swap consecutive pairs v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2); // swap nibbles ... v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4); // swap bytes v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8); // swap 2-byte long pairs v = ( v >> 16 ) | ( v << 16); return v; } static uint32_t _reverse_nibbles(uint32_t retval) { // reverse nibbles retval = ((retval & 0x0f0f0f0f) << 4) | ((retval & 0xf0f0f0f0) >> 4); retval = ((retval & 0x00ff00ff) << 8) | ((retval & 0xff00ff00) >> 8); retval = ((retval & 0x0000ffff) << 16) | ((retval & 0xffff0000) >> 16); return retval; }

/** * Returns set S of strings such that for any object * h where h.match(bits, mask), there is some string * s \f$\in\f$ S such that s is a prefix of h.to_str(). * Furthermore, for any s $f\in\f$ S, s is a prefix of * h.str() implies that h.match(bits, mask). */ static set<string> get_prefixes( uint32_t bits, uint32_t mask, int64_t pool);

// filestore nibble-based key uint32_t get_nibblewise_key_u32() const { assert(!max); return nibblewise_key_cache; } uint64_t get_nibblewise_key() const { return max ? 0x100000000ull : nibblewise_key_cache; }

// newer bit-reversed key uint32_t get_bitwise_key_u32() const { assert(!max); return hash_reverse_bits; } uint64_t get_bitwise_key() const { return max ? 0x100000000ull : hash_reverse_bits; }

void build_hash_cache() { nibblewise_key_cache = _reverse_nibbles(hash); hash_reverse_bits = _reverse_bits(hash); } void set_nibblewise_key_u32(uint32_t value) { hash = _reverse_nibbles(value); build_hash_cache(); } void set_bitwise_key_u32(uint32_t value) { hash = _reverse_bits(value); build_hash_cache(); }

const string& get_effective_key() const { if (key.length()) return key; return oid.name; }

void swap(hobject_t &o) { hobject_t temp(o); o = (*this); (*this) = temp; }

const string &get_namespace() const { return nspace; }

bool parse(const string& s);

void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bl); void decode(json_spirit::Value& v); void dump(Formatter *f) const; static void generate_test_instances(list<hobject_t*>& o); friend int cmp_nibblewise(const hobject_t& l, const hobject_t& r); friend int cmp_bitwise(const hobject_t& l, const hobject_t& r); friend bool operator==(const hobject_t&, const hobject_t&); friend bool operator!=(const hobject_t&, const hobject_t&); friend struct ghobject_t;

struct NibblewiseComparator { bool operator()(const hobject_t& l, const hobject_t& r) const { return cmp_nibblewise(l, r) < 0; } };

struct BitwiseComparator { bool operator()(const hobject_t& l, const hobject_t& r) const { return cmp_bitwise(l, r) < 0; } };

struct Comparator { bool bitwise; explicit Comparator(bool b) : bitwise(b) {} bool operator()(const hobject_t& l, const hobject_t& r) const { if (bitwise) return cmp_bitwise(l, r) < 0; else return cmp_nibblewise(l, r) < 0; } }; struct ComparatorWithDefault { bool bitwise; explicit ComparatorWithDefault(bool b=true) : bitwise(b) {} bool operator()(const hobject_t& l, const hobject_t& r) const { if (bitwise) return cmp_bitwise(l, r) < 0; else return cmp_nibblewise(l, r) < 0; } };};

4.2 代码分析

5、ghobject

ghobject_t 在对象 hobject_t的基础上,添加了 generation字段 和 shard_id字段,这个用于 EC模式下的PG

shard_id用于标识对象所在的osd在EC类型的PG中的序号;对于EC来说,每个osd在PG中的序号在数据恢复时非常关键;如果是Replicate类型的PG,那么该字段就设置为 NO_SHARD(-1),该字段对于replicate是没用的;

generation用于记录对象的版本号;当PG为EC时,写操作需要区分写前后两个版本的object:写操作会保留对象的上一个版本(generation),当EC写失败时,可以rollback到上一个版本;

5.1 结构体