天天看點

redis源碼閱讀-資料結構篇-hyperloglog

文章目錄

      • 5. HyperLogLog 實作 hyperloglog.c
        • 資料結構定義
        • Helper函數(可跳過,需要時閱讀)
        • 元素哈希處理 O(1)
        • 添加基數 O(1)
        • ...

5. HyperLogLog 實作 hyperloglog.c

資料結構定義

  • hllhdr
    struct hllhdr {
        char magic[4];      /* "HYLL" */
        uint8_t encoding;   /* HLL_DENSE or HLL_SPARSE. */
        uint8_t notused[3]; /* Reserved for future use, must be zero. */
        uint8_t card[8];    /* Cached cardinality, little endian. */
        uint8_t registers[]; /* Data bytes. */
    };
               
  • 一些宏定義
    /* The cached cardinality MSB is used to signal validity of the cached value. */
    #define HLL_INVALIDATE_CACHE(hdr) (hdr)->card[0] |= (1<<7)
    #define HLL_VALID_CACHE(hdr) (((hdr)->card[0] & (1<<7)) == 0)
    
    #define HLL_P 14 /* The greater is P, the smaller the error. */
    #define HLL_REGISTERS (1<<HLL_P) /* With P=14, 16384 registers. */
    #define HLL_P_MASK (HLL_REGISTERS-1) /* Mask to index register. */
    #define HLL_BITS 6 /* Enough to count up to 63 leading zeroes. */
    #define HLL_REGISTER_MAX ((1<<HLL_BITS)-1)
    #define HLL_HDR_SIZE sizeof(struct hllhdr)
    #define HLL_DENSE_SIZE (HLL_HDR_SIZE+((HLL_REGISTERS*HLL_BITS+7)/8))
    #define HLL_DENSE 0 /* Dense encoding. */
    #define HLL_SPARSE 1 /* Sparse encoding. */
    #define HLL_RAW 255 /* Only used internally, never exposed. */
    #define HLL_MAX_ENCODING 1
    
    static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected\r\n";
    ...
               

Helper函數(可跳過,需要時閱讀)

  • 哈希函數 O(1)
    uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
        const uint64_t m = 0xc6a4a7935bd1e995;
        const int r = 47;
        uint64_t h = seed ^ (len * m);
        const uint8_t *data = (const uint8_t *)key;
        const uint8_t *end = data + (len-(len&7));
    
        while(data != end) {
            uint64_t k;
    #if (BYTE_ORDER == LITTLE_ENDIAN)
            k = *((uint64_t*)data);
    #else
            k = (uint64_t) data[0];
            k |= (uint64_t) data[1] << 8;
            k |= (uint64_t) data[2] << 16;
            k |= (uint64_t) data[3] << 24;
            k |= (uint64_t) data[4] << 32;
            k |= (uint64_t) data[5] << 40;
            k |= (uint64_t) data[6] << 48;
            k |= (uint64_t) data[7] << 56;
    #endif
            k *= m;
            k ^= k >> r;
            k *= m;
            h ^= k;
            h *= m;
            data += 8;
        }
        switch(len & 7) {
        case 7: h ^= (uint64_t)data[6] << 48;
        case 6: h ^= (uint64_t)data[5] << 40;
        case 5: h ^= (uint64_t)data[4] << 32;
        case 4: h ^= (uint64_t)data[3] << 24;
        case 3: h ^= (uint64_t)data[2] << 16;
        case 2: h ^= (uint64_t)data[1] << 8;
        case 1: h ^= (uint64_t)data[0];
                h *= m;
        };
        h ^= h >> r;
        h *= m;
        h ^= h >> r;
        return h;
    }
               

元素哈希處理 O(1)

// 傳回000...1的長度
// regp 儲存哈希桶index
int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
    uint64_t hash, bit, index;
    int count;
	// 擷取哈希值
    hash = MurmurHash64A(ele,elesize,0xadc83b19ULL);
    index = hash & HLL_P_MASK; 	// (1 << 14)個桶的index
    hash |= ((uint64_t)1<<63);	// 確定至少一個1
    bit = HLL_REGISTERS; 		// 第15位開始算起
    count = 1; 					// 第一個出現1的rank
    while((hash & bit) == 0) {	// 直到找到1
        count++;
        bit <<= 1;
    }
    *regp = (int) index;		// (1 << 14)個桶的index
    return count;				// 傳回000...1的長度
}
           

添加基數 O(1)

int hllDenseAdd(uint8_t *registers, unsigned char *ele, size_t elesize) {
    uint8_t oldcount, count;
    long index;
    // 元素哈希處理
    count = hllPatLen(ele,elesize,&index);
    // 000...1的長度更長則添加
    HLL_DENSE_GET_REGISTER(oldcount,registers,index);
    if (count > oldcount) {
        HLL_DENSE_SET_REGISTER(registers,index,count);
        return 1;
    } else {
        return 0;
    }
}
           

關于HyperLogLog可以詳見本人之前的部落格解析

繼續閱讀