HashTable,也叫哈希表/散清單,是一種通過key值直接通路在記憶體存儲位置的資料結構
這種資料結構在插入、删除、查找的操作上具有“常數平均時間”的表現
哈希表最大的優點,就是把資料的存儲和查找消耗的時間大大降低,幾乎可以看成是常數時間;而代價僅僅是消耗比較多的記憶體。然而在目前可利用記憶體越來越多的情況下,用空間換時間的做法是值得的。
散列函數:使用某種映射函數,将大數映射為小數(所給鍵值映射為較小的友善作為下标的數字)
(<stl_hash_fun>中定義size_t __stl_hash_string(const char* s))
使用hash function可能會帶來“碰撞”問題,解決這個問題有多種方法,包括線性探測、二次探測、開鍊法
負載因子:元素個數/表格大小 除了開鍊法,其它情況負載因子都在0-1
插入:利用hash function計算出元素插入位置,如果該位置的空間不可用,則往下尋找,直到找到一
個可用空間
查找:利用hash function計算出元素位置,如果該位置的元素不是我們所要找的元素,則往下尋找,
直到找到
删除:必須使用惰性删除,也就是隻标記删除記号,實際删除操作則待表格重新整理時進行,因
為hashtable中的每一個元素不僅表述它自己,也關系到其它元素的排列
線性探測:H+i (i=0,1,2,3,4...)
二測探測:H+i^2 (i=0,1,2,3,4...)
開鍊法:在每一個表格元素維護一個list 雖然是線性操作,但如果每個list夠短,速度還是夠快
//_Capacity()中new不出空間
#pragma once
#include<string>
#include<ostream>
template<class K>
struct HASHFUNC
{
size_t operator()(const K& key, size_t capacity)
{
return key%capacity;
}
};
template<>
struct HASHFUNC<std::string>
{
size_t operator()(const std::string& s,int capacity)
{
const char* str = s.c_str();
return BKDRHash(str)%capacity;
}
static size_t BKDRHash(const char * str)
{
unsigned int seed = 131; // 31 131 1313 13131 131313
unsigned int hash = 0;
while (*str)
{
hash = hash * seed + (*str++);
}
return (hash & 0x7FFFFFFF);
}
};
//hashtable的删除采用的是惰性删除,也就是隻标記删除記号,實際删除操作則待表格重新整理時進行,
//這是因為hashtable中每一個元素不僅表述它自己,也關系到其它元素的排列
enum Status
{
EMPTY,
EXIST,
DELETE
};
template<class K, class V>
struct KeyValue
{
KeyValue(const K key=K(), const V value=V())
:_key(key)
,_value(value)
{}
friend std::ostream& operator<<(std::ostream &_cout, const KeyValue<K, V>& kv);
K _key;
V _value;
};
template<class K, class V, class COM = HASHFUNC<K>>
class Hash
{
public:
Hash();
bool Insert(K key, V value);
KeyValue<K, V>* Find(K key);
bool Remove(K key);
void Print();
~Hash();
//friend std::ostream& operator<<(std::ostream &_cout, const KeyValue<K, V>& kv);
protected:
void _Capacity()
{
const int _PrimeSize = 28;
static const unsigned long _PrimeList[_PrimeSize] =
{
53ul, 97ul, 193ul, 389ul, 769ul,
1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
1610612741ul, 3221225473ul, 4294967291ul
};
//确定容量
size_t NewCapacity = _capacity;
if (NewCapacity == 0){
NewCapacity = _PrimeList[0];
}
else if (_size * 10 / NewCapacity >= 7){
for (int i = 0; i < _PrimeSize; ++i){
if (NewCapacity < _PrimeList[i]){
NewCapacity = _PrimeList[i];
break;
}
}
}
//把原表中的資料哈希到擴容後的表中
if ( NewCapacity != _capacity){
Status* NewStatus = new Status[NewCapacity];
KeyValue<K, V>* NewTables = new KeyValue<K, V>[NewCapacity];//************************new不出空間
memset(NewStatus, EMPTY, sizeof(NewStatus)*NewCapacity);
if (_capacity != 0){
for (size_t index = 0; index < _capacity; ++index)
{
if (_status[index] == EXIST){
size_t i = HashFunc(_tables[index]._key, _capacity);//得到該關鍵字對應的位置
NewTables[i]._key = _tables[index]._key;
NewTables[i]._value = _tables[index]._value;
NewStatus[i] = EXIST;
}
}
}
std::swap(NewTables, _tables);
std::swap(NewStatus, _status);
_capacity = NewCapacity;
}
}
size_t HashFunc(const K& key, size_t capacity)
{
return COM()(key, capacity);
}
size_t HashFunc2(const K& key,size_t capacity)
{
size_t i = 0;
size_t res = HashFunc(key, capacity);
while(_status[(res + (i*i)) % capacity] == EXIST){
i++;
}
return (res + (i*i)) % capacity;
}
int _Find(K key);
protected:
size_t _size;
size_t _capacity;
KeyValue<K,V>* _tables;
Status* _status; //哈希表位置的狀态
};
template<class K, class V, class COM = HASHFUNC<K>>
Hash<K, V, COM >::Hash()
: _size(0)
, _capacity(0)
, _tables(NULL)
, _status(NULL)
//, _tables(new KeyValue<K,V>[_capacity])
//, _status(new Status[_capacity])
{
/*memset(_tables, NULL, sizeof(KeyValue<K,V>)*_capacity);
memset(_status, EMPTY, sizeof(Status)*_capacity);*/
}
template<class K, class V, class COM = HASHFUNC<K>>
bool Hash<K, V, COM >::Insert(K key, V value)
{
_Capacity();//檢查容量,擴容
size_t index = HashFunc2(key,_capacity);//****************
//while (_status[index]!=EMPTY){
// if (_tables[index]._key == key)
// return false;
// index++;
//}
//while (_status[index] == EXIST){
// index++;
//}
_tables[index]._key = key;
_tables[index]._value = value;
_status[index] = EXIST;
++_size;
return true;
}
template<class K, class V, class COM = HASHFUNC<K>>
int Hash<K, V, COM >::_Find(K key)
{
size_t k = HashFunc(key, _capacity);
if (_tables[k]._key == key)
return k;
size_t i = 1;
size_t index = (k + i*i) % _capacity;
while (_tables[index]._key!=_tables[k]._key){
if (_status[index] == EXIST){
if (_tables[index]._key == key){
return index;
}
}
i++;
index = (k + i*i)%_capacity;
}
std::cout << "未找到key為" << key << "的資料" << std::endl;
return -1;
}
template<class K, class V, class COM = HASHFUNC<K>>
KeyValue<K, V >* Hash<K, V, COM >::Find(K key)
{
size_t index=_Find(key);
if (index != -1){
std::cout << "已找到key為" << key << "的資料,下标為" << index << std::endl;
return &_tables[index];
}
return NULL;
}
template<class K, class V, class COM = HASHFUNC<K>>
bool Hash<K, V, COM >::Remove(K key)
{
int index = _Find(key);
if (index== -1)
return false;
_status[index] = DELETE;
std::cout << "已删除key為"<<key<<"的資料" << std::endl;
return true;
}
template<class K, class V, class COM = HASHFUNC<K>>
void Hash<K, V, COM >::Print()
{
std::cout << "_tables:" << std::endl;
for (size_t i = 0; i < _capacity; ++i){
if (_status[i] == EXIST){
std::cout << " _tables[" << i << "]-->" << _tables[i]._key<<","<<_tables[i]._value << std::endl;
}
}
std::cout << std::endl;
}
template<class K, class V, class COM = HASHFUNC<K>>
Hash<K, V, COM >::~Hash()
{
for (size_t i = 0; i < _capacity; ++i){
if (_status[i] == EXIST)
_status[i] = DELETE;
}
delete[] _tables;
delete[] _status;
}
template<class K, class V>
std::ostream& operator<<(std::ostream &_cout, const KeyValue<K, V>& kv)
{
_cout <<kv._value << std::endl;
return _cout;
}
#pragma once
#include<vector>
#include<string>
template<class K>
struct HashFuncDefault
{
size_t operator()(const K& key,const size_t& capacity)
{
return key%capacity;
}
};
template<>
struct HashFuncDefault<std::string>
{
static size_t BKDRHash(const char * str)
{
unsigned int seed = 131; // 31 131 1313 13131 131313
unsigned int hash = 0;
while (*str)
{
hash = hash * seed + (*str++);
}
return (hash & 0x7FFFFFFF);
}
size_t operator()(const std::string& key, const size_t& capacity)
{
const char* str = key.c_str();
return BKDRHash(str)%capacity;
}
};
template<class K,class V>
struct KeyValue
{
KeyValue()
:_key(0)
, _value(0)
, _next(NULL)
{}
KeyValue(const K& key,const V& value)
:_key(key)
, _value(value)
, _next(NULL)
{}
K _key;
V _value;
KeyValue<K, V>* _next;
};
template<class K, class V, class HashFunc = HashFuncDefault<K>>
class HashTablesBucket
{
public:
HashTablesBucket();
HashTablesBucket(const HashTablesBucket<K,V,HashFunc>& hst);
HashTablesBucket<K,V,HashFunc>& operator=(const HashTablesBucket<K, V, HashFunc>& hst);
bool Insert(const K& key, const V& value);
KeyValue<K, V>* Find(const K& key);
bool Remove(const K& key);
void Print();
~HashTablesBucket();
protected:
void _Copy(const HashTablesBucket<K,V,HashFunc>& hsb)
{
_tables.resize(hsb._tables.size());
for (size_t i = 0; i < hsb._tables.size(); ++i){
KeyValue<K, V>*cur = hsb._tables[i];
while (cur){
KeyValue<K, V>* NewCur = new KeyValue<K, V>(cur->_key,cur->_value);
NewCur->_next = _tables[i];
_tables[i]= NewCur;
cur = cur->_next;
}
}
}
//檢查容量
void _Capacity()
{
const int _PrimeSize = 28;
static const unsigned long _PrimeList[_PrimeSize] =
{
53ul, 97ul, 193ul, 389ul, 769ul,
1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
1610612741ul, 3221225473ul, 4294967291ul
};
//擴容
size_t NewCapacity = _tables.size();
if (_size == _tables.size()){
for (size_t i = 0; i < _PrimeSize; ++i){
if (_tables.size()< _PrimeList[i]){
NewCapacity = _PrimeList[i];
break;
}
i++;
}
}
//說明有擴容,需把原來的資料哈希到新的表中
HashTablesBucket<K, V, HashFunc> NewHTB;
NewHTB._tables.resize(NewCapacity);
if (NewCapacity != _tables.size()){
for(size_t i = 0; i < _tables.size(); ++i){
KeyValue<K, V>* cur = _tables[i];
while (cur){
size_t index=HashTablesBucketFunc(cur->_key);
_tables[i] = cur->_next;
NewHTB._tables[index]= cur;
cur = _tables[i];
}
}
swap(NewHTB._tables, _tables);
}
}
//傳回下标
size_t HashTablesBucketFunc(const K& key)
{
return HashFunc()(key,_tables.size());
}
//析構函數
void _Deatructor(KeyValue<K,V>* cur)
{
while (cur){
KeyValue<K, V>* tmp = cur;
cur = cur->_next;
delete tmp;
tmp = NULL;
}
}
protected:
size_t _size;
std::vector<KeyValue<K, V>*> _tables;
};
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>::HashTablesBucket()
:_size(0)
{}
//拷貝構造
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>::HashTablesBucket(const HashTablesBucket<K, V, HashFunc>& hst)
: _size(hst._size)
{
_Copy(hst);
}
//指派操作符的重載
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>& HashTablesBucket<K, V, HashFunc>::operator=(const HashTablesBucket<K, V, HashFunc>& hst)
{
if (this != &hst){
for (size_t i = 0; i < _tables.size(); ++i){
_Deatructor(_tables[i]);
_tables[i] = NULL;
}
_size=hst._tables.size();
_Copy(hst);
}
return *this;
}
template<class K, class V, class HashFunc>
bool HashTablesBucket<K, V, HashFunc>::Insert(const K& key, const V& value)
{
_Capacity();
size_t index = HashTablesBucketFunc(key);//二次探測
KeyValue<K, V>* cur = _tables[index];
while (cur)
{
if (cur->_key == key){
return false;
}
cur = cur->_next;
}
KeyValue<K, V>* kv = new KeyValue<K, V>(key, value);
if (_tables[index]==NULL){
_tables[index] = kv;
}
else{
kv->_next = _tables[index]->_next;
_tables[index]->_next = kv;
}
_size++;
return true;
}
template<class K, class V, class HashFunc>
KeyValue<K, V>* HashTablesBucket<K, V, HashFunc>::Find(const K& key)
{
size_t index = HashTablesBucketFunc(key);
KeyValue<K, V>* cur = _tables[index];
while (cur){
if (cur->_key == key){
return cur;
}
cur = cur->_next;
}
return NULL;
}
template<class K, class V, class HashFunc>
bool HashTablesBucket<K, V, HashFunc>::Remove(const K& key)
{
KeyValue<K, V>* tmp = Find(key);
if (tmp == NULL){
std::cout << "未找到key為" << key << "的資料"<<std::endl;
return false;
}
size_t index = HashTablesBucketFunc(key);
KeyValue<K, V>* cur = _tables[index];
if (cur == NULL){
std::cout << "未找到key為" << key << "的資料" << std::endl;
return false;
}
if (cur->_key == key){
_tables[index] = cur->_next;
cur->_next = NULL;
delete cur;
cur = NULL;
--_size;
return true;
}
while (cur->_next){
if (cur->_next->_key == key){
KeyValue<K, V>* tmp = cur->_next;
cur->_next = cur->_next->_next;
tmp->_next = NULL;
return true;
}
cur = cur->_next;
}
std::cout << "未找到key為" << key << "的資料" << std::endl;
return false;
}
template<class K, class V, class HashFunc>
void HashTablesBucket<K, V, HashFunc>::Print()
{
std::cout << "_tables:" << std::endl;
for (size_t i = 0; i < _tables.size(); ++i){
KeyValue<K, V>* cur = _tables[i];
if (cur != NULL){
std::cout << " _tables[" << i << "]-->";
while (cur){
std::cout << cur->_key << "," << cur->_value << " ";
cur = cur->_next;
}
std::cout << std::endl;
}
}
}
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>::~HashTablesBucket()
{
for (size_t i = 0; i < _tables.size(); ++i){
KeyValue<K, V>* cur = _tables[i];
_Deatructor(cur);
_tables[i] = NULL;
}
}