HashTable,也叫哈希表/散列表,是一种通过key值直接访问在内存存储位置的数据结构
这种数据结构在插入、删除、查找的操作上具有“常数平均时间”的表现
哈希表最大的优点,就是把数据的存储和查找消耗的时间大大降低,几乎可以看成是常数时间;而代价仅仅是消耗比较多的内存。然而在当前可利用内存越来越多的情况下,用空间换时间的做法是值得的。
散列函数:使用某种映射函数,将大数映射为小数(所给键值映射为较小的方便作为下标的数字)
(<stl_hash_fun>中定义size_t __stl_hash_string(const char* s))
使用hash function可能会带来“碰撞”问题,解决这个问题有多种方法,包括线性探测、二次探测、开链法
负载因子:元素个数/表格大小 除了开链法,其它情况负载因子都在0-1
插入:利用hash function计算出元素插入位置,如果该位置的空间不可用,则往下寻找,直到找到一
个可用空间
查找:利用hash function计算出元素位置,如果该位置的元素不是我们所要找的元素,则往下寻找,
直到找到
删除:必须使用惰性删除,也就是只标记删除记号,实际删除操作则待表格重新整理时进行,因
为hashtable中的每一个元素不仅表述它自己,也关系到其它元素的排列
线性探测:H+i (i=0,1,2,3,4...)
二测探测:H+i^2 (i=0,1,2,3,4...)
开链法:在每一个表格元素维护一个list 虽然是线性操作,但如果每个list够短,速度还是够快
//_Capacity()中new不出空间
#pragma once
#include<string>
#include<ostream>
template<class K>
struct HASHFUNC
{
size_t operator()(const K& key, size_t capacity)
{
return key%capacity;
}
};
template<>
struct HASHFUNC<std::string>
{
size_t operator()(const std::string& s,int capacity)
{
const char* str = s.c_str();
return BKDRHash(str)%capacity;
}
static size_t BKDRHash(const char * str)
{
unsigned int seed = 131; // 31 131 1313 13131 131313
unsigned int hash = 0;
while (*str)
{
hash = hash * seed + (*str++);
}
return (hash & 0x7FFFFFFF);
}
};
//hashtable的删除采用的是惰性删除,也就是只标记删除记号,实际删除操作则待表格重新整理时进行,
//这是因为hashtable中每一个元素不仅表述它自己,也关系到其它元素的排列
enum Status
{
EMPTY,
EXIST,
DELETE
};
template<class K, class V>
struct KeyValue
{
KeyValue(const K key=K(), const V value=V())
:_key(key)
,_value(value)
{}
friend std::ostream& operator<<(std::ostream &_cout, const KeyValue<K, V>& kv);
K _key;
V _value;
};
template<class K, class V, class COM = HASHFUNC<K>>
class Hash
{
public:
Hash();
bool Insert(K key, V value);
KeyValue<K, V>* Find(K key);
bool Remove(K key);
void Print();
~Hash();
//friend std::ostream& operator<<(std::ostream &_cout, const KeyValue<K, V>& kv);
protected:
void _Capacity()
{
const int _PrimeSize = 28;
static const unsigned long _PrimeList[_PrimeSize] =
{
53ul, 97ul, 193ul, 389ul, 769ul,
1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
1610612741ul, 3221225473ul, 4294967291ul
};
//确定容量
size_t NewCapacity = _capacity;
if (NewCapacity == 0){
NewCapacity = _PrimeList[0];
}
else if (_size * 10 / NewCapacity >= 7){
for (int i = 0; i < _PrimeSize; ++i){
if (NewCapacity < _PrimeList[i]){
NewCapacity = _PrimeList[i];
break;
}
}
}
//把原表中的数据哈希到扩容后的表中
if ( NewCapacity != _capacity){
Status* NewStatus = new Status[NewCapacity];
KeyValue<K, V>* NewTables = new KeyValue<K, V>[NewCapacity];//************************new不出空间
memset(NewStatus, EMPTY, sizeof(NewStatus)*NewCapacity);
if (_capacity != 0){
for (size_t index = 0; index < _capacity; ++index)
{
if (_status[index] == EXIST){
size_t i = HashFunc(_tables[index]._key, _capacity);//得到该关键字对应的位置
NewTables[i]._key = _tables[index]._key;
NewTables[i]._value = _tables[index]._value;
NewStatus[i] = EXIST;
}
}
}
std::swap(NewTables, _tables);
std::swap(NewStatus, _status);
_capacity = NewCapacity;
}
}
size_t HashFunc(const K& key, size_t capacity)
{
return COM()(key, capacity);
}
size_t HashFunc2(const K& key,size_t capacity)
{
size_t i = 0;
size_t res = HashFunc(key, capacity);
while(_status[(res + (i*i)) % capacity] == EXIST){
i++;
}
return (res + (i*i)) % capacity;
}
int _Find(K key);
protected:
size_t _size;
size_t _capacity;
KeyValue<K,V>* _tables;
Status* _status; //哈希表位置的状态
};
template<class K, class V, class COM = HASHFUNC<K>>
Hash<K, V, COM >::Hash()
: _size(0)
, _capacity(0)
, _tables(NULL)
, _status(NULL)
//, _tables(new KeyValue<K,V>[_capacity])
//, _status(new Status[_capacity])
{
/*memset(_tables, NULL, sizeof(KeyValue<K,V>)*_capacity);
memset(_status, EMPTY, sizeof(Status)*_capacity);*/
}
template<class K, class V, class COM = HASHFUNC<K>>
bool Hash<K, V, COM >::Insert(K key, V value)
{
_Capacity();//检查容量,扩容
size_t index = HashFunc2(key,_capacity);//****************
//while (_status[index]!=EMPTY){
// if (_tables[index]._key == key)
// return false;
// index++;
//}
//while (_status[index] == EXIST){
// index++;
//}
_tables[index]._key = key;
_tables[index]._value = value;
_status[index] = EXIST;
++_size;
return true;
}
template<class K, class V, class COM = HASHFUNC<K>>
int Hash<K, V, COM >::_Find(K key)
{
size_t k = HashFunc(key, _capacity);
if (_tables[k]._key == key)
return k;
size_t i = 1;
size_t index = (k + i*i) % _capacity;
while (_tables[index]._key!=_tables[k]._key){
if (_status[index] == EXIST){
if (_tables[index]._key == key){
return index;
}
}
i++;
index = (k + i*i)%_capacity;
}
std::cout << "未找到key为" << key << "的数据" << std::endl;
return -1;
}
template<class K, class V, class COM = HASHFUNC<K>>
KeyValue<K, V >* Hash<K, V, COM >::Find(K key)
{
size_t index=_Find(key);
if (index != -1){
std::cout << "已找到key为" << key << "的数据,下标为" << index << std::endl;
return &_tables[index];
}
return NULL;
}
template<class K, class V, class COM = HASHFUNC<K>>
bool Hash<K, V, COM >::Remove(K key)
{
int index = _Find(key);
if (index== -1)
return false;
_status[index] = DELETE;
std::cout << "已删除key为"<<key<<"的数据" << std::endl;
return true;
}
template<class K, class V, class COM = HASHFUNC<K>>
void Hash<K, V, COM >::Print()
{
std::cout << "_tables:" << std::endl;
for (size_t i = 0; i < _capacity; ++i){
if (_status[i] == EXIST){
std::cout << " _tables[" << i << "]-->" << _tables[i]._key<<","<<_tables[i]._value << std::endl;
}
}
std::cout << std::endl;
}
template<class K, class V, class COM = HASHFUNC<K>>
Hash<K, V, COM >::~Hash()
{
for (size_t i = 0; i < _capacity; ++i){
if (_status[i] == EXIST)
_status[i] = DELETE;
}
delete[] _tables;
delete[] _status;
}
template<class K, class V>
std::ostream& operator<<(std::ostream &_cout, const KeyValue<K, V>& kv)
{
_cout <<kv._value << std::endl;
return _cout;
}
#pragma once
#include<vector>
#include<string>
template<class K>
struct HashFuncDefault
{
size_t operator()(const K& key,const size_t& capacity)
{
return key%capacity;
}
};
template<>
struct HashFuncDefault<std::string>
{
static size_t BKDRHash(const char * str)
{
unsigned int seed = 131; // 31 131 1313 13131 131313
unsigned int hash = 0;
while (*str)
{
hash = hash * seed + (*str++);
}
return (hash & 0x7FFFFFFF);
}
size_t operator()(const std::string& key, const size_t& capacity)
{
const char* str = key.c_str();
return BKDRHash(str)%capacity;
}
};
template<class K,class V>
struct KeyValue
{
KeyValue()
:_key(0)
, _value(0)
, _next(NULL)
{}
KeyValue(const K& key,const V& value)
:_key(key)
, _value(value)
, _next(NULL)
{}
K _key;
V _value;
KeyValue<K, V>* _next;
};
template<class K, class V, class HashFunc = HashFuncDefault<K>>
class HashTablesBucket
{
public:
HashTablesBucket();
HashTablesBucket(const HashTablesBucket<K,V,HashFunc>& hst);
HashTablesBucket<K,V,HashFunc>& operator=(const HashTablesBucket<K, V, HashFunc>& hst);
bool Insert(const K& key, const V& value);
KeyValue<K, V>* Find(const K& key);
bool Remove(const K& key);
void Print();
~HashTablesBucket();
protected:
void _Copy(const HashTablesBucket<K,V,HashFunc>& hsb)
{
_tables.resize(hsb._tables.size());
for (size_t i = 0; i < hsb._tables.size(); ++i){
KeyValue<K, V>*cur = hsb._tables[i];
while (cur){
KeyValue<K, V>* NewCur = new KeyValue<K, V>(cur->_key,cur->_value);
NewCur->_next = _tables[i];
_tables[i]= NewCur;
cur = cur->_next;
}
}
}
//检查容量
void _Capacity()
{
const int _PrimeSize = 28;
static const unsigned long _PrimeList[_PrimeSize] =
{
53ul, 97ul, 193ul, 389ul, 769ul,
1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
1610612741ul, 3221225473ul, 4294967291ul
};
//扩容
size_t NewCapacity = _tables.size();
if (_size == _tables.size()){
for (size_t i = 0; i < _PrimeSize; ++i){
if (_tables.size()< _PrimeList[i]){
NewCapacity = _PrimeList[i];
break;
}
i++;
}
}
//说明有扩容,需把原来的数据哈希到新的表中
HashTablesBucket<K, V, HashFunc> NewHTB;
NewHTB._tables.resize(NewCapacity);
if (NewCapacity != _tables.size()){
for(size_t i = 0; i < _tables.size(); ++i){
KeyValue<K, V>* cur = _tables[i];
while (cur){
size_t index=HashTablesBucketFunc(cur->_key);
_tables[i] = cur->_next;
NewHTB._tables[index]= cur;
cur = _tables[i];
}
}
swap(NewHTB._tables, _tables);
}
}
//返回下标
size_t HashTablesBucketFunc(const K& key)
{
return HashFunc()(key,_tables.size());
}
//析构函数
void _Deatructor(KeyValue<K,V>* cur)
{
while (cur){
KeyValue<K, V>* tmp = cur;
cur = cur->_next;
delete tmp;
tmp = NULL;
}
}
protected:
size_t _size;
std::vector<KeyValue<K, V>*> _tables;
};
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>::HashTablesBucket()
:_size(0)
{}
//拷贝构造
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>::HashTablesBucket(const HashTablesBucket<K, V, HashFunc>& hst)
: _size(hst._size)
{
_Copy(hst);
}
//赋值操作符的重载
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>& HashTablesBucket<K, V, HashFunc>::operator=(const HashTablesBucket<K, V, HashFunc>& hst)
{
if (this != &hst){
for (size_t i = 0; i < _tables.size(); ++i){
_Deatructor(_tables[i]);
_tables[i] = NULL;
}
_size=hst._tables.size();
_Copy(hst);
}
return *this;
}
template<class K, class V, class HashFunc>
bool HashTablesBucket<K, V, HashFunc>::Insert(const K& key, const V& value)
{
_Capacity();
size_t index = HashTablesBucketFunc(key);//二次探测
KeyValue<K, V>* cur = _tables[index];
while (cur)
{
if (cur->_key == key){
return false;
}
cur = cur->_next;
}
KeyValue<K, V>* kv = new KeyValue<K, V>(key, value);
if (_tables[index]==NULL){
_tables[index] = kv;
}
else{
kv->_next = _tables[index]->_next;
_tables[index]->_next = kv;
}
_size++;
return true;
}
template<class K, class V, class HashFunc>
KeyValue<K, V>* HashTablesBucket<K, V, HashFunc>::Find(const K& key)
{
size_t index = HashTablesBucketFunc(key);
KeyValue<K, V>* cur = _tables[index];
while (cur){
if (cur->_key == key){
return cur;
}
cur = cur->_next;
}
return NULL;
}
template<class K, class V, class HashFunc>
bool HashTablesBucket<K, V, HashFunc>::Remove(const K& key)
{
KeyValue<K, V>* tmp = Find(key);
if (tmp == NULL){
std::cout << "未找到key为" << key << "的数据"<<std::endl;
return false;
}
size_t index = HashTablesBucketFunc(key);
KeyValue<K, V>* cur = _tables[index];
if (cur == NULL){
std::cout << "未找到key为" << key << "的数据" << std::endl;
return false;
}
if (cur->_key == key){
_tables[index] = cur->_next;
cur->_next = NULL;
delete cur;
cur = NULL;
--_size;
return true;
}
while (cur->_next){
if (cur->_next->_key == key){
KeyValue<K, V>* tmp = cur->_next;
cur->_next = cur->_next->_next;
tmp->_next = NULL;
return true;
}
cur = cur->_next;
}
std::cout << "未找到key为" << key << "的数据" << std::endl;
return false;
}
template<class K, class V, class HashFunc>
void HashTablesBucket<K, V, HashFunc>::Print()
{
std::cout << "_tables:" << std::endl;
for (size_t i = 0; i < _tables.size(); ++i){
KeyValue<K, V>* cur = _tables[i];
if (cur != NULL){
std::cout << " _tables[" << i << "]-->";
while (cur){
std::cout << cur->_key << "," << cur->_value << " ";
cur = cur->_next;
}
std::cout << std::endl;
}
}
}
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>::~HashTablesBucket()
{
for (size_t i = 0; i < _tables.size(); ++i){
KeyValue<K, V>* cur = _tables[i];
_Deatructor(cur);
_tables[i] = NULL;
}
}