1.线性探测
2.二次探测
3.载荷因子
4.扩容
- 扩容一：
5.删除（查找，删除，判断状态）
6.插入特殊值时
- 初试（字符串）
- BKDR（解决字符串）
- BKDR之后的负数和int
- 默认的不需要传第三个参数
7.哈希冲突
8.开散列
- insert
- Erase
- 开散列整体代码
9.key对象的要求
10.套用map和set
- insert套用
- ++
- - UnorderedSet.h
  - UnorderedMap.h
  - HashTable.h
- map[]
- 拷贝构造
- 深拷贝
- 析构函数
11.延伸：扩容
12.最终代码
- UnorderedSet.h
- UnorderedSet.h
- HashTable.h

1.线性探测

bool Insert(const pair<K, V>& kv)
	{
		size_t start = kv.first % _tables.size();
		size_t i = 0;
		size_t index = start + i;
		while (_tables[index].first == EXITS)
		{
			i++;
			index = start + i;
			index %= _tables;
		}

		_tables[index]._kv = kv;
		_table[index]._status = EXIST;
		++_n;

	}

C++下“hash“的简单模拟实现以及实现中遇见的问题1.线性探测2.二次探测3.载荷因子4.扩容5.删除（查找，删除，判断状态）6.插入特殊值时7.哈希冲突8.开散列9.key对象的要求10.套用map和set11.延伸：扩容12.最终代码

2.二次探测

bool Insert(const pair<K, V>& kv)
	{
		size_t start = kv.first % _tables.size();
		size_t i = 0;
		size_t index = start + i;
		while (_tables[index].first == EXITS)
		{
			i++;
			index = start + i*i;
			index %= _tables;
		}

		_tables[index]._kv = kv;
		_table[index]._status = EXIST;
		++_n;

	}

3.载荷因子

引入：当线性探测和二次探测中，列表中的值满了，或者将近满的时候，会导致效率降低

问题：哈希表什么情况下进行扩容？如何扩容？

散列表的载荷因子定义为：α=填入表中的元素个数/散列表的长度

α 是散列表装满程度的标志因子，由于表长是定值，α与“填入表中的元素个数”成正比，所以α越大，表明填入表中的元素越多，产生冲突的可能性就越大；反之，α越小，表明填入表中的元素越少，产生冲突的可能性就越小，实际上，散列表的平均查找长度是载荷因子的α的函数，只是不同处理冲突的方法有不同的函数。

对于开放定址法，载荷因子是特别重要因素，应严格限制在 0.7到0.8以下，超多0.8，查表时的CPU缓存不命中按照指数曲线上升，因此，一些采用开放定址法的hash库，如Java的系统库限制了载荷因子为0.75，超过此值将resize散列表。

4.扩容

bool Insert(const pair<K, V>& kv)
	{
		//负载因子到0.7，扩容
		//负载因子越小，冲突概率越低，效率越高，空间浪费越多
		//负载因子越大，冲突概率越高，效率越低，空间浪费越少
		if (_tables.size()==0 || _n*10 / _tables.size() >= 7)
		{
			//扩容
			size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
			//如何扩容
		}

		//线性探测和二次探测代码省略。。。
	}

问题：当我们扩容时，原来的数据该如何？

扩容一：

#pragma once
#include<vector>

enum Status
{
	EXIST,
	EMPTY,
	DELETE
};

template<class K, class V>
struct HashData
{
	pair<K, V> _kv;
	Status _status = EMPTY;
};



template<class K, class V>
class HashTable
{
public:
	bool Insert(const pair<K, V>& kv)
	{
		//负载因子到0.7，扩容
		//负载因子越小，冲突概率越低，效率越高，空间浪费越多
		//负载因子越大，冲突概率越高，效率越低，空间浪费越少
		if (_tables.size() == 0 || _n * 10 / _tables.size() >= 7)
		{
			//扩容
			size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;

			HashTable<K, V> newHT;
			newHT._tables.resize(newSize);
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				if (_tables[i]._status == EXIST)
				{
					newHT.Insert(_tables[i]._kv);
				}
			}
			_tables.swap(newHT._tables);
		}

		size_t start = kv.first % _tables.size();
		size_t i = 0;
		size_t index = start;
		while (_tables[index]._status == EXIST)
		{
			++i;
			//index = start + i * i;//二次探测
			index = start + i;//线性探测
			index %= _tables.size();
		}

		_tables[index]._kv = kv;
		_tables[index]._status = EXIST;
		++_n;

		return true;
	}

private:
	vector<HashData<K, V>> _tables;
	size_t _n = 0;//有效数据个数
};

void TestHashTables()
{
	HashTable<int, int> ht;
	int a[] = { 2, 12, 22, 32, 42, 52, 62 };
	for (auto e : a)
	{
		ht.Insert(make_pair(e, e));
	}
	ht.Insert(make_pair(72, 72));

}

5.删除（查找，删除，判断状态）

#pragma once
#include<vector>

enum Status
{
	EXIST,
	EMPTY,
	DELETE
};

template<class K,class V>
struct HashData
{
	pair<K, V> _kv;
	Status _status = EMPTY;
};



template<class K, class V>
class HashTable
{
public:
	bool Erase(const K& key)
	{
		HashData<K, V>* ret = Find(key);
		if (ret)
		{
			ret->_status = DELETE;
			--_n;
			return true;
		}
		return false;
	}

	HashData<K, V>* Find(const K& key)
	{
		if (_tables.size() == 0)
			return nullptr;
		size_t start = key % _tables.size();
		size_t i = 0;
		size_t index = start;
		while (_tables[index]._status != EMPTY)
		{
			if (_tables[index]._kv.first == key && _tables[index]._status==EXIST)
			{
				return &_tables[index];
			}
			++i;
			//index = start + i * i;//二次探测
			index = start + i;//线性探测
			index %= _tables.size();
		}
		return nullptr;
	}


	bool Insert(const pair<K, V>& kv)
	{
		//负载因子到0.7，扩容
		//负载因子越小，冲突概率越低，效率越高，空间浪费越多
		//负载因子越大，冲突概率越高，效率越低，空间浪费越少
		if (_tables.size()==0 || _n*10 / _tables.size() >= 7)
		{
			//扩容
			size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;

			HashTable<K, V> newHT;
			newHT._tables.resize(newSize);
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				if (_tables[i]._status == EXIST)
				{
					newHT.Insert(_tables[i]._kv);
				}
			}
			_tables.swap(newHT._tables);
		}

		size_t start = kv.first % _tables.size();
		size_t i = 0;
		size_t index = start;
		while (_tables[index]._status == EXIST)
		{
			++i;
			//index = start + i * i;//二次探测
			index = start + i;//线性探测
			index %= _tables.size();
		}

		_tables[index]._kv = kv;
		_tables[index]._status = EXIST;
		++_n;

		return true;
	}

private:
	vector<HashData<K, V>> _tables;
	size_t _n=0;//有效数据个数
};

void TestHashTables()
{
	HashTable<int, int> ht;
	int a[] = { 2,12,22,32,42,52,62 }; 
	for (auto e : a)
	{
		ht.Insert(make_pair(e,e));
	}
	ht.Insert(make_pair(72, 72));
	ht.Insert(make_pair(42,42));//测试查找重复值

	cout << ht.Find(12) << endl;
	ht.Erase(12);
	cout << ht.Find(12) << endl;
}

6.插入特殊值时

初试（字符串）

当插入的是字符串时，

struct HashStr
{
	size_t operator()(const string& s)
	{
		size_t value = 0;
		for (auto ch : s)
		{
			value += ch;
		}
		return value;
	}
};

void Hashstring()
{
	HashStr hs;
	cout << hs("left") << endl;
	cout << hs("right") << endl;
	cout << hs("abcd") << endl;
	cout << hs("aadd") << endl;
	cout << hs("acbd") << endl;
}

缺点：

BKDR（解决字符串）

struct HashStr
{
	size_t operator()(const string& s)
	{
		//BKDR
		size_t value=0;
		for (auto ch : s)
		{
			value *= 31;
			value += ch;
		}
		return value;
	}
};
void Hashstring()
{
	HashStr hs;
	cout << hs("left") << endl;
	cout << hs("right") << endl;
	cout << hs("abcd") << endl;
	cout << hs("aadd") << endl;
	cout << hs("acbd") << endl;
}

BKDR之后的负数和int

/begin//

此处为相比之前的修改或难点

//end

#pragma once
#include<vector>

enum Status
{
	EXIST,
	EMPTY,
	DELETE
};

template<class K,class V>
struct HashData
{
	pair<K, V> _kv;
	Status _status = EMPTY;
};
//begin/
template<class K>
struct Hash
{
	size_t operator()(const K& key)
	{
		return key;
	}
};
//end/

struct HashStr
{
	size_t operator()(const string& s)
	{
		//BKDR
		size_t value=0;
		for (auto ch : s)
		{
			value *= 31;
			value += ch;
		}
		return value;
	}
};
/begin//
template<class K, class V, class HashFunc>
/end//
class HashTable
{
public:
	bool Erase(const K& key)
	{
		HashData<K, V>* ret = Find(key);
		if (ret)
		{
			ret->_status = DELETE;
			--_n;
			return true;
		}
		return false;
	}

	HashData<K, V>* Find(const K& key)
	{
		if (_tables.size() == 0)
			return nullptr;

		HashFunc hf;
		size_t start = hf(key) % _tables.size();
		size_t i = 0;
		size_t index = start;
		while (_tables[index]._status != EMPTY)
		{
			if (_tables[index]._kv.first == key && _tables[index]._status==EXIST)
			{
				return &_tables[index];
			}
			++i;
			//index = start + i * i;//二次探测
			index = start + i;//线性探测
			index %= _tables.size();
		}
		return nullptr;
	}


	bool Insert(const pair<K, V>& kv)
	{
		//负载因子到0.7，扩容
		//负载因子越小，冲突概率越低，效率越高，空间浪费越多
		//负载因子越大，冲突概率越高，效率越低，空间浪费越少
		if (_tables.size()==0 || _n*10 / _tables.size() >= 7)
		{
			//扩容
			size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;

			HashTable<K, V,HashFunc> newHT;
			newHT._tables.resize(newSize);
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				if (_tables[i]._status == EXIST)
				{
					newHT.Insert(_tables[i]._kv);
				}
			}
			_tables.swap(newHT._tables);
		}

		HashFunc hf;
		size_t start = hf(kv.first) % _tables.size();
		size_t i = 0;
		size_t index = start;
		while (_tables[index]._status == EXIST)
		{
			++i;
			//index = start + i * i;//二次探测
			index = start + i;//线性探测
			index %= _tables.size();
		}

		_tables[index]._kv = kv;
		_tables[index]._status = EXIST;
		++_n;

		return true;
	}

private:
	vector<HashData<K, V>> _tables;
	size_t _n=0;//有效数据个数
};

void test3()
{
	Hash<int> hint;
	cout << hint(-9) << endl;
	cout << hint(9) << endl;
}

当传入int时，也可以隐式类型转化

默认的不需要传第三个参数

采用特化

//特化
template<>
struct Hash<string>
{
	size_t operator()(const string& s)
	{
		//BKDR
		size_t value = 0;
		for (auto ch : s)
		{
			value *= 31;
			value += ch;
		}
		return value;
	}

};

//struct HashStr
//{
//	size_t operator()(const string& s)
//	{
//		//BKDR
//		size_t value=0;
//		for (auto ch : s)
//		{
//			value *= 31;
//			value += ch;
//		}
//		return value;
//	}
//};

//...

void test4()
{
	HashTable<string, string> ht;
	ht.Insert(make_pair("red", "红色"));
	ht.Insert(make_pair("blue", "蓝色"));
}

7.哈希冲突

闭散列/开放定址法：不同位置冲突的数据会互相影响，相邻位置的冲突会争抢位置，互相影响效率。

优化

开散列/拉链法/哈希桶：相邻位置冲突，不会再争抢位置，不会互相影响

控制负载因子–负载因子到了就扩容

极端场景防止一个桶位置冲突太多，链表太长

当一个桶长度超过一定值以后，转化红黑树

补充：不仅可以尾插，也可以头插，头插更方便点。

8.开散列

思路参考上一个标题“哈希冲突”，不同点：这里是头插

insert

namespace LinkHash
{
	template<class K,class V>
	struct HashNode
	{
		pair<K, V> _kv;
		HashNode<K, V>* _next = nullptr;

		HashNode(const pair<K,V>& kv)
			:_kv(kv)
			,_next(nullptr)
		{}
	};
	template<class K>
	struct Hash
	{
		size_t operator()(const K& key)
		{
			return key;
		}
	};

	//特化
	template<>
	struct Hash<string>
	{
		size_t operator()(const string& s)
		{
			//BKDR
			size_t value = 0;
			for (auto ch : s)
			{
				value *= 31;
				value += ch;
			}
			return value;
		}

	};


	template<class K,class V,class HashFunc = Hash<K>>
	class HashTable
	{
		typedef HashNode<K, V> Node;
	public:

		Node* Find(const K& key)
		{
			if (_tables.empty())
				return nullptr;

			HashFunc hf;
			size_t index = hf(key) % _tables.size();
			Node* cur = _tables[index];
			while (cur)
			{
				if (cur->_kv.first == key)
					return cur;
				cur = cur->_next;
			}
			return nullptr;
		}
		bool Insert(const pair<K, V>& kv)
		{
			Node* ret = Find(kv.first);
			if (ret)
				return false;
			if (_n ==  _tables.size())
			{
				size_t newSize = _tables.size() == 0 ? 10 : 2 * _tables.size();

				vector<Node*> newTables;
				newTables.resize(newSize);
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while(cur)
					{
						Node* next = cur->_next;
						HashFunc hf;
						size_t index = hf(cur->_kv.first) % newTables.size();
						//头插，效率比较高
						cur->_next = newTables[index];
						newTables[index] = cur;

						cur = next;

					}
					_tables[i] = nullptr;
				}
				newTables.swap(_tables);
			}
			HashFunc hf;
			size_t index = hf(kv.first) % _tables.size();
			Node* newnode = new Node(kv);
			//头插
			newnode->_next = _tables[index];
			_tables[index] = newnode;

			++_n;
			return true;
		}

	private:
		vector<Node*> _tables;
		size_t _n=0;//有效数据个数
	};

	void TestHashTable()
	{
		int a[] = { 2,12,22,32,42,52,62,72,82,5,15,25};
		HashTable<int, int> ht;
		for (auto e : a)
		{
			ht.Insert(make_pair(e,e));
		}

		ht.Insert(make_pair(1, 1));
	}
}

Erase

bool Erase(const K& key)
		{
			if (_tables.empty())
				return false;

			HashFunc hf;
			size_t index = hf(key) % _tables.size();
			Node* prev = nullptr;
			Node* cur = _tables[index];
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					if (prev == nullptr) {
						//头删
						_tables[index] = cur->_next;
					}
					else {
						//中间删除and尾删
						prev->_next = cur->_next;
					}
					--_n;
					delete cur;
					return true;
				}
				else
				{
					prev = cur;
					cur = cur->_next;
				}
			}
			return false;
		}

开散列整体代码

namespace LinkHash
{
	template<class K,class V>
	struct HashNode
	{
		pair<K, V> _kv;
		HashNode<K, V>* _next = nullptr;

		HashNode(const pair<K,V>& kv)
			:_kv(kv)
			,_next(nullptr)
		{}
	};
	template<class K>
	struct Hash
	{
		size_t operator()(const K& key)
		{
			return key;
		}
	};

	//特化
	template<>
	struct Hash<string>
	{
		size_t operator()(const string& s)
		{
			//BKDR
			size_t value = 0;
			for (auto ch : s)
			{
				value *= 31;
				value += ch;
			}
			return value;
		}

	};


	template<class K,class V,class HashFunc = Hash<K>>
	class HashTable
	{
		typedef HashNode<K, V> Node;
	public:
		bool Erase(const K& key)
		{
			if (_tables.empty())
				return false;

			HashFunc hf;
			size_t index = hf(key) % _tables.size();
			Node* prev = nullptr;
			Node* cur = _tables[index];
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					if (prev == nullptr) {
						//头删
						_tables[index] = cur->_next;
					}
					else {
						//中间删除and尾删
						prev->_next = cur->_next;
					}
					--_n;
					delete cur;
					return true;
				}
				else
				{
					prev = cur;
					cur = cur->_next;
				}
			}
			return false;
		}

		Node* Find(const K& key)
		{
			if (_tables.empty())
				return nullptr;

			HashFunc hf;
			size_t index = hf(key) % _tables.size();
			Node* cur = _tables[index];
			while (cur)
			{
				if (cur->_kv.first == key)
					return cur;
				cur = cur->_next;
			}
			return nullptr;
		}
		bool Insert(const pair<K, V>& kv)
		{
			Node* ret = Find(kv.first);
			if (ret)
				return false;
			if (_n ==  _tables.size())
			{
				size_t newSize = _tables.size() == 0 ? 10 : 2 * _tables.size();

				vector<Node*> newTables;
				newTables.resize(newSize);
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while(cur)
					{
						Node* next = cur->_next;
						HashFunc hf;
						size_t index = hf(cur->_kv.first) % newTables.size();
						//头插，效率比较高
						cur->_next = newTables[index];
						newTables[index] = cur;

						cur = next;

					}
					_tables[i] = nullptr;
				}
				newTables.swap(_tables);
			}
			HashFunc hf;
			size_t index = hf(kv.first) % _tables.size();
			Node* newnode = new Node(kv);
			//头插
			newnode->_next = _tables[index];
			_tables[index] = newnode;

			++_n;
			return true;
		}

	private:
		vector<Node*> _tables;
		size_t _n=0;//有效数据个数
	};

	void TestHashTable()
	{
		int a[] = { 2,12,22,32,42,52,62,72,82,5,15,25};
		HashTable<int, int> ht;
		for (auto e : a)
		{
			ht.Insert(make_pair(e,e));
		}

		ht.Insert(make_pair(1, 1));
	}
}

9.key对象的要求

10.套用map和set

insert套用

#pragma once
#include"HashTable.h"

namespace sakeww
{
	template<class K,class hash= Hash<K>>
	class unordered_set
	{
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};
	public:
		bool insert(const K& key)
		{
			return _ht.Insert(key);
		}

	private:
		LinkHash::HashTable<K, K,SetKeyOfT,hash> _ht;
	};
}

#pragma once
#include"HashTable.h"

namespace sakeww
{
	template<class K,class V,class hash = Hash<K>>
	class unordered_map
	{
		struct MapKeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};

		bool insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);
		} 

	private:
		LinkHash::HashTable<K, pair<K, V>,MapKeyOfT,hash> _ht;
	};
}

++

困难点：

UnorderedSet ，HashTale二者要互相调用对方的命名空间

++的模拟实现

因为使用的是单向链表，所以只能++，不能减减

UnorderedSet.h

#pragma once
#include"HashTable.h"

namespace sakeww
{
	template<class K,class hash= Hash<K>>
	class unordered_set
	{
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};
	public:
		typedef typename LinkHash::HashTable<K, K, SetKeyOfT, hash>::iterator iterator;
		iterator begin()
		{
			return _ht.begin();
		}
		iterator end()
		{
			return _ht.end();
		}
		
		bool insert(const K& key)
		{
			return _ht.Insert(key);
		}

	private:
		LinkHash::HashTable<K, K,SetKeyOfT,hash> _ht;
	};
	
	void test2()
	{
		unordered_set<int> us;
		us.insert(2);
		us.insert(22);
		us.insert(12);
		us.insert(32);
		us.insert(42);
		us.insert(2);
		us.insert(33);
		us.insert(45);

		unordered_set<int>::iterator it = us.begin();
		while (it != us.end())
		{
			cout << *it << " ";
			++it;
		}
		cout << endl;
	}
}

UnorderedMap.h

#pragma once
#include "HashTable.h"

namespace sakeww
{
	template<class K, class V, class hash = Hash<K>>
	class unordered_map
	{
		struct MapKeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};
	public:
		typedef typename LinkHash::HashTable<K, pair<K, V>, MapKeyOfT, hash>::iterator iterator;
		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}
		
		pair<iterator,bool> insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);
		}

	private:
		LinkHash::HashTable<K, pair<K, V>, MapKeyOfT, hash> _ht;
	};
	void testMap()
	{
		unordered_map<string, string> dict;
		dict.insert(make_pair("sort", "排序"));
		dict.insert(make_pair("string", "字符串"));
		dict.insert(make_pair("map", "地图"));
		cout << dict["sort"] << endl;//充当查找
		cout << dict["left"] << endl;//插入
		dict["right"] = "右边";//插入和修改

		unordered_map<string, string>::iterator it = dict.begin();
		while (it != dict.end())
		{
			cout << it->first << ":" << it->second << endl;
			++it;
		}
		cout << endl;
	}
}

HashTable.h

namespace LinkHash
{
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		HashNode(const T& data)
			:_data(data)
			,_next(nullptr)
		{}
	};
begin//
	template<class K, class T, class KeyOfT, class HashFunc>
	class HashTable;
///end///
	template<class K, class T, class Ref, class Ptr, class KeyOfT, class HashFunc>
	struct __HTIterator
	{
		typedef HashNode<T> Node;
		typedef __HTIterator<K, T, Ref, Ptr, KeyOfT, HashFunc> Self;
		 
		Node* _node; 
		HashTable<K, T, KeyOfT, HashFunc>* _pht;

		__HTIterator(Node* node, HashTable<K, T, KeyOfT, HashFunc>* pht)
			:_node(node)
			,_pht(pht)
		{}

		Ref& operator*()
		{
			return _node->_data;
		}
		Ptr& operator->()
		{
			return &_node->_data;
		}

		Self& operator++()
		{
			if (_node->_next)
			{
				_node = _node->_next;
			}
			else
			{
				KeyOfT kot;
				HashFunc hf;
				size_t index = hf(kot(_node->_data)) % _pht->_tables.size();
				++index;

				// 找下一个不为空的桶
				while (index< _pht->_tables.size()) //_pht->_tables.size()表的大小
				{
					if (_pht->_tables[index])
					{
						break;
					}
					else
						++index;
				}

				if (index == _pht->_tables.size())
				{
					_node = nullptr;
				}
				else
				{
					_node = _pht->_tables[index];
				}
			}
			return *this;
		}

		bool operator!=(const Self& s) const
		{
			return _node != s._node;
		}

		bool operator==(const Self& s) const
		{
			return _node == s._node;
		}
	};



	template<class K,class T,class KeyOfT,class HashFunc>
	class HashTable
	{
		typedef HashNode<T> Node;
/begin/
		template<class K, class T, class Ref, class Ptr, class KeyOfT, class HashFunc>
		friend struct __HTIterator;
//end
	public:
		typedef __HTIterator<K, T, T&, T*, KeyOfT, HashFunc> iterator;

		iterator begin()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				if (_tables[i])
				{
					return iterator(_tables[i],this);
				}
			}
			return end();
		}
		iterator end()
		{
			return iterator(nullptr, this);
		}

		bool Erase(const K& key)
		{
			if (_tables.empty())
				return false;

			HashFunc hf;
			size_t index = hf(key) % _tables.size();
			Node* prev = nullptr;
			Node* cur = _tables[index];
			KeyOfT kot;
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					if (prev == nullptr) {
						//头删
						_tables[index] = cur->_next;
					}
					else {
						//中间删除and尾删
						prev->_next = cur->_next;
					}
					--_n;
					delete cur;
					return true;
				}
				else
				{
					prev = cur;
					cur = cur->_next;
				}
			}
			return false;
		}

		Node* Find(const K& key)
		{
			if (_tables.empty())
				return nullptr;

			HashFunc hf;
			size_t index = hf(key) % _tables.size();
			Node* cur = _tables[index];
			KeyOfT kot;
			while (cur)
			{
				if (kot(cur->_data) == key)
					return cur;
				cur = cur->_next;
			}
			return nullptr;
		}
		bool Insert(const T& data)
		{
			KeyOfT kot;
			Node* ret = Find(kot(data));
			if (ret)
				return false;
			if (_n ==  _tables.size())
			{
				size_t newSize = _tables.size() == 0 ? 10 : 2 * _tables.size();

				vector<Node*> newTables;
				newTables.resize(newSize);
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while(cur)
					{
						Node* next = cur->_next;
						HashFunc hf;
						size_t index = hf(kot(cur->_data)) % newTables.size();
						//头插，效率比较高
						cur->_next = newTables[index];
						newTables[index] = cur;

						cur = next;

					}
					_tables[i] = nullptr;
				}
				newTables.swap(_tables);
			}
			HashFunc hf;
			size_t index = hf(kot(data)) % _tables.size();
			Node* newnode = new Node(data);
			//头插
			newnode->_next = _tables[index];
			_tables[index] = newnode;

			++_n;
			return true;
		}

	private:
		vector<Node*> _tables;
		size_t _n=0;//有效数据个数
	};


}

map[]

V& operator[](const K& key)
		{
			auto ret = _ht.Insert(make_pair(key, V()));
			return ret.first->second;
		}

将返回值改为迭代器

iterator Find(const K& key)
		{
			if (_tables.empty())
				return end();

			HashFunc hf;
			size_t index = hf(key) % _tables.size();
			Node* cur = _tables[index];
			KeyOfT kot;
			while (cur)
			{
				if (kot(cur->_data) == key)
					return iterator(cur, this);
				else
					cur = cur->_next;
			}
			return end();
		}

将返回值改为 pair<iterator,bool> 类型

pair<iterator,bool> Insert(const T& data)
		{
			KeyOfT kot;
			iterator ret = Find(kot(data));
			if (ret != end())
				return make_pair(ret,false);
			if (_n ==  _tables.size())
			{
				size_t newSize = _tables.size() == 0 ? 10 : 2 * _tables.size();

				vector<Node*> newTables;
				newTables.resize(newSize);
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while(cur)
					{
						Node* next = cur->_next;
						HashFunc hf;
						size_t index = hf(kot(cur->_data)) % newTables.size();
						//头插，效率比较高
						cur->_next = newTables[index];
						newTables[index] = cur;

						cur = next;

					}
					_tables[i] = nullptr;
				}
				newTables.swap(_tables);
			}
			HashFunc hf;
			size_t index = hf(kot(data)) % _tables.size();
			Node* newnode = new Node(data);
			//头插
			newnode->_next = _tables[index];
			_tables[index] = newnode;

			++_n;
			return make_pair(iterator(newnode,this), true);
		}

拷贝构造

HashTable(const HashTable<K, T, KeyOfT, HashFunc>& ht)
		{
			_tables.resize(ht._tables.size());
			for (size_t i = 0; i < ht._tables.size(); ++i)
			{
				Node* cur = ht._tables[i];
				while (cur)
				{
					Node* copy = new Node(cur->_data);
					copy->_next = _tables[i];
					_tables[i] = copy;
					cur = cur->_next;
				}
			}
		}

因为自己已经写了拷贝构造函数，所以系统就不会默认生成一份拷贝构造函数

那么

private:
		vector<Node*> _tables;
		size_t _n=0;//有效数据个数

系统就不会默认生成一份，

解决：

//方法一：
		//初始化列表，
		//HashTable()
		//{}

		//方法二：
		//显示指定编译器去生成默认构造函数
		HashTable() = default;

		HashTable(const HashTable<K, T, KeyOfT, HashFunc>& ht)
		{
			_tables.resize(ht._tables.size());
			for (size_t i = 0; i < ht._tables.size(); ++i)
			{
				Node* cur = ht._tables[i];
				while (cur)
				{
					Node* copy = new Node(cur->_data);
					copy->_next = _tables[i];
					_tables[i] = copy;
					cur = cur->_next;
				}
			}
		}

深拷贝

//可以不写模板参数，拷贝构造和赋值可以不写
		//const HashTable<K, T, KeyOfT, HashFunc>& operator=(const HashTable<K, T, KeyOfT, HashFunc>)
		Self& operator=(Self copy)
		{
			swap(_n, copy._n);
			_tables.swap(copy._tables);
			return *this;
		}

析构函数

~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;
			}
		}

11.延伸：扩容

hash的大小为素数最好（大佬证明的）

那么扩容时，应该如何扩容？

size_t GetNextPrime(size_t num)
		{
			static const unsigned long __stl_prime_list[28] =
			{
				53, 97, 193, 389, 769,
				1543, 3079, 6151, 12289, 24593,
				49157, 98317, 196613, 393241, 786433,
				1572869, 3145739, 6291469, 12582917, 25165843,
				50331653, 100663319, 201326611, 402653189, 805306457,
				1610612741, 3221225473, 4294967291
			};

			for (size_t i = 0; i < 28; ++i)
			{
				if (__stl_prime_list[i] > num)
				{
					return __stl_prime_list[i];
				}
			}

			return __stl_prime_list[27];
		}

		pair<iterator, bool> Insert(const T& data)
		{
			KeyOfT kot;
			iterator ret = Find(kot(data));
			if (ret != end())
				return make_pair(ret, false);

			HashFunc hf;
			// 负载因子 == 1时扩容
			if (_n == _tables.size())
			{
				//size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				size_t newSize = GetNextPrime(_tables.size());
				
				if (newSize > _tables.size())
				{
					vector<Node*> newTables;
					newTables.resize(newSize);
					for (size_t i = 0; i < _tables.size(); ++i)
					{
						Node* cur = _tables[i];
						while (cur)
						{
							Node* next = cur->_next;

							size_t index = hf(kot(cur->_data)) % newTables.size();
							// 头插
							cur->_next = newTables[index];
							newTables[index] = cur;

							cur = next;
						}

						_tables[i] = nullptr;
					}

					_tables.swap(newTables);
				}
			}


			size_t index = hf(kot(data)) % _tables.size();
			Node* newnode = new Node(data);
			// 头插
			newnode->_next = _tables[index];
			_tables[index] = newnode;

			++_n;
			return make_pair(iterator(newnode, this), false);;
		}

12.最终代码

UnorderedSet.h

#pragma once
#include"HashTable.h"

namespace sakeww
{
	template<class K,class hash= Hash<K>>
	class unordered_set
	{
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};
	public:
		typedef typename LinkHash::HashTable<K, K, SetKeyOfT, hash>::iterator iterator;
		iterator begin()
		{
			return _ht.begin();
		}
		iterator end()
		{
			return _ht.end();
		}
		
		pair<iterator, bool> insert(const K& key)
		{
			return _ht.Insert(key);
		}

	private:
		LinkHash::HashTable<K, K,SetKeyOfT,hash> _ht;
	};

	void test1()
	{
		unordered_set<int> us;
		us.insert(2);
		us.insert(22);
		us.insert(12);
		us.insert(32);
		us.insert(42);
		us.insert(2);
		us.insert(33);
		us.insert(45);
		
		unordered_set<string> uss;
		uss.insert("left");
		uss.insert("right");
		uss.insert("red");
	}
	void test2()
	{
		unordered_set<int> us;
		us.insert(2);
		us.insert(22);
		us.insert(12);
		us.insert(32);
		us.insert(42);
		us.insert(2);
		us.insert(33);
		us.insert(45);

		unordered_set<int>::iterator it = us.begin();
		while (it != us.end())
		{
			cout << *it << " ";
			++it;
		}
		cout << endl;


		unordered_set<int> copy(us);
		unordered_set<int>::iterator cit = copy.begin();
		while (cit != us.end())
		{
			cout << *cit << " ";
			++cit;
		}
		cout << endl;
	}
}

UnorderedSet.h

#pragma once
#include "HashTable.h"

namespace sakeww
{
	template<class K, class V, class hash = Hash<K>>
	class unordered_map
	{
		struct MapKeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};
	public:
		typedef typename LinkHash::HashTable<K, pair<K, V>, MapKeyOfT, hash>::iterator iterator;
		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

		V& operator[](const K& key)
		{
			auto ret = _ht.Insert(make_pair(key, V()));
			return ret.first->second;
		}

		pair<iterator,bool> insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);
		}

	private:
		LinkHash::HashTable<K, pair<K, V>, MapKeyOfT, hash> _ht;
	};
	void testMap()
	{
		unordered_map<string, string> dict;
		dict.insert(make_pair("sort", "排序"));
		dict.insert(make_pair("string", "字符串"));
		dict.insert(make_pair("map", "地图"));
		cout << dict["sort"] << endl;//充当查找
		cout << dict["left"] << endl;//插入
		dict["right"] = "右边";//插入和修改

		unordered_map<string, string>::iterator it = dict.begin();
		while (it != dict.end())
		{
			cout << it->first << ":" << it->second << endl;
			++it;
		}
		cout << endl;

		unordered_map<string, string> copy(dict);
		for (auto& kv : copy)
		{
			cout << kv.first <<":" << kv.second << endl;
		}

	}
}

HashTable.h

#pragma once
#include<vector>

namespace CloseHash
{
	enum Status
	{
		EXIST,
		EMPTY,
		DELETE
	};

	template<class K, class V>
	struct HashData
	{
		pair<K, V> _kv;
		Status _status = EMPTY;
	};

	template<class K>
	struct Hash
	{
		size_t operator()(const K& key)
		{
			return key;
		}
	};

	//特化
	template<>
	struct Hash<string>
	{
		size_t operator()(const string& s)
		{
			//BKDR
			size_t value = 0;
			for (auto ch : s)
			{
				value *= 31;
				value += ch;
			}
			return value;
		}

	};

	//struct HashStr
	//{
	//	size_t operator()(const string& s)
	//	{
	//		//BKDR
	//		size_t value=0;
	//		for (auto ch : s)
	//		{
	//			value *= 31;
	//			value += ch;
	//		}
	//		return value;
	//	}
	//};

	template<class K, class V, class HashFunc = Hash<K>>
	class HashTable
	{
	public:
		bool Erase(const K& key)
		{
			HashData<K, V>* ret = Find(key);
			if (ret)
			{
				ret->_status = DELETE;
				--_n;
				return true;
			}
				
			return false;
		}

		HashData<K, V>* Find(const K& key)
		{
			if (_tables.size() == 0)
				return nullptr;

			HashFunc hf;
			size_t start = hf(key) % _tables.size();
			size_t i = 0;
			size_t index = start;
			while (_tables[index]._status != EMPTY)
			{
				if (_tables[index]._kv.first == key && _tables[index]._status == EXIST)
				{
					return &_tables[index];
				}
				++i;
				//index = start + i * i;//二次探测
				index = start + i;//线性探测
				index %= _tables.size();
			}
			return nullptr;
		}


		bool Insert(const pair<K, V>& kv)
		{
			//负载因子到0.7，扩容
			//负载因子越小，冲突概率越低，效率越高，空间浪费越多
			//负载因子越大，冲突概率越高，效率越低，空间浪费越少
			if (_tables.size() == 0 || _n * 10 / _tables.size() >= 7)
			{
				//扩容
				size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;

				HashTable<K, V, HashFunc> newHT;
				newHT._tables.resize(newSize);
				for (size_t i = 0; i < _tables.size(); ++i)
				{
					if (_tables[i]._status == EXIST)
					{
						newHT.Insert(_tables[i]._kv);
					}
				}
				_tables.swap(newHT._tables);
			}

			HashFunc hf;
			size_t start = hf(kv.first) % _tables.size();
			size_t i = 0;
			size_t index = start;
			while (_tables[index]._status == EXIST)
			{
				++i;
				//index = start + i * i;//二次探测
				index = start + i;//线性探测
				index %= _tables.size();
			}

			_tables[index]._kv = kv;
			_tables[index]._status = EXIST;
			++_n;

			return true;
		}

	private:
		vector<HashData<K, V>> _tables;
		size_t _n = 0;//有效数据个数
	};

	void TestHashTables()
	{
		HashTable<int, int> ht;
		int a[] = { 2,12,22,32,42,52,62 };
		for (auto e : a)
		{
			ht.Insert(make_pair(e, e));
		}
		ht.Insert(make_pair(72, 72));
		ht.Insert(make_pair(42, 42));

		cout << ht.Find(12) << endl;
		ht.Erase(12);
		cout << ht.Find(12) << endl;

		ht.Insert(make_pair(-1, -1));
		ht.Insert(make_pair(-5, -5));
	}

	//void Hashstring()
	//{
	//	HashStr hs;
	//	cout << hs("left") << endl;
	//	cout << hs("right") << endl;
	//	cout << hs("abcd") << endl;
	//	cout << hs("aadd") << endl;
	//	cout << hs("acbd") << endl;
	//
	//	HashTable<string, string, HashStr> ht;
	//	ht.Insert(make_pair("red", "红色"));
	//	ht.Insert(make_pair("blue", "蓝色"));
	//}

	void test3()
	{
		Hash<int> hint;
		cout << hint(-9) << endl;
		cout << hint(9) << endl;
	}

	void test4()
	{
		HashTable<string, string> ht;
		ht.Insert(make_pair("red", "红色"));
		ht.Insert(make_pair("blue", "蓝色"));
	}
}

template<class K>
struct Hash
{
	size_t operator()(const K& key)
	{
		return key;
	}
};

//特化
template<>
struct Hash<string>
{
	size_t operator()(const string& s)
	{
		//BKDR
		size_t value = 0;
		for (auto ch : s)
		{
			value *= 31;
			value += ch;
		}
		return value;
	}

};

namespace LinkHash
{
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		HashNode(const T& data)
			:_data(data)
			,_next(nullptr)
		{}
	};

	template<class K, class T, class KeyOfT, class HashFunc>
	class HashTable;

	template<class K, class T, class Ref, class Ptr, class KeyOfT, class HashFunc>
	struct __HTIterator
	{
		typedef HashNode<T> Node;
		typedef __HTIterator<K, T, Ref, Ptr, KeyOfT, HashFunc> Self;
		 
		Node* _node; 
		HashTable<K, T, KeyOfT, HashFunc>* _pht;

		__HTIterator(Node* node, HashTable<K, T, KeyOfT, HashFunc>* pht)
			:_node(node)
			,_pht(pht)
		{}

		Ref& operator*()
		{
			return _node->_data;
		}
		Ptr operator->()
		{
			return &_node->_data;
		}

		Self& operator++()
		{
			if (_node->_next)
			{
				_node = _node->_next;
			}
			else
			{
				KeyOfT kot;
				HashFunc hf;
				size_t index = hf(kot(_node->_data)) % _pht->_tables.size();
				++index;

				// 找下一个不为空的桶
				while (index< _pht->_tables.size()) //_pht->_tables.size()表的大小
				{
					if (_pht->_tables[index])
					{
						break;
					}
					else
						++index;
				}

				if (index == _pht->_tables.size())
				{
					_node = nullptr;
				}
				else
				{
					_node = _pht->_tables[index];
				}
			}
			return *this;
		}

		bool operator!=(const Self& s) const
		{
			return _node != s._node;
		}

		bool operator==(const Self& s) const
		{
			return _node == s._node;
		}
	};



	template<class K,class T,class KeyOfT,class HashFunc>
	class HashTable
	{
		typedef HashNode<T> Node;

		template<class K, class T, class Ref, class Ptr, class KeyOfT, class HashFunc>
		friend struct __HTIterator; 

		typedef HashTable<K, T, KeyOfT, HashFunc> Self;
	public:
		typedef __HTIterator<K, T, T&, T*, KeyOfT, HashFunc> iterator;

		//初始化列表，
		//HashTable()
		//{}

		//显示指定编译器去生成默认构造函数
		HashTable() = default;

		HashTable(const HashTable<K, T, KeyOfT, HashFunc>& ht)
		{
			_tables.resize(ht._tables.size());
			for (size_t i = 0; i < ht._tables.size(); ++i)
			{
				Node* cur = ht._tables[i];
				while (cur)
				{
					Node* copy = new Node(cur->_data);
					copy->_next = _tables[i];
					_tables[i] = copy;
					cur = cur->_next;
				}
			}
		}

		//可以不写模板参数，拷贝构造和赋值可以不写
		//const HashTable<K, T, KeyOfT, HashFunc>& operator=(const HashTable<K, T, KeyOfT, HashFunc>)
		Self& operator=(Self copy)
		{
			swap(_n, copy._n);
			_tables.swap(copy._tables);
			return *this;
		}

		~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;
			}
		}


		iterator begin()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				if (_tables[i])
				{
					return iterator(_tables[i],this);
				}
			}
			return end();
		}
		iterator end()
		{
			return iterator(nullptr, this);
		}

		bool Erase(const K& key)
		{
			if (_tables.empty())
				return false;

			HashFunc hf;
			size_t index = hf(key) % _tables.size();
			Node* prev = nullptr;
			Node* cur = _tables[index];
			KeyOfT kot;
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					if (prev == nullptr) {
						//头删
						_tables[index] = cur->_next;
					}
					else {
						//中间删除and尾删
						prev->_next = cur->_next;
					}
					--_n;
					delete cur;
					return true;
				}
				else
				{
					prev = cur;
					cur = cur->_next;
				}
			}
			return false;
		}

		iterator Find(const K& key)
		{
			if (_tables.empty())
				return end();

			HashFunc hf;
			size_t index = hf(key) % _tables.size();
			Node* cur = _tables[index];
			KeyOfT kot;
			while (cur)
			{
				if (kot(cur->_data) == key)
					return iterator(cur, this);
				else
					cur = cur->_next;
			}
			return end();
		}
		size_t GetNextPrime(size_t num)
		{
			static const unsigned long __stl_prime_list[28] =
			{
				53, 97, 193, 389, 769,
				1543, 3079, 6151, 12289, 24593,
				49157, 98317, 196613, 393241, 786433,
				1572869, 3145739, 6291469, 12582917, 25165843,
				50331653, 100663319, 201326611, 402653189, 805306457,
				1610612741, 3221225473, 4294967291
			};

			for (size_t i = 0; i < 28; ++i)
			{
				if (__stl_prime_list[i] > num)
				{
					return __stl_prime_list[i];
				}
			}

			return __stl_prime_list[27];
		}

		pair<iterator, bool> Insert(const T& data)
		{
			KeyOfT kot;
			iterator ret = Find(kot(data));
			if (ret != end())
				return make_pair(ret, false);

			HashFunc hf;
			// 负载因子 == 1时扩容
			if (_n == _tables.size())
			{
				//size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				size_t newSize = GetNextPrime(_tables.size());
				if (newSize > _tables.size())
				{
					vector<Node*> newTables;
					newTables.resize(newSize);
					for (size_t i = 0; i < _tables.size(); ++i)
					{
						Node* cur = _tables[i];
						while (cur)
						{
							Node* next = cur->_next;

							size_t index = hf(kot(cur->_data)) % newTables.size();
							// 头插
							cur->_next = newTables[index];
							newTables[index] = cur;

							cur = next;
						}

						_tables[i] = nullptr;
					}

					_tables.swap(newTables);
				}

			}


			size_t index = hf(kot(data)) % _tables.size();
			Node* newnode = new Node(data);
			// 头插
			newnode->_next = _tables[index];
			_tables[index] = newnode;

			++_n;
			return make_pair(iterator(newnode, this), false);;
		}
	private:
		vector<Node*> _tables;
		size_t _n=0;//有效数据个数
	};
}

C++下“hash“的简单模拟实现以及实现中遇见的问题1.线性探测2.二次探测3.载荷因子4.扩容5.删除（查找，删除，判断状态）6.插入特殊值时7.哈希冲突8.开散列9.key对象的要求10.套用map和set11.延伸：扩容12.最终代码

目录

1.线性探测

2.二次探测

3.载荷因子

4.扩容

扩容一：

5.删除（查找，删除，判断状态）

6.插入特殊值时

初试（字符串）

BKDR（解决字符串）

BKDR之后的负数和int

默认的不需要传第三个参数

7.哈希冲突

8.开散列

insert

Erase

开散列整体代码

9.key对象的要求

10.套用map和set

insert套用

++

UnorderedSet.h

UnorderedMap.h

HashTable.h

map[]

拷贝构造

深拷贝

析构函数

11.延伸：扩容

12.最终代码

UnorderedSet.h

UnorderedSet.h

HashTable.h

继续阅读