HashTable

HashTable,也叫哈希表/散列表，是一种通过key值直接访问在内存存储位置的数据结构

这种数据结构在插入、删除、查找的操作上具有“常数平均时间”的表现

哈希表最大的优点，就是把数据的存储和查找消耗的时间大大降低，几乎可以看成是常数时间；而代价仅仅是消耗比较多的内存。然而在当前可利用内存越来越多的情况下，用空间换时间的做法是值得的。

散列函数：使用某种映射函数，将大数映射为小数（所给键值映射为较小的方便作为下标的数字）

（<stl_hash_fun>中定义size_t __stl_hash_string(const char* s)）

使用hash function可能会带来“碰撞”问题，解决这个问题有多种方法，包括线性探测、二次探测、开链法

负载因子：元素个数/表格大小除了开链法，其它情况负载因子都在0-1

插入：利用hash function计算出元素插入位置，如果该位置的空间不可用，则往下寻找，直到找到一

个可用空间

查找：利用hash function计算出元素位置，如果该位置的元素不是我们所要找的元素，则往下寻找，

直到找到

删除：必须使用惰性删除，也就是只标记删除记号，实际删除操作则待表格重新整理时进行，因

为hashtable中的每一个元素不仅表述它自己，也关系到其它元素的排列

线性探测：H+i (i=0,1,2,3,4...)

二测探测：H+i^2 (i=0,1,2,3,4...)

开链法：在每一个表格元素维护一个list 虽然是线性操作，但如果每个list够短，速度还是够快

//_Capacity()中new不出空间
#pragma once
#include<string>
#include<ostream>

template<class K>
struct HASHFUNC
{
	size_t operator()(const K& key, size_t capacity)
	{
		return key%capacity;
	}
};

template<>
struct HASHFUNC<std::string>
{
	size_t operator()(const std::string& s,int capacity)
	{
		const char* str = s.c_str();
		return BKDRHash(str)%capacity;
	}
	static size_t BKDRHash(const char * str)
	{
		unsigned int seed = 131; // 31 131 1313 13131 131313
		unsigned int hash = 0;
		while (*str)
		{
			hash = hash * seed + (*str++);
		}
		return (hash & 0x7FFFFFFF);
	}
};
//hashtable的删除采用的是惰性删除，也就是只标记删除记号，实际删除操作则待表格重新整理时进行，
//这是因为hashtable中每一个元素不仅表述它自己，也关系到其它元素的排列
enum Status
{
	EMPTY,
	EXIST,
	DELETE
};
template<class K, class V>
struct KeyValue
{
	KeyValue(const K key=K(), const V value=V())
		:_key(key)
		,_value(value)
	{}
	friend std::ostream& operator<<(std::ostream &_cout, const KeyValue<K, V>& kv);
	
	K _key;
	V _value;
};
template<class K, class V, class COM = HASHFUNC<K>>
class Hash
{
public:
	Hash();
	
	bool Insert(K key, V value);
	KeyValue<K, V>* Find(K key);
	bool Remove(K key);
	void Print();

	~Hash();

	//friend std::ostream& operator<<(std::ostream &_cout, const KeyValue<K, V>& kv);
protected:
	void _Capacity()
	{
		const int _PrimeSize = 28;
		static const unsigned long _PrimeList[_PrimeSize] =
		{
			53ul, 97ul, 193ul, 389ul, 769ul,
			1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
			49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
			1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
			50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
			1610612741ul, 3221225473ul, 4294967291ul
		};
		//确定容量
		size_t NewCapacity = _capacity;
		if (NewCapacity == 0){
			NewCapacity = _PrimeList[0];
		}
		else if (_size * 10 / NewCapacity >= 7){
			for (int i = 0; i < _PrimeSize; ++i){
				if (NewCapacity < _PrimeList[i]){
					NewCapacity = _PrimeList[i];
					break;

				}
			}
		}
		//把原表中的数据哈希到扩容后的表中
		if ( NewCapacity != _capacity){
			Status* NewStatus = new Status[NewCapacity];
			KeyValue<K, V>* NewTables = new KeyValue<K, V>[NewCapacity];//************************new不出空间
			memset(NewStatus, EMPTY, sizeof(NewStatus)*NewCapacity);
			if (_capacity != 0){
				for (size_t index = 0; index < _capacity; ++index)
				{
					if (_status[index] == EXIST){
						size_t  i = HashFunc(_tables[index]._key, _capacity);//得到该关键字对应的位置
						NewTables[i]._key = _tables[index]._key;
						NewTables[i]._value = _tables[index]._value;
						NewStatus[i] = EXIST;
					}
				}
			}
			std::swap(NewTables, _tables);
			std::swap(NewStatus, _status);
			_capacity = NewCapacity;
		}
	}
	size_t HashFunc(const K& key, size_t capacity)
	{
		return COM()(key, capacity);
	}
	size_t HashFunc2(const K& key,size_t capacity)
	{
		size_t i = 0;
		size_t res = HashFunc(key, capacity);
		while(_status[(res + (i*i)) % capacity] == EXIST){
			i++;	
		}
		return (res + (i*i)) % capacity;
	}
	int _Find(K key);
protected:
	size_t _size;
	size_t _capacity;
	KeyValue<K,V>* _tables;

	Status* _status; //哈希表位置的状态

};
template<class K, class V, class COM = HASHFUNC<K>>
Hash<K, V, COM >::Hash()
: _size(0)
, _capacity(0)
, _tables(NULL)
, _status(NULL)
//, _tables(new KeyValue<K,V>[_capacity])
//, _status(new Status[_capacity])
{
	/*memset(_tables, NULL, sizeof(KeyValue<K,V>)*_capacity);
	memset(_status, EMPTY, sizeof(Status)*_capacity);*/
}

template<class K, class V, class COM = HASHFUNC<K>>
bool Hash<K, V, COM >::Insert(K key, V value)
{
	_Capacity();//检查容量，扩容
	size_t index = HashFunc2(key,_capacity);//****************
	//while (_status[index]!=EMPTY){
	//	if (_tables[index]._key == key)
	//		return false;
	//	index++;
	//}
	//while (_status[index] == EXIST){
	//	index++;
	//}
	_tables[index]._key = key;
	_tables[index]._value = value;
	_status[index] = EXIST;
	++_size;
	return true;
}

template<class K, class V, class COM = HASHFUNC<K>>
int Hash<K, V, COM >::_Find(K key)
{
	size_t k = HashFunc(key, _capacity);
	if (_tables[k]._key == key)
		return k;
	size_t i = 1;
	size_t index = (k + i*i) % _capacity;
	while (_tables[index]._key!=_tables[k]._key){
		if (_status[index] == EXIST){
			if (_tables[index]._key == key){

				return index;
			}
		}
		i++;
		index = (k + i*i)%_capacity;
	}
	std::cout << "未找到key为" << key << "的数据" << std::endl;
	return -1;
}
template<class K, class V, class COM = HASHFUNC<K>>
KeyValue<K, V >* Hash<K, V, COM >::Find(K key)
{
	size_t index=_Find(key);
	if (index != -1){
		std::cout << "已找到key为" << key << "的数据，下标为" << index << std::endl;
		return &_tables[index];
	}
	return NULL;
}

template<class K, class V, class COM = HASHFUNC<K>>
bool  Hash<K, V, COM >::Remove(K key)
{
	int index = _Find(key);
	if (index== -1)
		return false;
	_status[index] = DELETE;
	std::cout << "已删除key为"<<key<<"的数据" << std::endl;
	return true;
}

template<class K, class V, class COM = HASHFUNC<K>>
void Hash<K, V, COM >::Print()
{
	std::cout << "_tables:" << std::endl;
	for (size_t i = 0; i < _capacity; ++i){
		if (_status[i] == EXIST){
			std::cout << "      _tables[" << i << "]-->" << _tables[i]._key<<","<<_tables[i]._value << std::endl;
		}
	}
	std::cout << std::endl;
}

template<class K, class V, class COM = HASHFUNC<K>>
Hash<K, V, COM >::~Hash()
{
	for (size_t i = 0; i < _capacity; ++i){
		if (_status[i] == EXIST)
			_status[i] = DELETE;
	}
	delete[] _tables;
	delete[] _status;
}

template<class K, class V>
std::ostream& operator<<(std::ostream &_cout, const KeyValue<K, V>& kv)
{
	
	_cout <<kv._value << std::endl;
	return _cout;
}

#pragma once
#include<vector>
#include<string>

template<class K>
struct HashFuncDefault
{
	size_t operator()(const K& key,const size_t& capacity)
	{
		return key%capacity;
	}
};
template<>
struct HashFuncDefault<std::string>
{
	static size_t BKDRHash(const char * str)
	{
		unsigned int seed = 131; // 31 131 1313 13131 131313
		unsigned int hash = 0;
		while (*str)
		{
			hash = hash * seed + (*str++);
		}
		return (hash & 0x7FFFFFFF);
	}
	size_t operator()(const std::string& key, const size_t& capacity)
	{
		const char* str = key.c_str();
		return BKDRHash(str)%capacity;
	}
};
template<class K,class V>
struct KeyValue
{
	KeyValue()
	:_key(0)
	, _value(0)
	, _next(NULL)
	{}
	KeyValue(const K& key,const V& value)
	:_key(key)
	, _value(value)
	, _next(NULL)
	{}

	K _key;
	V _value;

	KeyValue<K, V>* _next;
};
template<class K, class V, class HashFunc = HashFuncDefault<K>>
class HashTablesBucket
{
public:
	HashTablesBucket();
	HashTablesBucket(const HashTablesBucket<K,V,HashFunc>& hst);
	HashTablesBucket<K,V,HashFunc>& operator=(const HashTablesBucket<K, V, HashFunc>& hst);
	bool Insert(const K& key, const V& value);
	KeyValue<K, V>* Find(const K& key);
	bool Remove(const K& key);
	void Print();
	~HashTablesBucket();
protected:
	void _Copy(const HashTablesBucket<K,V,HashFunc>& hsb)
	{
		_tables.resize(hsb._tables.size());
		for (size_t i = 0; i < hsb._tables.size(); ++i){
			KeyValue<K, V>*cur = hsb._tables[i];
			while (cur){
				KeyValue<K, V>* NewCur = new KeyValue<K, V>(cur->_key,cur->_value);

				NewCur->_next = _tables[i];
				_tables[i]= NewCur;
				cur = cur->_next;
			}
		}
	}
	//检查容量
	void _Capacity()
	{
		const int _PrimeSize = 28;
		static const unsigned long _PrimeList[_PrimeSize] =
		{
			53ul, 97ul, 193ul, 389ul, 769ul,
			1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
			49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
			1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
			50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
			1610612741ul, 3221225473ul, 4294967291ul
		};
		//扩容
		size_t NewCapacity = _tables.size();
		if (_size == _tables.size()){
			for (size_t i = 0; i < _PrimeSize; ++i){
				if (_tables.size()< _PrimeList[i]){
					NewCapacity = _PrimeList[i];
					break;
				}
				i++;
			}
		}
		//说明有扩容，需把原来的数据哈希到新的表中
		HashTablesBucket<K, V, HashFunc> NewHTB;
		NewHTB._tables.resize(NewCapacity);
		if (NewCapacity != _tables.size()){
			for(size_t i = 0; i < _tables.size(); ++i){
				KeyValue<K, V>* cur = _tables[i];
				while (cur){
					size_t index=HashTablesBucketFunc(cur->_key);
					_tables[i] = cur->_next;
					NewHTB._tables[index]= cur;
					cur = _tables[i];
				}
			}
			swap(NewHTB._tables, _tables);
		}
	}
	//返回下标
	size_t HashTablesBucketFunc(const K& key)
	{
		return HashFunc()(key,_tables.size());
	}
	//析构函数
	void _Deatructor(KeyValue<K,V>* cur)
	{
		while (cur){
			KeyValue<K, V>* tmp = cur;
			cur = cur->_next;
			delete tmp;
			tmp = NULL;
		}
	}
protected:
	size_t _size;
	
	std::vector<KeyValue<K, V>*> _tables;
};
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>::HashTablesBucket()
:_size(0)
{}
//拷贝构造
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>::HashTablesBucket(const HashTablesBucket<K, V, HashFunc>& hst)
: _size(hst._size)
{
	_Copy(hst);
}
//赋值操作符的重载
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>& HashTablesBucket<K, V, HashFunc>::operator=(const HashTablesBucket<K, V, HashFunc>& hst)
{
	if (this != &hst){
		for (size_t i = 0; i < _tables.size(); ++i){
			_Deatructor(_tables[i]);
			_tables[i] = NULL;
		}

		_size=hst._tables.size();
		_Copy(hst);
	}
	return *this;
}
template<class K, class V, class HashFunc>
bool HashTablesBucket<K, V, HashFunc>::Insert(const K& key, const V& value)
{
	_Capacity();
	size_t index = HashTablesBucketFunc(key);//二次探测
	KeyValue<K, V>* cur = _tables[index];
	while (cur)
	{
		if (cur->_key == key){
			return false;
		}
		cur = cur->_next;
	}
	KeyValue<K, V>* kv = new KeyValue<K, V>(key, value);
	if (_tables[index]==NULL){
		_tables[index] = kv;
	}
	else{
		kv->_next = _tables[index]->_next;
		_tables[index]->_next = kv;
	}
	_size++;
	return true;
}
template<class K, class V, class HashFunc>
KeyValue<K, V>* HashTablesBucket<K, V, HashFunc>::Find(const K& key)
{
	size_t index = HashTablesBucketFunc(key);
	KeyValue<K, V>* cur = _tables[index];
	while (cur){
		if (cur->_key == key){
			return cur;
		}
		cur = cur->_next;
	}
	return NULL;
}
template<class K, class V,  class HashFunc>
bool HashTablesBucket<K, V, HashFunc>::Remove(const K& key)
{
	KeyValue<K, V>* tmp = Find(key);
	if (tmp == NULL){
		std::cout << "未找到key为" << key << "的数据"<<std::endl;
		return false;
	}
	size_t index = HashTablesBucketFunc(key);
	KeyValue<K, V>* cur = _tables[index];
	if (cur == NULL){
		std::cout << "未找到key为" << key << "的数据" << std::endl;
		return false;
	}
	if (cur->_key == key){
		_tables[index] = cur->_next;
		cur->_next = NULL;
		delete cur;
		cur = NULL;
		--_size;
		return true;
	}
	while (cur->_next){
		if (cur->_next->_key == key){
			KeyValue<K, V>* tmp = cur->_next;
			cur->_next = cur->_next->_next;
			tmp->_next = NULL;
			return true;
		}
		cur = cur->_next;
	}
	std::cout << "未找到key为" << key << "的数据" << std::endl;
	return false;
}
template<class K, class V, class HashFunc>
void HashTablesBucket<K, V, HashFunc>::Print()
{
	std::cout << "_tables:" << std::endl;
	for (size_t i = 0; i < _tables.size(); ++i){
		KeyValue<K, V>* cur = _tables[i];
		if (cur != NULL){
			std::cout << "      _tables[" << i << "]-->";
			while (cur){

				std::cout << cur->_key << "," << cur->_value << "   ";
				cur = cur->_next;
			}
			std::cout << std::endl;
		}
	}
}
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>::~HashTablesBucket()
{
	for (size_t i = 0; i < _tables.size(); ++i){
		KeyValue<K, V>* cur = _tables[i];
		_Deatructor(cur);
		_tables[i] = NULL;
	}
}

HashTable

继续阅读

跨库触发器应用

数据库增量同步(二台SQL Server服务器的数据库之间增量传输数据)(jaime原创)

利用expdp 和impdp来导出，导入 oracle 数据（含比较两个schema的表结构）

DataTable..::.ToString 方法

得到access表字段名VBS脚本

OBJ分析代码

Partitioning Concepts

oracle海量数据表删除重复记录

[引]在Oracle中如何利用Rowid查找和删除表中的重复记录

oralce与mysql中，如何删除重复记录

删除表中重复记录只留一条即可

数据库表中有重复记录，如何删除这些重复记录保留ID最大的一个信息

查询表中重复记录

使用WinRAR解压缩文件

DB2表压缩功能

磁盘结构及在Linux中的命名