HashTable

HashTable,也叫哈希表/散清單，是一種通過key值直接通路在記憶體存儲位置的資料結構

這種資料結構在插入、删除、查找的操作上具有“常數平均時間”的表現

哈希表最大的優點，就是把資料的存儲和查找消耗的時間大大降低，幾乎可以看成是常數時間；而代價僅僅是消耗比較多的記憶體。然而在目前可利用記憶體越來越多的情況下，用空間換時間的做法是值得的。

散列函數：使用某種映射函數，将大數映射為小數（所給鍵值映射為較小的友善作為下标的數字）

（<stl_hash_fun>中定義size_t __stl_hash_string(const char* s)）

使用hash function可能會帶來“碰撞”問題，解決這個問題有多種方法，包括線性探測、二次探測、開鍊法

負載因子：元素個數/表格大小除了開鍊法，其它情況負載因子都在0-1

插入：利用hash function計算出元素插入位置，如果該位置的空間不可用，則往下尋找，直到找到一

個可用空間

查找：利用hash function計算出元素位置，如果該位置的元素不是我們所要找的元素，則往下尋找，

直到找到

删除：必須使用惰性删除，也就是隻标記删除記号，實際删除操作則待表格重新整理時進行，因

為hashtable中的每一個元素不僅表述它自己，也關系到其它元素的排列

線性探測：H+i (i=0,1,2,3,4...)

二測探測：H+i^2 (i=0,1,2,3,4...)

開鍊法：在每一個表格元素維護一個list 雖然是線性操作，但如果每個list夠短，速度還是夠快

//_Capacity()中new不出空間
#pragma once
#include<string>
#include<ostream>

template<class K>
struct HASHFUNC
{
	size_t operator()(const K& key, size_t capacity)
	{
		return key%capacity;
	}
};

template<>
struct HASHFUNC<std::string>
{
	size_t operator()(const std::string& s,int capacity)
	{
		const char* str = s.c_str();
		return BKDRHash(str)%capacity;
	}
	static size_t BKDRHash(const char * str)
	{
		unsigned int seed = 131; // 31 131 1313 13131 131313
		unsigned int hash = 0;
		while (*str)
		{
			hash = hash * seed + (*str++);
		}
		return (hash & 0x7FFFFFFF);
	}
};
//hashtable的删除采用的是惰性删除，也就是隻标記删除記号，實際删除操作則待表格重新整理時進行，
//這是因為hashtable中每一個元素不僅表述它自己，也關系到其它元素的排列
enum Status
{
	EMPTY,
	EXIST,
	DELETE
};
template<class K, class V>
struct KeyValue
{
	KeyValue(const K key=K(), const V value=V())
		:_key(key)
		,_value(value)
	{}
	friend std::ostream& operator<<(std::ostream &_cout, const KeyValue<K, V>& kv);
	
	K _key;
	V _value;
};
template<class K, class V, class COM = HASHFUNC<K>>
class Hash
{
public:
	Hash();
	
	bool Insert(K key, V value);
	KeyValue<K, V>* Find(K key);
	bool Remove(K key);
	void Print();

	~Hash();

	//friend std::ostream& operator<<(std::ostream &_cout, const KeyValue<K, V>& kv);
protected:
	void _Capacity()
	{
		const int _PrimeSize = 28;
		static const unsigned long _PrimeList[_PrimeSize] =
		{
			53ul, 97ul, 193ul, 389ul, 769ul,
			1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
			49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
			1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
			50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
			1610612741ul, 3221225473ul, 4294967291ul
		};
		//确定容量
		size_t NewCapacity = _capacity;
		if (NewCapacity == 0){
			NewCapacity = _PrimeList[0];
		}
		else if (_size * 10 / NewCapacity >= 7){
			for (int i = 0; i < _PrimeSize; ++i){
				if (NewCapacity < _PrimeList[i]){
					NewCapacity = _PrimeList[i];
					break;

				}
			}
		}
		//把原表中的資料哈希到擴容後的表中
		if ( NewCapacity != _capacity){
			Status* NewStatus = new Status[NewCapacity];
			KeyValue<K, V>* NewTables = new KeyValue<K, V>[NewCapacity];//************************new不出空間
			memset(NewStatus, EMPTY, sizeof(NewStatus)*NewCapacity);
			if (_capacity != 0){
				for (size_t index = 0; index < _capacity; ++index)
				{
					if (_status[index] == EXIST){
						size_t  i = HashFunc(_tables[index]._key, _capacity);//得到該關鍵字對應的位置
						NewTables[i]._key = _tables[index]._key;
						NewTables[i]._value = _tables[index]._value;
						NewStatus[i] = EXIST;
					}
				}
			}
			std::swap(NewTables, _tables);
			std::swap(NewStatus, _status);
			_capacity = NewCapacity;
		}
	}
	size_t HashFunc(const K& key, size_t capacity)
	{
		return COM()(key, capacity);
	}
	size_t HashFunc2(const K& key,size_t capacity)
	{
		size_t i = 0;
		size_t res = HashFunc(key, capacity);
		while(_status[(res + (i*i)) % capacity] == EXIST){
			i++;	
		}
		return (res + (i*i)) % capacity;
	}
	int _Find(K key);
protected:
	size_t _size;
	size_t _capacity;
	KeyValue<K,V>* _tables;

	Status* _status; //哈希表位置的狀态

};
template<class K, class V, class COM = HASHFUNC<K>>
Hash<K, V, COM >::Hash()
: _size(0)
, _capacity(0)
, _tables(NULL)
, _status(NULL)
//, _tables(new KeyValue<K,V>[_capacity])
//, _status(new Status[_capacity])
{
	/*memset(_tables, NULL, sizeof(KeyValue<K,V>)*_capacity);
	memset(_status, EMPTY, sizeof(Status)*_capacity);*/
}

template<class K, class V, class COM = HASHFUNC<K>>
bool Hash<K, V, COM >::Insert(K key, V value)
{
	_Capacity();//檢查容量，擴容
	size_t index = HashFunc2(key,_capacity);//****************
	//while (_status[index]!=EMPTY){
	//	if (_tables[index]._key == key)
	//		return false;
	//	index++;
	//}
	//while (_status[index] == EXIST){
	//	index++;
	//}
	_tables[index]._key = key;
	_tables[index]._value = value;
	_status[index] = EXIST;
	++_size;
	return true;
}

template<class K, class V, class COM = HASHFUNC<K>>
int Hash<K, V, COM >::_Find(K key)
{
	size_t k = HashFunc(key, _capacity);
	if (_tables[k]._key == key)
		return k;
	size_t i = 1;
	size_t index = (k + i*i) % _capacity;
	while (_tables[index]._key!=_tables[k]._key){
		if (_status[index] == EXIST){
			if (_tables[index]._key == key){

				return index;
			}
		}
		i++;
		index = (k + i*i)%_capacity;
	}
	std::cout << "未找到key為" << key << "的資料" << std::endl;
	return -1;
}
template<class K, class V, class COM = HASHFUNC<K>>
KeyValue<K, V >* Hash<K, V, COM >::Find(K key)
{
	size_t index=_Find(key);
	if (index != -1){
		std::cout << "已找到key為" << key << "的資料，下标為" << index << std::endl;
		return &_tables[index];
	}
	return NULL;
}

template<class K, class V, class COM = HASHFUNC<K>>
bool  Hash<K, V, COM >::Remove(K key)
{
	int index = _Find(key);
	if (index== -1)
		return false;
	_status[index] = DELETE;
	std::cout << "已删除key為"<<key<<"的資料" << std::endl;
	return true;
}

template<class K, class V, class COM = HASHFUNC<K>>
void Hash<K, V, COM >::Print()
{
	std::cout << "_tables:" << std::endl;
	for (size_t i = 0; i < _capacity; ++i){
		if (_status[i] == EXIST){
			std::cout << "      _tables[" << i << "]-->" << _tables[i]._key<<","<<_tables[i]._value << std::endl;
		}
	}
	std::cout << std::endl;
}

template<class K, class V, class COM = HASHFUNC<K>>
Hash<K, V, COM >::~Hash()
{
	for (size_t i = 0; i < _capacity; ++i){
		if (_status[i] == EXIST)
			_status[i] = DELETE;
	}
	delete[] _tables;
	delete[] _status;
}

template<class K, class V>
std::ostream& operator<<(std::ostream &_cout, const KeyValue<K, V>& kv)
{
	
	_cout <<kv._value << std::endl;
	return _cout;
}

#pragma once
#include<vector>
#include<string>

template<class K>
struct HashFuncDefault
{
	size_t operator()(const K& key,const size_t& capacity)
	{
		return key%capacity;
	}
};
template<>
struct HashFuncDefault<std::string>
{
	static size_t BKDRHash(const char * str)
	{
		unsigned int seed = 131; // 31 131 1313 13131 131313
		unsigned int hash = 0;
		while (*str)
		{
			hash = hash * seed + (*str++);
		}
		return (hash & 0x7FFFFFFF);
	}
	size_t operator()(const std::string& key, const size_t& capacity)
	{
		const char* str = key.c_str();
		return BKDRHash(str)%capacity;
	}
};
template<class K,class V>
struct KeyValue
{
	KeyValue()
	:_key(0)
	, _value(0)
	, _next(NULL)
	{}
	KeyValue(const K& key,const V& value)
	:_key(key)
	, _value(value)
	, _next(NULL)
	{}

	K _key;
	V _value;

	KeyValue<K, V>* _next;
};
template<class K, class V, class HashFunc = HashFuncDefault<K>>
class HashTablesBucket
{
public:
	HashTablesBucket();
	HashTablesBucket(const HashTablesBucket<K,V,HashFunc>& hst);
	HashTablesBucket<K,V,HashFunc>& operator=(const HashTablesBucket<K, V, HashFunc>& hst);
	bool Insert(const K& key, const V& value);
	KeyValue<K, V>* Find(const K& key);
	bool Remove(const K& key);
	void Print();
	~HashTablesBucket();
protected:
	void _Copy(const HashTablesBucket<K,V,HashFunc>& hsb)
	{
		_tables.resize(hsb._tables.size());
		for (size_t i = 0; i < hsb._tables.size(); ++i){
			KeyValue<K, V>*cur = hsb._tables[i];
			while (cur){
				KeyValue<K, V>* NewCur = new KeyValue<K, V>(cur->_key,cur->_value);

				NewCur->_next = _tables[i];
				_tables[i]= NewCur;
				cur = cur->_next;
			}
		}
	}
	//檢查容量
	void _Capacity()
	{
		const int _PrimeSize = 28;
		static const unsigned long _PrimeList[_PrimeSize] =
		{
			53ul, 97ul, 193ul, 389ul, 769ul,
			1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
			49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
			1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
			50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
			1610612741ul, 3221225473ul, 4294967291ul
		};
		//擴容
		size_t NewCapacity = _tables.size();
		if (_size == _tables.size()){
			for (size_t i = 0; i < _PrimeSize; ++i){
				if (_tables.size()< _PrimeList[i]){
					NewCapacity = _PrimeList[i];
					break;
				}
				i++;
			}
		}
		//說明有擴容，需把原來的資料哈希到新的表中
		HashTablesBucket<K, V, HashFunc> NewHTB;
		NewHTB._tables.resize(NewCapacity);
		if (NewCapacity != _tables.size()){
			for(size_t i = 0; i < _tables.size(); ++i){
				KeyValue<K, V>* cur = _tables[i];
				while (cur){
					size_t index=HashTablesBucketFunc(cur->_key);
					_tables[i] = cur->_next;
					NewHTB._tables[index]= cur;
					cur = _tables[i];
				}
			}
			swap(NewHTB._tables, _tables);
		}
	}
	//傳回下标
	size_t HashTablesBucketFunc(const K& key)
	{
		return HashFunc()(key,_tables.size());
	}
	//析構函數
	void _Deatructor(KeyValue<K,V>* cur)
	{
		while (cur){
			KeyValue<K, V>* tmp = cur;
			cur = cur->_next;
			delete tmp;
			tmp = NULL;
		}
	}
protected:
	size_t _size;
	
	std::vector<KeyValue<K, V>*> _tables;
};
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>::HashTablesBucket()
:_size(0)
{}
//拷貝構造
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>::HashTablesBucket(const HashTablesBucket<K, V, HashFunc>& hst)
: _size(hst._size)
{
	_Copy(hst);
}
//指派操作符的重載
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>& HashTablesBucket<K, V, HashFunc>::operator=(const HashTablesBucket<K, V, HashFunc>& hst)
{
	if (this != &hst){
		for (size_t i = 0; i < _tables.size(); ++i){
			_Deatructor(_tables[i]);
			_tables[i] = NULL;
		}

		_size=hst._tables.size();
		_Copy(hst);
	}
	return *this;
}
template<class K, class V, class HashFunc>
bool HashTablesBucket<K, V, HashFunc>::Insert(const K& key, const V& value)
{
	_Capacity();
	size_t index = HashTablesBucketFunc(key);//二次探測
	KeyValue<K, V>* cur = _tables[index];
	while (cur)
	{
		if (cur->_key == key){
			return false;
		}
		cur = cur->_next;
	}
	KeyValue<K, V>* kv = new KeyValue<K, V>(key, value);
	if (_tables[index]==NULL){
		_tables[index] = kv;
	}
	else{
		kv->_next = _tables[index]->_next;
		_tables[index]->_next = kv;
	}
	_size++;
	return true;
}
template<class K, class V, class HashFunc>
KeyValue<K, V>* HashTablesBucket<K, V, HashFunc>::Find(const K& key)
{
	size_t index = HashTablesBucketFunc(key);
	KeyValue<K, V>* cur = _tables[index];
	while (cur){
		if (cur->_key == key){
			return cur;
		}
		cur = cur->_next;
	}
	return NULL;
}
template<class K, class V,  class HashFunc>
bool HashTablesBucket<K, V, HashFunc>::Remove(const K& key)
{
	KeyValue<K, V>* tmp = Find(key);
	if (tmp == NULL){
		std::cout << "未找到key為" << key << "的資料"<<std::endl;
		return false;
	}
	size_t index = HashTablesBucketFunc(key);
	KeyValue<K, V>* cur = _tables[index];
	if (cur == NULL){
		std::cout << "未找到key為" << key << "的資料" << std::endl;
		return false;
	}
	if (cur->_key == key){
		_tables[index] = cur->_next;
		cur->_next = NULL;
		delete cur;
		cur = NULL;
		--_size;
		return true;
	}
	while (cur->_next){
		if (cur->_next->_key == key){
			KeyValue<K, V>* tmp = cur->_next;
			cur->_next = cur->_next->_next;
			tmp->_next = NULL;
			return true;
		}
		cur = cur->_next;
	}
	std::cout << "未找到key為" << key << "的資料" << std::endl;
	return false;
}
template<class K, class V, class HashFunc>
void HashTablesBucket<K, V, HashFunc>::Print()
{
	std::cout << "_tables:" << std::endl;
	for (size_t i = 0; i < _tables.size(); ++i){
		KeyValue<K, V>* cur = _tables[i];
		if (cur != NULL){
			std::cout << "      _tables[" << i << "]-->";
			while (cur){

				std::cout << cur->_key << "," << cur->_value << "   ";
				cur = cur->_next;
			}
			std::cout << std::endl;
		}
	}
}
template<class K, class V, class HashFunc>
HashTablesBucket<K, V, HashFunc>::~HashTablesBucket()
{
	for (size_t i = 0; i < _tables.size(); ++i){
		KeyValue<K, V>* cur = _tables[i];
		_Deatructor(cur);
		_tables[i] = NULL;
	}
}

HashTable

繼續閱讀

跨庫觸發器應用

資料庫增量同步(二台SQL Server伺服器的資料庫之間增量傳輸資料)(jaime原創)

利用expdp 和impdp來導出，導入 oracle 資料（含比較兩個schema的表結構）

DataTable..::.ToString 方法

得到access表字段名VBS腳本

OBJ分析代碼

Partitioning Concepts

oracle海量資料表删除重複記錄

[引]在Oracle中如何利用Rowid查找和删除表中的重複記錄

oralce與mysql中，如何删除重複記錄

删除表中重複記錄隻留一條即可

資料庫表中有重複記錄，如何删除這些重複記錄保留ID最大的一個資訊

查詢表中重複記錄

使用WinRAR解壓縮檔案

DB2表壓縮功能

磁盤結構及在Linux中的命名