天天看點

trie字尾樹字元串索引

Trie樹簡介:

Trie樹又叫字典樹,是一種多叉單詞查找樹,其每個節點都是一個字母,建立好樹後,對一單詞的查找可迅速完成,查找深度為該單詞的長度。

Trie樹的建立:

Trie樹首先從已有的詞庫中讀取單詞來建立樹,數的深度為最長單詞的長度加一,多出的那個長度為根節點,根節點不包含字元資料,值有指向各子樹的指針。

優點:

可利用字元串的公共字首減小查詢時間,減小無謂字元之間的比較,縮短查詢的時間。例如,可以用作快速查找一篇文章中是否有某個單詞出現。

trie字尾樹字元串索引
//使用trie來建立字尾樹進行字元串的索引

#include <iostream>
using namespace  std;

typedef struct stTrieNode
{
	bool bLeaf; //是否為葉子節點,true為葉子節點
	bool bMidLeaf; //是否為處在中間的隐式葉子節點
	int firstAppear;
	struct stTrieNode* toNext[32];
}stTrieNode, *LPstTrieNode;

//根據字母建構新的trie樹節點
LPstTrieNode builtNode(LPstTrieNode root, int loc, char szNodeInfo, int len, int firstAppear)
{
	root->toNext[loc] = (LPstTrieNode)malloc(sizeof(stTrieNode));
	memset(root->toNext[loc], 0, sizeof(stTrieNode));
	//root->firstAppear = firstAppear;
	root->toNext[loc]->firstAppear = firstAppear;
	if (len == 1)  //已經是最後一個節點,建立節點時要加上
	{
		root->toNext[loc]->bLeaf = 1;
		

	}

	return (LPstTrieNode)(root->toNext[loc]);

}


//将單詞strinfo加入到trie樹中
void AddToTree(LPstTrieNode root, char *strInfo, int len, int firstAppear)
{
	char sztemp = strInfo[0];
	int loc = 0 + (sztemp - 'a');
	if (len <= 0)
	{
		return;
	}

	if (root->toNext[loc] == NULL)
	{
		LPstTrieNode nextNode = builtNode(root, loc, sztemp, len, firstAppear);
		AddToTree(nextNode, strInfo+1, len-1, firstAppear);
	}
	else if (root->toNext[loc] != NULL)
	{
		AddToTree(root->toNext[loc], strInfo+1,len-1,firstAppear);
	}

}


//檢查checkword是否在trie樹中
bool checkWord(LPstTrieNode root, char *checkWord , int *loc)
{
	int len = 0;
	int charloc = 0;
	len = strlen(checkWord);
	LPstTrieNode lpTemp = root;

	while(charloc < len)  //字元串沒有檢索完
	{
		int lpLoc = 0 + (checkWord[charloc] -'a');  
		if (lpLoc > 26 || lpLoc < 0)
		{
			return false;
		}
		if (lpTemp->toNext[lpLoc] != NULL)
		{
			lpTemp = lpTemp->toNext[lpLoc];
			charloc++;
			if (charloc == len ) //最後一個字元
			{
				*loc = lpTemp->firstAppear;
				return true;
			}

		}
		else
			return false;

	}
	return false;
}

int main()
{
	char WordContent[128]; //從文本中讀出的單詞
	char wordForCheck[128]; //驗證單詞
	FILE *fReadIn = fopen("Stringmodle.txt","r+");
	if (fReadIn == NULL)
	{
		cout<<"無法打開檔案words.txt"<<endl;
		system("pause");
		return 0;

	}
	LPstTrieNode root = (LPstTrieNode)malloc(sizeof(stTrieNode));
	memset(root, 0, sizeof(stTrieNode));

	//讀取資料到wordcontent中,建樹過程
	fscanf(fReadIn,"%s",WordContent);
	int len = strlen(WordContent);
	for (int i =0; i< len; i++)
	{
		AddToTree(root, WordContent+i, len-i, i);
	}
	fclose(fReadIn);

	//驗證一個單詞是否在樹中
	while(true)
	{
		cout<<"輸入要檢驗的單詞:  ";
		bool nflag;
		int appearLoc;
		cin>>wordForCheck;
		if (wordForCheck[0] == '0') //輸入0驗證結束
		{
			break;
		}

		else
			nflag = checkWord(root, wordForCheck,&appearLoc);
		if (nflag)
		{
			cout<<wordForCheck<<" 存在, 首次出現位置為:"<<appearLoc<<endl<<endl;
		}
		else
			cout<<wordForCheck<<" 不存在"<<endl<<endl;

	}
	return 1;
}


           

繼續閱讀