天天看點

LeetCode Implement strStr() 暴力法, KMP法, Boyer-Moore簡易版法

Implement strStr().

Returns a pointer to the first occurrence of needle in haystack, or null if needle is not part of haystack.

經典的字元串查找問題了。這個問題其實有四種比較經典的算法,這裡介紹2種半。因為這裡的Boyer-Moore是簡易版,全功率版應該是簡易版加上KMP法的,那個實在是複雜,面試是不大可能寫出來的,除非記熟了。

還有一種是Rabin-Karp fingerprint search,這個是利用hash函數産生所謂的fingerprint,然後對比子串的hash值和原串同樣字元長的hash值是否一緻,這個算法的關鍵是如何處理hash值。

下面的三種方法測試LeetCode的運作時間都差不多,大概是因為測試資料不大吧。

1 BF暴力法:

char *strStr(char *haystack, char *needle)
	{
		if (!haystack || !needle) return nullptr;
		int n = strlen(haystack);
		int m = strlen(needle);
		for (int i = 0; i <= n - m; i++)
		{
			int j = 0;
			for (; j < m; j++)
			{
				if (needle[j] != haystack[j+i]) break;
			}
			if (j == m) return haystack + i;
		}
		return nullptr;
	}
           

3 簡易版Boyer Moore法

void bmSkipTable(char *ch, vector<int> &skipTable)
	{
		if (!ch) return;
		skipTable.resize(26);
		int m = strlen(ch);

		for (int i = 0; i < m; i++)
		{
			skipTable[ch[i] - 'a'] = i;
		}
	}
	char *bmStrStr(char *haystack, char *needle)
	{
		if(!haystack || !needle) return haystack;
		int n = strlen(haystack);
		int m = strlen(needle);

		vector<int> skipTable;
		bmSkipTable(needle, skipTable);

		int skip = 0;
		for (int i = 0; i <= n-m; i+=skip)
		{
			skip = 0;
			for (int j = m-1; j >= 0; j--)
			{
				if (haystack[i+j] != needle[j])
				{
					skip = j - skipTable[haystack[i+j] - 'a'];
					if (skip < 1) skip = 1;
					break;
				}
			}
			if (skip == 0) return haystack + i;
		}
		return nullptr;
	}
           

2 更新KMP法

//2014-2-20 update
	char *strStr(char *hay, char *ne)
	{
		int len = strlen(hay);
		int n = strlen(ne);
		vector<int> tbl = genTbl(ne, n);
		for (int i = 0, j = 0; i <= len - n; )//改成i<len那麼會空串的時候出錯
		{
			for ( ; j < len && j < n && hay[i+j] == ne[j]; j++);
			if (j == n) return hay+i;
			if (j > 0) 
			{
				i += j - tbl[j-1] - 1;
				j = tbl[j-1] + 1;
			}
			else i++;
		}
		return nullptr;
	}

	//修正bug
	vector<int> genTbl(char *ne, int n)
	{
		vector<int> tbl(n, -1);
		for (int i = 1, j = 0; i < n;)
		{
			if (ne[i] == ne[j])
			{
				tbl[i] = j;
				++j;
				++i;
			}
			else if (j > 0) j = tbl[j-1]+1;
			else i++;
		}
		return tbl;
	}
           
//2014-2-21 update Boyer-Moore Algorithm
	char *strStr(char *hay, char *ne)
	{
		int n = strlen(ne);
		if (n == 0) return hay;
		int len = strlen(hay);
		vector<int> tbl(256, -1);
		for (int i = 0; i < n; i++)
		{
			tbl[ne[i]] = i;
		}
		for (int i = n-1, j = n-1; i < len; )
		{
			for ( ; j >= 0 && hay[i] == ne[j]; j--, i--);
			if (j < 0) return hay + i + 1;
			if (tbl[hay[i]] < j) i += n - tbl[hay[i]] - 1;
			else i += n - j;
			j = n-1;
		}
		return nullptr;
	}