Implement strStr().
Returns a pointer to the first occurrence of needle in haystack, or null if needle is not part of haystack.
經典的字元串查找問題了。這個問題其實有四種比較經典的算法,這裡介紹2種半。因為這裡的Boyer-Moore是簡易版,全功率版應該是簡易版加上KMP法的,那個實在是複雜,面試是不大可能寫出來的,除非記熟了。
還有一種是Rabin-Karp fingerprint search,這個是利用hash函數産生所謂的fingerprint,然後對比子串的hash值和原串同樣字元長的hash值是否一緻,這個算法的關鍵是如何處理hash值。
下面的三種方法測試LeetCode的運作時間都差不多,大概是因為測試資料不大吧。
1 BF暴力法:
char *strStr(char *haystack, char *needle)
{
if (!haystack || !needle) return nullptr;
int n = strlen(haystack);
int m = strlen(needle);
for (int i = 0; i <= n - m; i++)
{
int j = 0;
for (; j < m; j++)
{
if (needle[j] != haystack[j+i]) break;
}
if (j == m) return haystack + i;
}
return nullptr;
}
3 簡易版Boyer Moore法
void bmSkipTable(char *ch, vector<int> &skipTable)
{
if (!ch) return;
skipTable.resize(26);
int m = strlen(ch);
for (int i = 0; i < m; i++)
{
skipTable[ch[i] - 'a'] = i;
}
}
char *bmStrStr(char *haystack, char *needle)
{
if(!haystack || !needle) return haystack;
int n = strlen(haystack);
int m = strlen(needle);
vector<int> skipTable;
bmSkipTable(needle, skipTable);
int skip = 0;
for (int i = 0; i <= n-m; i+=skip)
{
skip = 0;
for (int j = m-1; j >= 0; j--)
{
if (haystack[i+j] != needle[j])
{
skip = j - skipTable[haystack[i+j] - 'a'];
if (skip < 1) skip = 1;
break;
}
}
if (skip == 0) return haystack + i;
}
return nullptr;
}
2 更新KMP法
//2014-2-20 update
char *strStr(char *hay, char *ne)
{
int len = strlen(hay);
int n = strlen(ne);
vector<int> tbl = genTbl(ne, n);
for (int i = 0, j = 0; i <= len - n; )//改成i<len那麼會空串的時候出錯
{
for ( ; j < len && j < n && hay[i+j] == ne[j]; j++);
if (j == n) return hay+i;
if (j > 0)
{
i += j - tbl[j-1] - 1;
j = tbl[j-1] + 1;
}
else i++;
}
return nullptr;
}
//修正bug
vector<int> genTbl(char *ne, int n)
{
vector<int> tbl(n, -1);
for (int i = 1, j = 0; i < n;)
{
if (ne[i] == ne[j])
{
tbl[i] = j;
++j;
++i;
}
else if (j > 0) j = tbl[j-1]+1;
else i++;
}
return tbl;
}
//2014-2-21 update Boyer-Moore Algorithm
char *strStr(char *hay, char *ne)
{
int n = strlen(ne);
if (n == 0) return hay;
int len = strlen(hay);
vector<int> tbl(256, -1);
for (int i = 0; i < n; i++)
{
tbl[ne[i]] = i;
}
for (int i = n-1, j = n-1; i < len; )
{
for ( ; j >= 0 && hay[i] == ne[j]; j--, i--);
if (j < 0) return hay + i + 1;
if (tbl[hay[i]] < j) i += n - tbl[hay[i]] - 1;
else i += n - j;
j = n-1;
}
return nullptr;
}