BMH描叙:
The Boyer-Moore-Horspool (BMH) algorithm [Horspool 1980] is a widely-known
search algorithm for a single pattern. The preprocessing phase of the algorithm
consists of calculating the bad character function B(x). It is defined as the distance
from the end of the pattern p1p2 · · · pm to the last occurrence of the character x:
B(x) = min{h | pm−h = x, h > 0}.
If the character x does not appear in the pattern B(x) = m.
In the matching phase the text is processed in windows of length m. First the
last character in the window is compared with the last character of the pattern.
If they match the whole window is compared against the pattern to check for a
match. After that or if the last characters did not match, the window is shifted by
B(x) where x is the last character of the window
shift-or描叙:
The shift-or algorithm [Baeza-Yates and Gonnet 1992] is a simple bit-parallel algo-
rithm. In the preprocessing phase a bit vector B[x] is initialized for each character
x of the alphabet. The bit in position i is set to 0 in the bit vector if the i:th
character in the pattern is x. Otherwise the bits are set to one.
In the beginning of the matching phase the state vector E is initialized to 1m.
Then the text is read one character at a time and the state vector is updated as
follows:
E = (E << 1)|B[x]
where x is the character read, << moves the bits to the left and | is the bitwise or
operator. If the m:th bit is zero after this update we have found a match.
BMH实现:
#ifndef _CSINGLE_BMH_H
#define _CSINGLE_BMH_H
#define SHIFT_SIZE 256
// the single string match algorithm
class CSingle_BMH
{
private :
char shift_table[SHIFT_SIZE];
int pattern_len;
char * pattern;
public :
CSingle_BMH( const char * pat)
{
pattern_len = strlen(pat);
pattern = new char [pattern_len];
strcpy(pattern,pat);
memset(shift_table, pattern_len, SHIFT_SIZE * sizeof ( char ));
for ( int i = 0 ; i < pattern_len 0 ; i ++ )
{
shift_table[ * (pattern + i)] = pattern_len - i;
}
}
// if match success, return the location of the first matching
// else return -1
int match_BMH( char * str_match, int str_len);
~ CSingle_BMH()
{
delete[] pattern;
}
};
#endif
int CSingle_BMH::match_BMH( char * str_match, int str_len)
{
char * window_end = str_match + pattern_len - 1 ;
char * pat = pattern + pattern_len - 1 ;
while ((window_end - str_match) < str_len)
{
for ( int i = 0 ; i < pattern_len; i ++ )
{
if ( * (window_end - i) != * (pat - i))
{
window_end += shift_table[ * (pat - i)];
break ;
}
}
if ( i == pattern_len)
{
return window_end - str_match + 1 ;
}
}
return - 1 ;
}
shift-or实现:
#ifndef _CSINGLE_SHIFT_OR_H
#define _CSINGLE_SHIFT_OR_H
#define ALPHABET_SIZE 256
// this is the single string match algorithm
class CSingle_shift_Or
{
private :
unsigned int bit_vector[ALPHABET_SIZE];
unsigned int state; // the state of match ,because the state is 32_bits ,so the longest pattern is 32 bytes
int pattern_len; // pattern_len == strlen
public :
CSingle_shift_Or( const char * pat)
{
memset(bit_vector, 0xFFFFFFFF ,ALPHABET_SIZE * sizeof (unsigned int ));
state = 0xFFFFFFFF ;
pattern_len = strlen(pat);
for ( int i = 0 , j = 1 ; i < pattern_len; i ++ , j <<= 1 )
{
bit_vector[ * (pat + i)] &= ~ j ;
}
}
int match_shift_or( char * str_match, int str_len);
};
#endif
int CSingle_shift_Or::match_shift_or( char * str_match, int str_len)
{
int shift_bit = pattern_len - 1 ;
for ( int i = 0 ; i < str_len; i ++ )
{
state = (state << 1 ) | bit_vector[ * (str_match + i)];
if ((state & ( 0x1 << shift_bit)) == 0 )
{
return i; // if the pattern_len bit is zero ,match successfully
}
}
return - 1 ;
}