天天看点

各种字符串Hash函数比较及各种Hash算法代码大全

       常用的字符串Hash函数还有ELFHash,APHash等等,都是十分简单有效的方法。这些函数使用位运算使得每一个字符都对最后的函数值产生影响。另外还有以MD5和SHA1为代表的杂凑函数,这些函数几乎不可能找到碰撞。

       常用字符串哈希函数有BKDRHash,APHash,DJBHash,JSHash,RSHash,SDBMHash,PJWHash,ELFHash等等。对于以上几种哈希函数,我对其进行了一个小小的评测。

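| Hash函数 | 数据1 | 数据2 | 数据3 | 数据4 | 数据1得分 | 数据2得分 | 数据3得分 | 数据4得分 | 平均分 |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| BKDRHash | 2 | 0 | 4774 | 481 | 96.55 | 100 | 90.95 | 82.05 | 92.64 |
| APHash | 2 | 3 | 4754 | 493 | 96.55 | 88.46 | 100 | 51.28 | 86.28 |
| DJBHash | 2 | 2 | 4975 | 474 | 96.55 | 92.31 | 0 | 100 | 83.43 |
| JSHash | 1 | 4 | 4761 | 506 | 100 | 84.62 | 96.83 | 17.95 | 81.94 |
| RSHash | 1 | 0 | 4861 | 505 | 100 | 100 | 51.58 | 20.51 | 75.96 |
| SDBMHash | 3 | 2 | 4849 | 504 | 93.1 | 92.31 | 57.01 | 23.08 | 72.41 |
| PJWHash | 30 | 26 | 4878 | 513 | 0 | 0 | 43.89 | 0 | 21.95 |
| ELFHash | 30 | 26 | 4878 | 513 | 0 | 0 | 43.89 | 0 | 21.95 |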

       其中数据1为100000个字母和数字组成的随机串哈希冲突个数。数据2为100000个有意义的英文句子哈希冲突个数。数据3为数据1的哈希值与1000003(大素数)求模后存储到线性表中冲突的个数。数据4为数据1的哈希值与10000019(更大素数)求模后存储到线性表中冲突的个数。

        经过比较,得出以上平均得分(平均分为各项得分的平方平均数)。可以发现,BKDRHash无论是在实际效果还是编码实现上,都是最突出的。APHash也是较为优秀的算法。DJBHash,JSHash,RSHash与SDBMHash各有千秋。PJWHash与ELFHash效果最差,且两者得分完全相同,因为其算法本质上是一致的。

附1:各种哈希函数的C程序代码

/* Modulus applied to the result of every hash function in this listing. */
#define M  249997 
/* Not referenced by any function shown in this listing. */
#define M1 1000001 
/* Not referenced by any function shown in this listing
   (ELFHash spells out the 0xF0000000 mask itself). */
#define M2 0xF0000000 

/*
 * RS hash.
 * Multiplies the running value by a coefficient that is itself scaled by a
 * fixed factor after every character, then adds the character.
 * str must be NUL-terminated; the result is reduced modulo M.
 */
unsigned int RSHash(char*str)
{
    const unsigned int step = 378551;
    unsigned int multiplier = 63689;
    unsigned int acc = 0;
    const char *p;

    for (p = str; *p != '\0'; ++p)
    {
        acc = acc * multiplier + *p;
        multiplier *= step;   /* coefficient changes for the next character */
    }
    return acc % M;
}
 
/*
 * JS hash.
 * Each character is added to a left-shifted and a right-shifted copy of the
 * running value, and the sum is XORed back into the running value.
 * str must be NUL-terminated; the result is reduced modulo M.
 */
unsigned int JSHash(char*str)
{
    unsigned int acc = 1315423911;
    const char *p;

    for (p = str; *p != '\0'; ++p)
    {
        const unsigned int mixed = (acc << 5) + *p + (acc >> 2);
        acc ^= mixed;
    }
    return acc % M;
}
 
/*
 * P. J. Weinberger hash.
 * Shifts the running value left by one eighth of the width of unsigned int,
 * adds the character, and whenever any of the top-eighth bits become set,
 * folds them back into the lower part and clears them.
 * str must be NUL-terminated; the result is reduced modulo M.
 */
unsigned int PJWHash(char*str)
{
    const unsigned int bitCount = (unsigned int)(sizeof(unsigned int) * 8);
    const unsigned int foldShift = (unsigned int)((bitCount * 3) / 4);
    const unsigned int stepShift = (unsigned int)(bitCount / 8);
    const unsigned int topMask = (unsigned int)(0xFFFFFFFF) << (bitCount - stepShift);
    unsigned int acc = 0;
    unsigned int overflow = 0;
    const char *p;

    for (p = str; *p != '\0'; ++p)
    {
        acc = (acc << stepShift) + *p;
        overflow = acc & topMask;
        if (overflow == 0)
        {
            continue;   /* nothing reached the top bits yet */
        }
        acc = (acc ^ (overflow >> foldShift)) & ~topMask;
    }
    return acc % M;
}
 
/*
 * ELF hash.
 * Shifts the running value left by 4 bits, adds the character, and whenever
 * any of bits 28..31 become set, XORs them down by 24 bits and clears them.
 * str must be NUL-terminated; the result is reduced modulo M.
 */
unsigned int ELFHash(char*str)
{
    unsigned int acc = 0;
    unsigned int top = 0;
    const char *p;

    for (p = str; *p != '\0'; ++p)
    {
        acc = (acc << 4) + *p;
        top = acc & 0xF0000000L;
        if (top != 0)
        {
            acc = (acc ^ (top >> 24)) & ~top;
        }
    }
    return acc % M;
}
 
/*
 * BKDR hash.
 * Polynomial hash: running value times a fixed seed, plus the character.
 * The original notes 31, 131, 1313, 13131, 131313, ... as usable seeds.
 * str must be NUL-terminated; the result is reduced modulo M.
 */
unsigned int BKDRHash(char*str)
{
    const unsigned int seed = 131;
    unsigned int acc = 0;
    const char *p;

    for (p = str; *p != '\0'; ++p)
    {
        acc = acc * seed + *p;
    }
    return acc % M;
}
 
/*
 * SDBM hash.
 * For each character: character + (h << 6) + (h << 16) - h, where h is the
 * running value.
 * str must be NUL-terminated; the result is reduced modulo M.
 */
unsigned int SDBMHash(char*str)
{
    unsigned int acc = 0;
    const char *p;

    for (p = str; *p != '\0'; ++p)
    {
        acc = *p + (acc << 6) + (acc << 16) - acc;
    }
    return acc % M;
}
 
/*
 * DJB hash.
 * Starts from 5381; for each character the running value h becomes
 * h + (h << 5) + character.
 * str must be NUL-terminated; the result is reduced modulo M.
 */
unsigned int DJBHash(char*str)
{
    unsigned int acc = 5381;
    const char *p;

    for (p = str; *p != '\0'; ++p)
    {
        acc = (acc << 5) + acc + *p;
    }
    return acc % M;
}
 
/*
 * AP hash.
 * Alternates between two mixing formulas depending on whether the character
 * sits at an even or an odd position, XORing the mix into the running value.
 * str must be NUL-terminated; the result is reduced modulo M.
 */
unsigned int APHash(char*str)
{
    unsigned int acc = 0;
    int idx;

    for (idx = 0; str[idx] != '\0'; idx++)
    {
        /* even positions: plain mix; odd positions: complemented mix */
        const unsigned int mixed = (idx & 1)
            ? ~((acc << 11) ^ str[idx] ^ (acc >> 5))
            : ((acc << 7) ^ str[idx] ^ (acc >> 3));
        acc ^= mixed;
    }
    return acc % M;
}
           

附2:Hash算法大全

/**
 * Collection of simple, non-cryptographic hash functions for strings, byte
 * arrays and integers.
 * <p>
 * The original author recommends the FNV1 variant, see
 * {@link #FNVHash1(String)}.
 * <p>
 * Java has no unsigned {@code int}; wherever an algorithm shifts the running
 * hash to the right, the unsigned shift ({@code >>>}) is used so the sign bit
 * is never smeared into the result.
 *
 * @author Goodzzp 2006-11-20
 */
public class HashAlgorithms
{
    /**
     * Mask applied by {@link #FNVHash(byte[])} when {@link #M_SHIFT} is
     * non-zero. Any value will do, preferably a prime.
     */
    static int M_MASK = 0x8765fed1;

    /**
     * Fold distance used by {@link #FNVHash(byte[])}; {@code 0} means the raw
     * hash is returned without folding or masking.
     */
    static int M_SHIFT = 0;

    /**
     * Reduces a hash modulo {@code prime}, treating the hash as an unsigned
     * 32-bit value so that a hash with the sign bit set cannot produce a
     * negative result.
     */
    private static int unsignedMod(int hash, int prime)
    {
        return (int) ((hash & 0xFFFFFFFFL) % prime);
    }

    /**
     * Final mixing steps shared by both {@code FNVHash1} overloads.
     */
    private static int mixFnv(int hash)
    {
        hash += hash << 13;
        hash ^= hash >>> 7;
        hash += hash << 3;
        hash ^= hash >>> 17;
        hash += hash << 5;
        return hash;
    }

    /**
     * Additive hash: the string length plus the sum of all characters,
     * reduced modulo {@code prime}.
     *
     * @param key   input string
     * @param prime modulus, expected to be a prime number
     * @return the hash value
     * @throws ArithmeticException if {@code prime} is zero
     */
    public static int additiveHash(String key, int prime)
    {
        int hash = key.length();
        for (int i = 0; i < key.length(); i++)
        {
            hash += key.charAt(i);
        }
        return unsignedMod(hash, prime);
    }

    /**
     * Rotating hash: rotates the running value left by 4 bits and XORs in
     * each character, then reduces the result modulo {@code prime}.
     * <p>
     * Alternative noted by the original author: instead of
     * {@code hash %= prime}, use
     * {@code hash = (hash ^ (hash>>10) ^ (hash>>20)) & mask}.
     *
     * @param key   input string
     * @param prime modulus, expected to be a prime number
     * @return the hash value
     * @throws ArithmeticException if {@code prime} is zero
     */
    public static int rotatingHash(String key, int prime)
    {
        int hash = key.length();
        for (int i = 0; i < key.length(); ++i)
        {
            // A true rotate; a signed ">> 28" would drag the sign bit along.
            hash = Integer.rotateLeft(hash, 4) ^ key.charAt(i);
        }
        return unsignedMod(hash, prime);
    }

    /**
     * One-at-a-time hash.
     *
     * @param key input string
     * @return the hash value (unmasked; may be negative)
     */
    public static int oneByOneHash(String key)
    {
        int hash = 0;
        for (int i = 0; i < key.length(); ++i)
        {
            hash += key.charAt(i);
            hash += (hash << 10);
            hash ^= (hash >>> 6);
        }
        hash += (hash << 3);
        hash ^= (hash >>> 11);
        hash += (hash << 15);
        // The original author left "return (hash & M_MASK);" disabled here.
        return hash;
    }

    /**
     * Bernstein's hash: {@code hash = 33 * hash + c} for every character,
     * starting from zero.
     *
     * @param key input string
     * @return the hash value (may be negative)
     */
    public static int bernstein(String key)
    {
        int hash = 0;
        for (int i = 0; i < key.length(); ++i)
        {
            hash = 33 * hash + key.charAt(i);
        }
        return hash;
    }

    // Pearson's Hash (C sketch, not implemented here)
    // char pearson(char[]key, ub4 len, char tab[256])
    // {
    //   char hash;
    //   ub4 i;
    //   for (hash=len, i=0; i<len; ++i)
    //     hash=tab[hash^key[i]];
    //   return (hash);
    // }

    // CRC Hashing: computes a CRC; see elsewhere for the actual code
    // ub4 crc(char *key, ub4 len, ub4 mask, ub4 tab[256])
    // {
    //   ub4 hash, i;
    //   for (hash=len, i=0; i<len; ++i)
    //     hash = (hash >> 8) ^ tab[(hash & 0xff) ^ key[i]];
    //   return (hash & mask);
    // }

    /**
     * Universal hashing.
     * <p>
     * Starts from {@code key.length}. For every character, the low 8 bits are
     * examined; for each bit that is <em>zero</em> the table entry at index
     * {@code 8 * position + bit} is XORed into the hash.
     *
     * @param key  input characters
     * @param mask mask ANDed onto the final value
     * @param tab  table of values; needs {@code 8 * key.length} entries to be
     *             safe for every possible input
     * @return the masked hash value
     */
    public static int universal(char[] key, int mask, int[] tab)
    {
        int len = key.length;
        int hash = len;
        for (int i = 0; i < (len << 3); i += 8)
        {
            char k = key[i >> 3];
            for (int bit = 0; bit < 8; bit++)
            {
                if ((k & (1 << bit)) == 0)
                {
                    hash ^= tab[i + bit];
                }
            }
        }
        return (hash & mask);
    }

    /**
     * Zobrist hashing: XORs together {@code tab[position][character]} for
     * every character, starting from {@code key.length}.
     *
     * @param key  input characters
     * @param mask mask ANDed onto the final value
     * @param tab  table indexed first by position, then by character value
     * @return the masked hash value
     */
    public static int zobrist(char[] key, int mask, int[][] tab)
    {
        int hash = key.length;
        for (int i = 0; i < key.length; ++i)
        {
            hash ^= tab[i][key[i]];
        }
        return (hash & mask);
    }

    // LOOKUP3
    // See the "Bob Jenkins(3).c" file.

    /**
     * 32-bit FNV hash (multiply, then XOR each byte).
     * <p>
     * Each byte is treated as an unsigned octet (0..255). When
     * {@link #M_SHIFT} is non-zero the result is additionally folded by that
     * many bits and masked with {@link #M_MASK}.
     *
     * @param data input bytes
     * @return the hash value
     */
    public static int FNVHash(byte[] data)
    {
        int hash = (int) 2166136261L;
        for (byte b : data)
        {
            // "& 0xff" prevents bytes >= 0x80 from sign-extending.
            hash = (hash * 16777619) ^ (b & 0xff);
        }
        if (M_SHIFT == 0)
        {
            return hash;
        }
        return (hash ^ (hash >>> M_SHIFT)) & M_MASK;
    }

    /**
     * Modified 32-bit FNV hash (XOR each byte, then multiply) followed by
     * extra mixing steps.
     * <p>
     * Each byte is treated as an unsigned octet (0..255).
     *
     * @param data input bytes
     * @return the hash value
     */
    public static int FNVHash1(byte[] data)
    {
        final int p = 16777619;
        int hash = (int) 2166136261L;
        for (byte b : data)
        {
            hash = (hash ^ (b & 0xff)) * p;
        }
        return mixFnv(hash);
    }

    /**
     * Modified 32-bit FNV hash (XOR each character, then multiply) followed
     * by extra mixing steps. Every 16-bit {@code char} is XORed in as a whole.
     *
     * @param data input string
     * @return the hash value
     */
    public static int FNVHash1(String data)
    {
        final int p = 16777619;
        int hash = (int) 2166136261L;
        for (int i = 0; i < data.length(); i++)
        {
            hash = (hash ^ data.charAt(i)) * p;
        }
        return mixFnv(hash);
    }

    /**
     * Thomas Wang's integer hash.
     *
     * @param key integer to scramble
     * @return the hash value
     */
    public static int intHash(int key)
    {
        key += ~(key << 15);
        key ^= (key >>> 10);
        key += (key << 3);
        key ^= (key >>> 6);
        key += ~(key << 11);
        key ^= (key >>> 16);
        return key;
    }

    /**
     * RS hash.
     *
     * @param str input string
     * @return the hash value with the sign bit cleared
     */
    public static int RSHash(String str)
    {
        int b = 378551;
        int a = 63689;
        int hash = 0;
        for (int i = 0; i < str.length(); i++)
        {
            hash = hash * a + str.charAt(i);
            a = a * b;
        }
        return (hash & 0x7FFFFFFF);
    }

    /**
     * JS hash.
     *
     * @param str input string
     * @return the hash value with the sign bit cleared
     */
    public static int JSHash(String str)
    {
        int hash = 1315423911;
        for (int i = 0; i < str.length(); i++)
        {
            hash ^= ((hash << 5) + str.charAt(i) + (hash >>> 2));
        }
        return (hash & 0x7FFFFFFF);
    }

    /**
     * PJW (P. J. Weinberger) hash.
     *
     * @param str input string
     * @return the hash value with the sign bit cleared
     */
    public static int PJWHash(String str)
    {
        final int bitsInUnsignedInt = 32;
        final int threeQuarters = (bitsInUnsignedInt * 3) / 4;
        final int oneEighth = bitsInUnsignedInt / 8;
        final int highBits = 0xFFFFFFFF << (bitsInUnsignedInt - oneEighth);
        int hash = 0;
        for (int i = 0; i < str.length(); i++)
        {
            hash = (hash << oneEighth) + str.charAt(i);
            int test = hash & highBits;
            if (test != 0)
            {
                // Fold the overflowed top bits down, then clear them.
                hash = ((hash ^ (test >>> threeQuarters)) & (~highBits));
            }
        }
        return (hash & 0x7FFFFFFF);
    }

    /**
     * ELF hash.
     *
     * @param str input string
     * @return the hash value with the sign bit cleared
     */
    public static int ELFHash(String str)
    {
        int hash = 0;
        for (int i = 0; i < str.length(); i++)
        {
            hash = (hash << 4) + str.charAt(i);
            int x = hash & 0xF0000000;
            if (x != 0)
            {
                hash ^= (x >>> 24);
                hash &= ~x;
            }
        }
        return (hash & 0x7FFFFFFF);
    }

    /**
     * BKDR hash.
     *
     * @param str input string
     * @return the hash value with the sign bit cleared
     */
    public static int BKDRHash(String str)
    {
        int seed = 131; // 31 131 1313 13131 131313 etc..
        int hash = 0;
        for (int i = 0; i < str.length(); i++)
        {
            hash = (hash * seed) + str.charAt(i);
        }
        return (hash & 0x7FFFFFFF);
    }

    /**
     * SDBM hash.
     *
     * @param str input string
     * @return the hash value with the sign bit cleared
     */
    public static int SDBMHash(String str)
    {
        int hash = 0;
        for (int i = 0; i < str.length(); i++)
        {
            hash = str.charAt(i) + (hash << 6) + (hash << 16) - hash;
        }
        return (hash & 0x7FFFFFFF);
    }

    /**
     * DJB hash.
     *
     * @param str input string
     * @return the hash value with the sign bit cleared
     */
    public static int DJBHash(String str)
    {
        int hash = 5381;
        for (int i = 0; i < str.length(); i++)
        {
            hash = ((hash << 5) + hash) + str.charAt(i);
        }
        return (hash & 0x7FFFFFFF);
    }

    /**
     * DEK hash: rotates the running value left by 5 bits and XORs in each
     * character, starting from the string length.
     *
     * @param str input string
     * @return the hash value with the sign bit cleared
     */
    public static int DEKHash(String str)
    {
        int hash = str.length();
        for (int i = 0; i < str.length(); i++)
        {
            hash = Integer.rotateLeft(hash, 5) ^ str.charAt(i);
        }
        return (hash & 0x7FFFFFFF);
    }

    /**
     * AP hash: alternates between two mixing formulas for characters at even
     * and odd positions.
     *
     * @param str input string
     * @return the hash value (unmasked; may be negative)
     */
    public static int APHash(String str)
    {
        int hash = 0;
        for (int i = 0; i < str.length(); i++)
        {
            hash ^= ((i & 1) == 0) ? ((hash << 7) ^ str.charAt(i) ^ (hash >>> 3)) :
                                     (~((hash << 11) ^ str.charAt(i) ^ (hash >>> 5)));
        }
        // The original author left "return (hash & 0x7FFFFFFF);" disabled here.
        return hash;
    }

    /**
     * The {@code 31 * h + c} polynomial hash described by the original author
     * as Java's built-in algorithm.
     *
     * @param str input string
     * @return the hash value
     */
    public static int java(String str)
    {
        int h = 0;
        for (int i = 0; i < str.length(); i++)
        {
            h = 31 * h + str.charAt(i);
        }
        return h;
    }

    /**
     * Mixed hash producing a 64-bit value: the upper 32 bits are
     * {@code str.hashCode()}, the lower 32 bits are {@link #FNVHash1(String)}.
     *
     * @param str input string
     * @return the combined 64-bit hash
     */
    public static long mixHash(String str)
    {
        long high = ((long) str.hashCode()) << 32;
        // Mask before widening; a negative int would otherwise sign-extend
        // and overwrite the upper half with ones.
        long low = FNVHash1(str) & 0xFFFFFFFFL;
        return high | low;
    }
}
           

继续阅读