一.背景介紹
MurmurHash算法:高運算性能,低碰撞率,由Austin Appleby建立于2008年,現已應用到Hadoop、libstdc++、nginx、libmemcached等開源系統。2011年Appleby被Google雇傭,随後Google推出其變種的CityHash算法。官方隻提供了C語言的實作版本。
Java界中Redis,Memcached,Cassandra,HBase,Lucene都用它。
在Java的實作,Guava的Hashing類裡有,上面提到的Jedis,Cassandra裡都有Util類。
但存在的問題是由于Java的資料類型long與C語言中無符号長整型uint64_t有差別,導緻Java輸出版本存在負數,針對這個問題進行了修改;另外需要注意的是中文不同編碼(UTF-8或GBK)會導緻輸出結果的不同,使用中需要統一編碼。
一.實作類
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
/**
* 一緻性Hash的一種算法 高效低碰撞率
*/
public class Murmurs {
/**
* murmur hash算法實作
*/
public static Long hash(byte[] key) {
ByteBuffer buf = ByteBuffer.wrap(key);
int seed = 0x1234ABCD;
ByteOrder byteOrder = buf.order();
buf.order(ByteOrder.LITTLE_ENDIAN);
long m = 0xc6a4a7935bd1e995L;
int r = 47;
long h = seed ^ (buf.remaining() * m);
long k;
while (buf.remaining() >= 8) {
k = buf.getLong();
k *= m;
k ^= k >>> r;
k *= m;
h ^= k;
h *= m;
}
if (buf.remaining() > 0) {
ByteBuffer finish = ByteBuffer.allocate(8).order(
ByteOrder.LITTLE_ENDIAN);
// for big-endian version, do this first:
// finish.position(8-buf.remaining());
finish.put(buf).rewind();
h ^= finish.getLong();
h *= m;
}
h ^= h >>> r;
h *= m;
h ^= h >>> r;
buf.order(byteOrder);
return h;
}
public static Long hash(String key) {
return hash(key.getBytes());
}
/**
* Long轉換成無符号長整型(C中資料類型)
*/
public static BigDecimal readUnsignedLong(long value) {
if (value >= 0)
return new BigDecimal(value);
long lowValue = value & 0x7fffffffffffffffL;
return BigDecimal.valueOf(lowValue).add(BigDecimal.valueOf(Long.MAX_VALUE)).add(BigDecimal.valueOf(1));
}
/**
* 傳回無符号murmur hash值
*/
public static BigDecimal hashUnsigned(String key) {
return readUnsignedLong(hash(key));
}
public static BigDecimal hashUnsigned(byte[] key) {
return readUnsignedLong(hash(key));
}
}
二.測試類
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import static org.assertj.core.api.Assertions.assertThat;
public class MurmurHashTest {
private static final Logger logger = LoggerFactory.getLogger(MurmurHashTest.class);
@Test
public void test() throws IOException {
assertThat(Murmurs.hashUnsigned("chenshuo").toString()).isEqualTo("5016608279269930526");
assertThat(Murmurs.hashUnsigned("shaoguoqing").toString()).isEqualTo("17121371936686143062");
assertThat(Murmurs.hashUnsigned("baozenghui").toString()).isEqualTo("5451996895512824982");
assertThat(Murmurs.hashUnsigned("05ff62ff6f7749ffff43ffff6673ff65").toString()).isEqualTo("10652549470333968609");
assertThat(Murmurs.hashUnsigned("hahahaha").toString()).isEqualTo("15134676900169534748");
assertThat(Murmurs.hashUnsigned("hahah1369531321aha5466sfdfaerttedddd56da").toString()).isEqualTo("6439159232526071817");
assertThat(Murmurs.hashUnsigned("測試漢字").toString()).isEqualTo("1146745369200541601");
assertThat(Murmurs.hashUnsigned("1234566大大21".getBytes("GBK")).toString()).isEqualTo("10129762727109086067");
assertThat(Murmurs.hashUnsigned("12345665哦4哦3我的媽呀21".getBytes("GBK")).toString()).isEqualTo("5141842319936259217");
}
}