public class BloomFilter implements Serializable {
private static final long serialVersionUID = 5829598197124113258L;
private static final int DEFAULT_SIZE =2 << 25 ; //2的24次方
private static final int [] seeds =new int []{5,7,11,13,19,31,37,61,73,89};//構造不同因子,用于不同散列函數
private BitSet bits= new BitSet(DEFAULT_SIZE);
private SimpleHash[] func=new SimpleHash[seeds.length];//對不同的數字構造不同Hash函數避免沖突
private int size = 0;
public BloomFilter() {
for (int i = 0; i < seeds.length; i++) {//初始化構造函數
func[i] = new SimpleHash(DEFAULT_SIZE, seeds[i]);
}
}
public void add(String value) {
if(!contains(value)){
size++;
}
for (SimpleHash f : func) { //添加hash值映射到相應位
bits.set(f.hash(value), true);
}
}
public boolean contains(String value) { //判斷存在有一定誤判率,所有位置為1不一定存在
if (value == null) {
return false;
}
boolean ret = true;
for (SimpleHash f : func) {
if(!bits.get(f.hash(value)){return false};//按照添加時候hash函數計算是否true,判斷存在有一定誤判率,所有位置為1不一定存在
}
return true;
}
public int getSize(){
return size;
}
public static class SimpleHash implements Serializable {
private static final long serialVersionUID = 1286028965900771610L;
private int cap;
private int seed;
public SimpleHash(int cap, int seed) {
this.cap = cap;
this.seed = seed;
}
public int hash(String value) {//hash值通過數字個數和數字散列後得到值
int result = 0;
int len = value.length();
for (int i = 0; i < len; i++) {
result = seed * result + value.charAt(i);
}
return (cap - 1) & result;
}
}
public static void main(String[] args) {
String value = "[email protected]";
String value2 = "[email protected]";
String value3 = "[email protected]";
BloomFilter filter = new BloomFilter();
System.out.println(filter.contains(value));
filter.add(value);
filter.add(value2);
if(filter.noContains(value3)){
System.out.println(value3+" 一定不存在 ");
}
}
/*擷取100内質數*/
public static int[] getAllSeeds(){
return new int[]{2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97};
}
}//基于Redis BitSet,由單機擴充到分布式
public void add(byte[] bytes) {
int[] hashes = createHashes(bytes, k);
for (int hash : hashes)
bitset.set(Math.abs(hash % bitSetSize), true);
}
public boolean contains(byte[] bytes) {
int[] hashes = createHashes(bytes, k);
for (int hash : hashes) {
if (!bitset.get(Math.abs(hash % bitSetSize))) {
return false;
}
}
return true;
}