【恋上数据结构(2)，40张图文详解，我就不信你还参透不了并发编程

![在这里插入图片描述](https://img-blog.csdnimg.cn/20200426132803952.png)

哈希函数的个数 k：

【恋上数据结构(2)，40张图文详解，我就不信你还参透不了并发编程

布隆过滤器的实现

===========================================================================

Guava: Google Core Libraries For Java（谷歌核心库中Java实现）

https://mvnrepository.com/artifact/com.google.guava/guava

布隆过滤器的基础操作有两个：添加元素、查询元素是否存在

/**

 * 添加元素

 * @return true代表bit发送了变化

 */

boolean put(T value);



/**

 * 查询元素是否存在

 * @return false代表一定不存在, true代表可能存在

 */

boolean contains(T value);

布隆过滤器的构造

根据上面的公式可知，布隆过滤器必然有2个全局变量：

bitSize ：二进制向量的长度（一共有多少个二进制位）
hashSize ：哈希函数的个数

并且必然有个容器来存储这些二进制位：

bits ：这里选择 long[] 来存储，因为1个 long 可以表示64位 bit ；（ int[] 等数组也可以）

package com.mj;



public class BloomFilter<T> {

	/**

	 * 二进制向量的长度(一共有多少个二进制位)

	 */

	private int bitSize;

	/**

	 * 二进制向量

	 */

	private long[] bits;

	/**

	 * 哈希函数的个数

	 */

	private int hashSize;

	

	/**

	 * 布隆过滤器的构造

	 * @param n 数据规模

	 * @param p 误判率, 取值范围(0, 1)

	 */

	public BloomFilter(int n, double p){

		if (n <= 0 || p <= 0 || p >= 1) { // 非法输入检测

			throw new IllegalArgumentException("wrong n or p");

		}

		

		// 根据公式求出对应的数据

		double ln2 = Math.log(2);

		// 求出二进制向量的长度

		bitSize = (int) (- (n * Math.log(p)) / (ln2 * ln2));

		hashSize = (int) (bitSize * ln2 / n);

		// bits数组的长度

		bits = new long[(bitSize + Long.SIZE - 1) / Long.SIZE]; // 分页公式

		// (64 + 64 - 1) / 64 = 127 / 64 = 1

		// (128 + 64 - 1) / 64 = 2

		// (130 + 64 - 1) / 64 = 3

		

		// 分页问题:

		// 每一页显示100条数据, pageSize = 100

		// 一共有999999条数据, n = 999999

		// 请问有多少页 pageCount = (n + pageSize - 1) / pageSize

	};

	

}

测试一下，假设有1亿个数据，要求误判率为1%：

可以得到哈希函数的个数为 6，二进制位的个数是 958505837。

public static void main(String[] args) {

	BloomFilter<Integer> bf = new BloomFilter<>(1_0000_0000, 0.01);

	// 哈希函数的个数: 6

	// 二进制位的个数: 958505837 

}

布隆过滤器 - 添加元素

设置指定位置元素的二进制值为1

比如要设置

的第2位bit 为 1，应当

100000 | 000100

，即

100000 | (1 << 2)

；

100000

| 	000100   ==  (1 << 2)

	------------------

	100100

那么设置

value

的第index位bit为 1，则是

value| (1 << index)

；

/**

 * 设置index位置的二进制为1

 */

private boolean set(int index){

	// 对应的long值

	long value = bits[index / Long.SIZE];

	int bitValue = 1 << (index % Long.SIZE);

	bits[index / Long.SIZE] = value | bitValue;

	return (value & bitValue) == 0;

}

有了以上基础，可以实现布隆过滤器的添加元素操作：

/**

 * 添加元素

 */

public boolean put(T value) {

	nullCheck(value);

	

	// 利用value生成 2 个整数

	int hash1 = value.hashCode();

	int hash2 = hash1 >>> 16;



	boolean result = false;

	for (int i = 1; i <= hashSize; i++) {

		int combinedHash = hash1 + (i * hash2);

		if (combinedHash < 0) {

			combinedHash = ~combinedHash;

		}	

		

		// 生成一个二进制的索引

		int index = combinedHash % bitSize;

		// 设置第index位置的二进制为1

		if (set(index)) result = true;

		//   101010101010010101

		// | 000000000000000100	   1 << index

		//   101010111010010101

	}

	return result;

}

布隆过滤器 - 判断元素是否存在

查看指定位置的二进制的值

比如要查看

10101111

的第2位bit 为 1，应当

10101111 & 00000100

，即

10101111 & (1 << 2)

，只有指定位置的二进制的值为 0，返回值才会是 0，否则为 1；

10101111

& 	00000100	== 	(1 << 2)

	--------------

	00000100 != 0, 说明index位的二进制为1

那么获取

value

的第index位bit 的值，则是

value & (1 << index)

；

/**

 * 查看index位置的二进制的值

 * @param index

 * @return true代表1, false代表0

 */

private boolean get(int index) {

	// 对应的long值

	long value = bits[index / Long.SIZE];

	return (value & (1 << (index % Long.SIZE))) != 0;

}

有了以上基础，可以实现布隆过滤器的判断一个元素是否存在操作：

/**

 * 判断一个元素是否存在

 */

public boolean contains(T value) {

	nullCheck(value);

	// 利用value生成2个整数

	int hash1 = value.hashCode();

	int hash2 = hash1 >>> 16;

	

	for (int i = 1; i <= hashSize; i++) {

		int combinedHash = hash1 + (i * hash2);

		if (combinedHash < 0) {

			combinedHash = ~combinedHash;

		}	

		// 生成一个二进制的索引

		int index = combinedHash % bitSize;

		// 查询第index位置的二进制是否为0

		if (!get(index)) return false;

		//   101010101010010101

		// | 000000000000000100	   1 << index

		//   101010111010010101

	}

	return true;

}

布隆过滤器 - 完整代码

===============================================================================

package com.mj;



public class BloomFilter<T> {

	/**

	 * 二进制向量的长度(一共有多少个二进制位)

	 */

	private int bitSize;

	/**

	 * 二进制向量

	 */

	private long[] bits;

	/**

	 * 哈希函数的个数

	 */

	private int hashSize;

	

	/**

	 * 布隆过滤器的构造

	 * @param n 数据规模

	 * @param p 误判率, 取值范围(0, 1)

	 */

	public BloomFilter(int n, double p){

		if (n <= 0 || p <= 0 || p >= 1) { // 非法输入检测

			throw new IllegalArgumentException("wrong n or p");

		}

		

		// 根据公式求出对应的数据

		double ln2 = Math.log(2);

		// 求出二进制向量的长度

		bitSize = (int) (- (n * Math.log(p)) / (ln2 * ln2));

		hashSize = (int) (bitSize * ln2 / n);

		// bits数组的长度

		bits = new long[(bitSize + Long.SIZE - 1) / Long.SIZE]; // 分页公式

		// (64 + 64 - 1) / 64 = 127 / 64 = 1

		// (128 + 64 - 1) / 64 = 2

		// (130 + 64 - 1) / 64 = 3

		

		// 分页问题:

		// 每一页显示100条数据, pageSize = 100

		// 一共有999999条数据, n = 999999

		// 请问有多少页 pageCount = (n + pageSize - 1) / pageSize

	};

	

	/**

	 * 添加元素

	 */

	public boolean put(T value) {

		nullCheck(value);

		

		// 利用value生成2个整数

		int hash1 = value.hashCode();

		int hash2 = hash1 >>> 16;



		boolean result = false;

		for (int i = 1; i <= hashSize; i++) {

			int combinedHash = hash1 + (i * hash2);

			if (combinedHash < 0) {

				combinedHash = ~combinedHash;

			}	

			// 生成一个二进制的索引

			int index = combinedHash % bitSize;

			// 设置第index位置的二进制为1

			if (set(index)) result = true;

			//   101010101010010101

			// | 000000000000000100	   1 << index

			//   101010111010010101

		}

		return result;

	}

	

	/**

	 * 判断一个元素是否存在

	 */

	public boolean contains(T value) {

		nullCheck(value);

		// 利用value生成2个整数

		int hash1 = value.hashCode();

		int hash2 = hash1 >>> 16;

		

		for (int i = 1; i <= hashSize; i++) {

			int combinedHash = hash1 + (i * hash2);

			if (combinedHash < 0) {

				combinedHash = ~combinedHash;

			}	


### 笔者福利

##### 以下是小编自己针对马上即将到来的金九银十准备的一套“面试宝典”，不管是技术还是HR的问题都有针对性的回答。

**有了这个，面试踩雷？不存在的！**

##### 需要这套“面试宝典”的，[点击这里即可免费获取](https://gitee.com/vip204888/java-p7)！回馈粉丝，诚意满满！！！

![](https://img-blog.csdnimg.cn/img_convert/708d4b175dac64861e53d09bdbbb139e.png)
![](https://img-blog.csdnimg.cn/img_convert/541342a4477c3b36b6ae8578c5d80331.png)
![](https://img-blog.csdnimg.cn/img_convert/610677ce4fc6586452b3ec3aef0b8769.png)
	public boolean contains(T value) {

		nullCheck(value);

		// 利用value生成2个整数

		int hash1 = value.hashCode();

		int hash2 = hash1 >>> 16;

		

		for (int i = 1; i <= hashSize; i++) {

			int combinedHash = hash1 + (i * hash2);

			if (combinedHash < 0) {

				combinedHash = ~combinedHash;

			}	


### 笔者福利

##### 以下是小编自己针对马上即将到来的金九银十准备的一套“面试宝典”，不管是技术还是HR的问题都有针对性的回答。

**有了这个，面试踩雷？不存在的！**

##### 需要这套“面试宝典”的，[点击这里即可免费获取](https://gitee.com/vip204888/java-p7)！回馈粉丝，诚意满满！！！

[外链图片转存中...(img-GirafzGd-1628428344406)]
[外链图片转存中...(img-ec81i51t-1628428344407)]
[外链图片转存中...(img-GJE893Ve-1628428344408)]
![](https://img-blog.csdnimg.cn/img_convert/5131d43a9636af0bdaccd501d43ca880.png)

【恋上数据结构(2)，40张图文详解，我就不信你还参透不了并发编程

设置指定位置元素的二进制值为1

查看指定位置的二进制的值

继续阅读

27 Best Free Eclipse Plug-ins for Java Developer to be ProductiveCode Quality PluginsText Editor PluginsDependency ManagementVersion Control Integration PluginsFramework Development Continuous Integration Related PluginsOther Utility Plugins

Java String.format方法的简单使用

neo4j之cypher使用文档

GitHub连夜封杀！这份阿里 10W 字内部 Java 字面试手册到底有多强？

spark/scala关于【资源文件】加载方法概述外部文件加载方案测试资源文件打包入jar包中小结

mybatis_入门程序Mybatis入门

AOP编程_Android优雅权限框架(1)概念基础，2021金三银四前言正文大纲正文

面试题解析：你接口测试是怎么做的？

软件测试工程师跳槽：收到面试电话后，我会做的事。

Effective Java 8:通用程序设计

OOM三种类型

工厂模式-三种类型

【递归】高效率求2的n次幂

express 搭建后台构建工程目录结构

win10本地scala和spark安装安装scala安装spark

scala (3) Function 和 Method