mirror of
https://gitee.com/dromara/hutool.git
synced 2026-02-09 09:16:26 +08:00
Merge pull request #4139 from Lettuceleaves/feat/bloom-multi-hash
Feat/bloom multi hash
This commit is contained in:
@@ -30,7 +30,7 @@ public abstract class AbstractFilter implements BloomFilter {
|
||||
@Serial
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
private final BitSet bitSet;
|
||||
protected final BitSet bitSet;
|
||||
/**
|
||||
* 容量
|
||||
*/
|
||||
@@ -50,12 +50,12 @@ public abstract class AbstractFilter implements BloomFilter {
|
||||
|
||||
@Override
|
||||
public boolean contains(final String str) {
|
||||
return bitSet.get(Math.abs(hash(str)));
|
||||
return bitSet.get(hash(str));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean add(final String str) {
|
||||
final int hash = Math.abs(hash(str));
|
||||
final int hash = hash(str);
|
||||
if (bitSet.get(hash)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -16,7 +16,12 @@
|
||||
|
||||
package cn.hutool.v7.core.text.bloom;
|
||||
|
||||
import cn.hutool.v7.core.lang.Assert;
|
||||
|
||||
import java.io.Serial;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
@@ -33,26 +38,69 @@ public class FuncFilter extends AbstractFilter {
|
||||
* 创建FuncFilter
|
||||
*
|
||||
* @param size 最大值
|
||||
* @param hashFunc Hash函数
|
||||
* @param hashFuncs Hash函数
|
||||
* @return FuncFilter
|
||||
*/
|
||||
public static FuncFilter of(final int size, final Function<String, Number> hashFunc) {
|
||||
return new FuncFilter(size, hashFunc);
|
||||
@SafeVarargs
|
||||
public static FuncFilter of(final int size, final Function<String, Number>... hashFuncs) {
|
||||
return new FuncFilter(size, hashFuncs);
|
||||
}
|
||||
|
||||
private final Function<String, Number> hashFunc;
|
||||
// 允许接收多个哈希函数
|
||||
private final List<Function<String, Number>> hashFuncs;
|
||||
|
||||
/**
|
||||
* @param size 最大值
|
||||
* @param hashFunc Hash函数
|
||||
* @param hashFuncs Hash函数
|
||||
*/
|
||||
public FuncFilter(final int size, final Function<String, Number> hashFunc) {
|
||||
@SafeVarargs
|
||||
public FuncFilter(final int size, final Function<String, Number>... hashFuncs) {
|
||||
super(size);
|
||||
this.hashFunc = hashFunc;
|
||||
Assert.notEmpty(hashFuncs, "Hash functions must not be empty");
|
||||
this.hashFuncs = Collections.unmodifiableList(Arrays.asList(hashFuncs));
|
||||
}
|
||||
|
||||
/**
|
||||
*兼容父类,如果存在多个哈希函数,就使用第一个
|
||||
*
|
||||
* @param str 字符串
|
||||
*/
|
||||
@Override
|
||||
public int hash(final String str) {
|
||||
return hash(str, hashFuncs.get(0));
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param str 字符串
|
||||
* @param hashFunc 哈希函数
|
||||
* @return HashCode 指定哈希函数的计算结果
|
||||
*/
|
||||
public int hash(final String str, final Function<String, Number> hashFunc) {
|
||||
// 通过位运算获取正数
|
||||
return (hashFunc.apply(str).intValue() & 0x7FFFFFFF) % size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hash(final String str) {
|
||||
return hashFunc.apply(str).intValue() % size;
|
||||
public boolean contains(final String str) {
|
||||
for (final Function<String, Number> hashFunc : hashFuncs) {
|
||||
if (!bitSet.get(hash(str, hashFunc))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean add(final String str) {
|
||||
boolean add = false;
|
||||
for (final Function<String, Number> hashFunc : hashFuncs) {
|
||||
int hash = hash(str, hashFunc);
|
||||
if (!bitSet.get(hash)) {
|
||||
bitSet.set(hash);
|
||||
add = true;
|
||||
}
|
||||
}
|
||||
return add;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,11 +22,12 @@ import org.junit.jupiter.api.Test;
|
||||
|
||||
public class BitMapBloomFilterTest {
|
||||
|
||||
private static final int SIZE = 2 * 1024 * 1024 * 8;
|
||||
|
||||
@Test
|
||||
public void filterTest() {
|
||||
final int size = 2 * 1024 * 1024 * 8;
|
||||
|
||||
final CombinedBloomFilter filter = new CombinedBloomFilter(FuncFilter.of(size, HashUtil::rsHash));
|
||||
final CombinedBloomFilter filter = new CombinedBloomFilter(FuncFilter.of(SIZE, HashUtil::rsHash));
|
||||
filter.add("123");
|
||||
filter.add("abc");
|
||||
filter.add("ddd");
|
||||
@@ -35,4 +36,68 @@ public class BitMapBloomFilterTest {
|
||||
Assertions.assertTrue(filter.contains("ddd"));
|
||||
Assertions.assertTrue(filter.contains("123"));
|
||||
}
|
||||
@Test
|
||||
public void multiHashFuncTest() {
|
||||
final FuncFilter filter = FuncFilter.of(SIZE,
|
||||
HashUtil::rsHash,
|
||||
HashUtil::jsHash,
|
||||
HashUtil::pjwHash,
|
||||
HashUtil::elfHash,
|
||||
HashUtil::bkdrHash,
|
||||
HashUtil::sdbmHash,
|
||||
HashUtil::djbHash,
|
||||
HashUtil::dekHash,
|
||||
HashUtil::apHash,
|
||||
HashUtil::javaDefaultHash
|
||||
);
|
||||
|
||||
filter.add("Hutool");
|
||||
filter.add("BloomFilter");
|
||||
filter.add("Java");
|
||||
|
||||
Assertions.assertTrue(filter.contains("Hutool"));
|
||||
Assertions.assertTrue(filter.contains("BloomFilter"));
|
||||
Assertions.assertTrue(filter.contains("Java"));
|
||||
Assertions.assertFalse(filter.contains("Python"));
|
||||
Assertions.assertFalse(filter.contains("Go"));
|
||||
Assertions.assertFalse(filter.contains("hutool"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void combinedMultiHashTest() {
|
||||
FuncFilter multiHashFuncFilter = FuncFilter.of(SIZE,
|
||||
HashUtil::bkdrHash,
|
||||
HashUtil::apHash,
|
||||
HashUtil::djbHash
|
||||
);
|
||||
final CombinedBloomFilter filter = new CombinedBloomFilter(multiHashFuncFilter);
|
||||
filter.add("123123WASD-WASD");
|
||||
Assertions.assertTrue(filter.contains("123123WASD-WASD"));
|
||||
Assertions.assertFalse(filter.contains("123123WASD-WASD-false"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void chineseStringWithThreeHashesTest() {
|
||||
final FuncFilter filter = FuncFilter.of(SIZE,
|
||||
HashUtil::bkdrHash,
|
||||
HashUtil::apHash,
|
||||
HashUtil::djbHash
|
||||
);
|
||||
|
||||
String s1 = "你好世界";
|
||||
String s2 = "双亲委派";
|
||||
String s3 = "测试工程师";
|
||||
|
||||
filter.add(s1);
|
||||
filter.add(s2);
|
||||
filter.add(s3);
|
||||
Assertions.assertTrue(filter.contains(s1), "应包含: " + s1);
|
||||
Assertions.assertTrue(filter.contains(s2), "应包含: " + s2);
|
||||
Assertions.assertTrue(filter.contains(s3), "应包含: " + s3);
|
||||
Assertions.assertFalse(filter.contains("我好世界"), "多字");
|
||||
Assertions.assertFalse(filter.contains("父亲委派"), "改字");
|
||||
Assertions.assertFalse(filter.contains("测试"), "子串");
|
||||
Assertions.assertFalse(filter.contains(""), "空串");
|
||||
Assertions.assertFalse(filter.contains("👍"), "未添加的");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user