From a31e3ff096e358ad1c59f2745563703f758938c2 Mon Sep 17 00:00:00 2001
From: LettuceLeaves <2878506229@qq.com>
Date: Fri, 21 Nov 2025 01:11:54 +0800
Subject: [PATCH 1/3] =?UTF-8?q?feat(core):=E5=8D=95=E4=B8=AA=E5=B8=83?=
 =?UTF-8?q?=E9=9A=86=E8=BF=87=E6=BB=A4=E5=99=A8=E5=85=81=E8=AE=B8=E4=BD=BF?=
 =?UTF-8?q?=E7=94=A8=E5=A4=9A=E4=B8=AA=E5=93=88=E5=B8=8C=E5=87=BD=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (text between the "---" line and the diffstat is ignored by git am):
  * AbstractFilter.bitSet becomes protected so that FuncFilter can read and set
    bits directly.
  * FuncFilter now keeps an unmodifiable list of hash functions. contains()
    returns true only when the bit of every function is set; add() sets the bit
    of every function and returns true when at least one bit was newly set.
  * hash(String) still exists for the parent class and uses only the first
    function.

 .../v7/core/text/bloom/AbstractFilter.java    |  2 +-
 .../hutool/v7/core/text/bloom/FuncFilter.java | 48 ++++++++++++++++---
 2 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/AbstractFilter.java b/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/AbstractFilter.java
index 61dd388083..ae2e7c3b72 100644
--- a/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/AbstractFilter.java
+++ b/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/AbstractFilter.java
@@ -30,7 +30,7 @@ public abstract class AbstractFilter implements BloomFilter {
 	@Serial
 	private static final long serialVersionUID = 1L;
 
-	private final BitSet bitSet;
+	protected final BitSet bitSet;
 	/**
 	 * 容量
 	 */
diff --git a/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/FuncFilter.java b/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/FuncFilter.java
index 8881d1806d..28b3d1c261 100644
--- a/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/FuncFilter.java
+++ b/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/FuncFilter.java
@@ -16,7 +16,12 @@
 
 package cn.hutool.v7.core.text.bloom;
 
+import cn.hutool.v7.core.lang.Assert;
+
 import java.io.Serial;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
 import java.util.function.Function;
 
 /**
@@ -33,26 +38,57 @@ public class FuncFilter extends AbstractFilter {
 	 * 创建FuncFilter
 	 *
 	 * @param size 最大值
-	 * @param hashFunc Hash函数
+	 * @param hashFuncs Hash functions
 	 * @return FuncFilter
 	 */
-	public static FuncFilter of(final int size, final Function<String, Number> hashFunc) {
-		return new FuncFilter(size, hashFunc);
+	@SafeVarargs
+	public static FuncFilter of(final int size, final Function<String, Number>... hashFuncs) {
+		return new FuncFilter(size, hashFuncs);
 	}
 
-	private final Function<String, Number> hashFunc;
+	// Allows multiple hash functions to be supplied
+	private final List<Function<String, Number>> hashFuncs;
 
 	/**
 	 * @param size 最大值
 	 * @param hashFunc Hash函数
 	 */
-	public FuncFilter(final int size, final Function<String, Number> hashFunc) {
+	@SafeVarargs
+	public FuncFilter(final int size, final Function<String, Number>... hashFunc) {
 		super(size);
-		this.hashFunc = hashFunc;
+		Assert.notEmpty(hashFunc, "Hash functions must not be empty");
+		this.hashFuncs = Collections.unmodifiableList(Arrays.asList(hashFunc));
 	}
 
 	@Override
 	public int hash(final String str) {
+		return hash(str, hashFuncs.get(0));
+	}
+
+	public int hash(final String str, final Function<String, Number> hashFunc) {
 		return hashFunc.apply(str).intValue() % size;
 	}
+
+	@Override
+	public boolean contains(final String str) {
+		for (final Function<String, Number> hashFunc : hashFuncs) {
+			if (!bitSet.get(Math.abs(hash(str, hashFunc)))) {
+				return false;
+			}
+		}
+		return true;
+	}
+
+	@Override
+	public boolean add(final String str) {
+		boolean add = false;
+		for (final Function<String, Number> hashFunc : hashFuncs) {
+			int hash = Math.abs(hash(str, hashFunc));
+			if (!bitSet.get(hash)) {
+				bitSet.set(hash);
+				add = true;
+			}
+		}
+		return add;
+	}
 }

From 6ad6a8022e1d1b4eb0800ae9d7b2728e37f28932 Mon Sep 17 00:00:00 2001
From: LettuceLeaves <2878506229@qq.com>
Date: Fri, 21 Nov 2025 01:37:52 +0800
Subject: [PATCH 2/3] =?UTF-8?q?fix(core):=E4=BD=BF=E7=94=A8=E4=BD=8D?=
 =?UTF-8?q?=E8=BF=90=E7=AE=97=E8=A7=A3=E5=86=B3=E6=9E=81=E7=AB=AF=E6=83=85?=
 =?UTF-8?q?=E5=86=B5=E4=B8=8BMath.abs()=E5=A4=B1=E8=B4=A5=E5=AF=BC?=
 =?UTF-8?q?=E8=87=B4=E7=9A=84BitSet=E6=8A=A5=E9=94=99?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (text between the "---" line and the diffstat is ignored by git am):
  * FuncFilter.hash(str, hashFunc) now clears the sign bit with 0x7FFFFFFF
    before taking the modulo, so the value it returns is never negative.
  * AbstractFilter.contains()/add() no longer wrap hash(str) in Math.abs().
    NOTE(review): any other AbstractFilter subclass whose hash() can return a
    negative value would now make BitSet.get() throw
    IndexOutOfBoundsException - confirm that no such subclass exists.
  * NOTE(review): the java.util.BitSet import and the blank line added to
    BitMapBloomFilterTest appear unused in this commit; PATCH 3/3 removes both
    again.

 .../v7/core/text/bloom/AbstractFilter.java    |  4 +--
 .../hutool/v7/core/text/bloom/FuncFilter.java | 26 ++++++++++++++-----
 .../text/bloom/BitMapBloomFilterTest.java     |  3 +++
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/AbstractFilter.java b/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/AbstractFilter.java
index ae2e7c3b72..9b8f9c3e2c 100644
--- a/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/AbstractFilter.java
+++ b/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/AbstractFilter.java
@@ -50,12 +50,12 @@ public abstract class AbstractFilter implements BloomFilter {
 
 	@Override
 	public boolean contains(final String str) {
-		return bitSet.get(Math.abs(hash(str)));
+		return bitSet.get(hash(str));
 	}
 
 	@Override
 	public boolean add(final String str) {
-		final int hash = Math.abs(hash(str));
+		final int hash = hash(str);
 		if (bitSet.get(hash)) {
 			return false;
 		}
diff --git a/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/FuncFilter.java b/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/FuncFilter.java
index 28b3d1c261..f764468387 100644
--- a/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/FuncFilter.java
+++ b/hutool-core/src/main/java/cn/hutool/v7/core/text/bloom/FuncFilter.java
@@ -51,28 +51,40 @@ public class FuncFilter extends AbstractFilter {
 
 	/**
 	 * @param size 最大值
-	 * @param hashFunc Hash函数
+	 * @param hashFuncs Hash functions
 	 */
 	@SafeVarargs
-	public FuncFilter(final int size, final Function<String, Number>... hashFunc) {
+	public FuncFilter(final int size, final Function<String, Number>... hashFuncs) {
 		super(size);
-		Assert.notEmpty(hashFunc, "Hash functions must not be empty");
-		this.hashFuncs = Collections.unmodifiableList(Arrays.asList(hashFunc));
+		Assert.notEmpty(hashFuncs, "Hash functions must not be empty");
+		this.hashFuncs = Collections.unmodifiableList(Arrays.asList(hashFuncs));
 	}
 
+	/**
+	 * Kept for compatibility with the parent class; if several hash functions exist, the first one is used
+	 *
+	 * @param str the string
+	 */
 	@Override
 	public int hash(final String str) {
 		return hash(str, hashFuncs.get(0));
 	}
 
+	/**
+	 *
+	 * @param str the string
+	 * @param hashFunc the hash function
+	 * @return HashCode, the result computed with the given hash function
+	 */
 	public int hash(final String str, final Function<String, Number> hashFunc) {
-		return hashFunc.apply(str).intValue() % size;
+		// Use a bit operation to obtain a non-negative number
+		return (hashFunc.apply(str).intValue() & 0x7FFFFFFF) % size;
 	}
 
 	@Override
 	public boolean contains(final String str) {
 		for (final Function<String, Number> hashFunc : hashFuncs) {
-			if (!bitSet.get(Math.abs(hash(str, hashFunc)))) {
+			if (!bitSet.get(hash(str, hashFunc))) {
 				return false;
 			}
 		}
@@ -83,7 +95,7 @@ public class FuncFilter extends AbstractFilter {
 	public boolean add(final String str) {
 		boolean add = false;
 		for (final Function<String, Number> hashFunc : hashFuncs) {
-			int hash = Math.abs(hash(str, hashFunc));
+			int hash = hash(str, hashFunc);
 			if (!bitSet.get(hash)) {
 				bitSet.set(hash);
 				add = true;
diff --git a/hutool-core/src/test/java/cn/hutool/v7/core/text/bloom/BitMapBloomFilterTest.java b/hutool-core/src/test/java/cn/hutool/v7/core/text/bloom/BitMapBloomFilterTest.java
index 6706e34d78..2a06d7a897 100644
--- a/hutool-core/src/test/java/cn/hutool/v7/core/text/bloom/BitMapBloomFilterTest.java
+++ b/hutool-core/src/test/java/cn/hutool/v7/core/text/bloom/BitMapBloomFilterTest.java
@@ -20,6 +20,8 @@ import cn.hutool.v7.core.codec.hash.HashUtil;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
+import java.util.BitSet;
+
 public class BitMapBloomFilterTest {
 
 	@Test
@@ -35,4 +37,5 @@ public class BitMapBloomFilterTest {
 		Assertions.assertTrue(filter.contains("ddd"));
 		Assertions.assertTrue(filter.contains("123"));
 	}
+
 }

From efee8a14b0c8c6f781f0ea18a5b9069c6eb47adf Mon Sep 17 00:00:00 2001
From: LettuceLeaves <2878506229@qq.com>
Date: Fri, 21 Nov 2025 01:50:34 +0800
Subject: [PATCH 3/3] =?UTF-8?q?test(core):=E6=B7=BB=E5=8A=A0=E5=8D=95?=
 =?UTF-8?q?=E5=B8=83=E9=9A=86=E8=BF=87=E6=BB=A4=E5=99=A8=E5=A4=9A=E5=93=88?=
 =?UTF-8?q?=E5=B8=8C=E5=87=BD=E6=95=B0=E7=9A=84=E6=B5=8B=E8=AF=95=E7=94=A8?=
 =?UTF-8?q?=E4=BE=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (text between the "---" line and the diffstat is ignored by git am):
  * Hoists the filter size into the SIZE constant and adds three tests that
    build a FuncFilter from several HashUtil functions (ten, three and three).
  * Removes the java.util.BitSet import and the blank line that PATCH 2/3
    added to this test class.
  * NOTE(review): every assertFalse(...) relies on the probed string not
    colliding with the added strings under all of the configured hash
    functions at SIZE bits - presumably true for these inputs; verify by
    running the tests.

 .../text/bloom/BitMapBloomFilterTest.java     | 70 +++++++++++++++++--
 1 file changed, 66 insertions(+), 4 deletions(-)

diff --git a/hutool-core/src/test/java/cn/hutool/v7/core/text/bloom/BitMapBloomFilterTest.java b/hutool-core/src/test/java/cn/hutool/v7/core/text/bloom/BitMapBloomFilterTest.java
index 2a06d7a897..a6851654f3 100644
--- a/hutool-core/src/test/java/cn/hutool/v7/core/text/bloom/BitMapBloomFilterTest.java
+++ b/hutool-core/src/test/java/cn/hutool/v7/core/text/bloom/BitMapBloomFilterTest.java
@@ -20,15 +20,14 @@ import cn.hutool.v7.core.codec.hash.HashUtil;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
-import java.util.BitSet;
-
 public class BitMapBloomFilterTest {
 
+	private static final int SIZE = 2 * 1024 * 1024 * 8;
+
 	@Test
 	public void filterTest() {
-		final int size = 2 * 1024 * 1024 * 8;
 
-		final CombinedBloomFilter filter = new CombinedBloomFilter(FuncFilter.of(size, HashUtil::rsHash));
+		final CombinedBloomFilter filter = new CombinedBloomFilter(FuncFilter.of(SIZE, HashUtil::rsHash));
 		filter.add("123");
 		filter.add("abc");
 		filter.add("ddd");
@@ -37,5 +36,68 @@ public class BitMapBloomFilterTest {
 		Assertions.assertTrue(filter.contains("ddd"));
 		Assertions.assertTrue(filter.contains("123"));
 	}
 
+	@Test
+	public void multiHashFuncTest() {
+		final FuncFilter filter = FuncFilter.of(SIZE,
+			HashUtil::rsHash,
+			HashUtil::jsHash,
+			HashUtil::pjwHash,
+			HashUtil::elfHash,
+			HashUtil::bkdrHash,
+			HashUtil::sdbmHash,
+			HashUtil::djbHash,
+			HashUtil::dekHash,
+			HashUtil::apHash,
+			HashUtil::javaDefaultHash
+		);
+		filter.add("Hutool");
+		filter.add("BloomFilter");
+		filter.add("Java");
+
+		Assertions.assertTrue(filter.contains("Hutool"));
+		Assertions.assertTrue(filter.contains("BloomFilter"));
+		Assertions.assertTrue(filter.contains("Java"));
+		Assertions.assertFalse(filter.contains("Python"));
+		Assertions.assertFalse(filter.contains("Go"));
+		Assertions.assertFalse(filter.contains("hutool"));
+	}
+
+	@Test
+	public void combinedMultiHashTest() {
+		FuncFilter multiHashFuncFilter = FuncFilter.of(SIZE,
+			HashUtil::bkdrHash,
+			HashUtil::apHash,
+			HashUtil::djbHash
+		);
+		final CombinedBloomFilter filter = new CombinedBloomFilter(multiHashFuncFilter);
+		filter.add("123123WASD-WASD");
+		Assertions.assertTrue(filter.contains("123123WASD-WASD"));
+		Assertions.assertFalse(filter.contains("123123WASD-WASD-false"));
+	}
+
+	@Test
+	public void chineseStringWithThreeHashesTest() {
+		final FuncFilter filter = FuncFilter.of(SIZE,
+			HashUtil::bkdrHash,
+			HashUtil::apHash,
+			HashUtil::djbHash
+		);
+
+		String s1 = "你好世界";
+		String s2 = "双亲委派";
+		String s3 = "测试工程师";
+
+		filter.add(s1);
+		filter.add(s2);
+		filter.add(s3);
+		Assertions.assertTrue(filter.contains(s1), "应包含: " + s1);
+		Assertions.assertTrue(filter.contains(s2), "应包含: " + s2);
+		Assertions.assertTrue(filter.contains(s3), "应包含: " + s3);
+		Assertions.assertFalse(filter.contains("我好世界"), "多字");
+		Assertions.assertFalse(filter.contains("父亲委派"), "改字");
+		Assertions.assertFalse(filter.contains("测试"), "子串");
+		Assertions.assertFalse(filter.contains(""), "空串");
+		Assertions.assertFalse(filter.contains("👍"), "未添加的");
+	}
 }