From 7e8fd7837778d1d7b114916ae05d5eafb67cfea6 Mon Sep 17 00:00:00 2001
From: LettuceLeaves <2878506229@qq.com>
Date: Wed, 19 Nov 2025 00:01:56 +0800
Subject: [PATCH 1/4] =?UTF-8?q?test(core):=E6=B7=BB=E5=8A=A0=20StrFinder?=
 =?UTF-8?q?=20=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../v7/core/text/finder/StrFinderTest.java    | 153 ++++++++++++++++++
 1 file changed, 153 insertions(+)
 create mode 100644 hutool-core/src/test/java/cn/hutool/v7/core/text/finder/StrFinderTest.java

diff --git a/hutool-core/src/test/java/cn/hutool/v7/core/text/finder/StrFinderTest.java b/hutool-core/src/test/java/cn/hutool/v7/core/text/finder/StrFinderTest.java
new file mode 100644
index 0000000000..64cf009486
--- /dev/null
+++ b/hutool-core/src/test/java/cn/hutool/v7/core/text/finder/StrFinderTest.java
@@ -0,0 +1,153 @@
+
+package cn.hutool.v7.core.text.finder;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * @author Lettuceleaves
+ */
+public class StrFinderTest {
+
+	@Test
+	public void testForward() {
+		// forward search
+		String text = "Hello Hutool World";
+		StrFinder finder = StrFinder.of("Hutool", false);
+		finder.setText(text);
+
+		Assertions.assertEquals(6, finder.start(0));
+		Assertions.assertEquals(-1, finder.start(7));
+	}
+
+	@Test
+	public void testForwardIgnoreCase() {
+		// forward search, ignoring case
+		String text = "Hello HUTOOL World";
+		StrFinder finder = StrFinder.of("hutool", true);
+		finder.setText(text);
+
+		Assertions.assertEquals(6, finder.start(0));
+	}
+
+	@Test
+	public void testReverseBasic() {
+		// reverse search
+		String text = "abc abc abc";
+		StrFinder finder = StrFinder.of("abc", false);
+		finder.setNegative(true);
+		finder.setText(text);
+
+		Assertions.assertEquals(8, finder.start(text.length() - 1));
+		Assertions.assertEquals(4, finder.start(7));
+		Assertions.assertEquals(0, finder.start(2));
+	}
+
+	@Test
+	public void testReverseIgnoreCase() {
+		// reverse search, ignoring case
+		String text = "ABC abc Abc";
+		StrFinder finder = StrFinder.of("abc", true);
+		finder.setNegative(true);
+		finder.setText(text);
+
+		Assertions.assertEquals(8, finder.start(text.length() - 1));
+	}
+
+//	@Test
+//	public void testAlgorithmEdgeCase() {
+//		// 5. Algorithm edge case: verify that the Sunday shift tables are built correctly
+//		// Scenario: the pattern starts and ends with the same character, "aba"
+//		// The forward table should keep the right-most 'a', the reverse table the left-most 'a'
+//
+//		String text = "ababa";
+//		// index:      01234
+//
+//		// --- forward ---
+//		StrFinder forward = StrFinder.of("aba", false);
+//		forward.setText(text);
+//		Assertions.assertEquals(0, forward.start(0));
+//		// starting from 1 should skip the first 'a' and match "aba" at index 2
+//		Assertions.assertEquals(2, forward.start(1));
+//
+//		// --- reverse ---
+//		StrFinder reverse = StrFinder.of("aba", false);
+//		reverse.setNegative(true);
+//		reverse.setText(text);
+//		Assertions.assertEquals(2, reverse.start(4));
+//		Assertions.assertEquals(0, reverse.start(1));
+//	}
+
+	@Test
+	public void testZeroCopy() {
+		// search directly on a StringBuilder, the case the toString() removal targets
+		StringBuilder bigText = new StringBuilder();
+		bigText.append("ignore-".repeat(1000));
+		bigText.append("TARGET");
+		bigText.append("-ignore");
+
+		StrFinder finder = StrFinder.of("TARGET", false);
+		finder.setText(bigText);
+
+		Assertions.assertEquals(7000, finder.start(0));
+	}
+
+	@Test
+	public void testChinese() {
+		// non-ASCII (CJK) text
+		String text = "希望pr能够通过";
+		StrFinder finder = StrFinder.of("通过", false);
+		finder.setText(text);
+
+		Assertions.assertEquals(6, finder.start(0));
+	}
+
+	@Test
+	public void testNotFound() {
+		// pattern not present in the text
+		StrFinder finder = StrFinder.of("NotExists", false);
+		finder.setText("Hello World");
+		Assertions.assertEquals(-1, finder.start(0));
+	}
+
+	@Test
+	public void benchmark() {
+		System.out.println("正在生成测试数据...");
+		StringBuilder sb = new StringBuilder();
+		String base = "abcdefghijklmnopqrstuvwxyz0123456789-";
+		sb.append(base.repeat(500));
+		String target = "HUTOOL_TARGET";
+		sb.append(target);
+
+		String textStr = sb.toString();
+
+		int loop = 100;
+		for (int i = 0; i < 100; i++) {
+			StrFinder.of(target, false).setText(textStr).start(0);
+		}
+
+		long start = System.currentTimeMillis();
+		for (int i = 0; i < loop; i++) {
+			int index = textStr.indexOf(target);
+			if (index == -1) throw new RuntimeException("Bug!");
+		}
+		long end = System.currentTimeMillis();
+		System.out.println("1. JDK String.indexOf耗时: " + (end - start) + "ms");
+
+		start = System.currentTimeMillis();
+		for (int i = 0; i < loop; i++) {
+			int index = StrFinder.of(target, false).setText(textStr).start(0);
+			if (index == -1) throw new RuntimeException("Bug!");
+		}
+		end = System.currentTimeMillis();
+		System.out.println("2. StrFinder (String) 耗时: " + (end - start) + "ms");
+
+		start = System.currentTimeMillis();
+		for (int i = 0; i < loop; i++) {
+			int index = StrFinder.of(target, false).setText(sb).start(0);
+			if (index == -1) throw new RuntimeException("Bug!");
+		}
+		end = System.currentTimeMillis();
+		System.out.println("3. StrFinder (Builder)耗时: " + (end - start) + "ms");
+	}
+}

From 1bba40a32f4e9c16fc3bd21ae6643b505e948e16 Mon Sep 17 00:00:00 2001
From: LettuceLeaves <2878506229@qq.com>
Date: Wed, 19 Nov 2025 00:20:56 +0800
Subject: [PATCH 2/4] =?UTF-8?q?perf:=E4=BC=98=E5=8C=96isSubEquals=E6=96=B9?=
 =?UTF-8?q?=E6=B3=95=EF=BC=8C=E5=8E=BB=E9=99=A4toString()=E9=98=B2?=
 =?UTF-8?q?=E6=AD=A2=E6=8B=B7=E8=B4=9D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../hutool/v7/core/text/CharSequenceUtil.java | 31 ++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/hutool-core/src/main/java/cn/hutool/v7/core/text/CharSequenceUtil.java b/hutool-core/src/main/java/cn/hutool/v7/core/text/CharSequenceUtil.java
index e443dd4a02..ecf0fcdfb4 100644
--- a/hutool-core/src/main/java/cn/hutool/v7/core/text/CharSequenceUtil.java
+++ b/hutool-core/src/main/java/cn/hutool/v7/core/text/CharSequenceUtil.java
@@ -2416,8 +2416,37 @@ public class CharSequenceUtil extends StrValidator {
 		if (null == str1 || null == str2) {
 			return false;
 		}
+		// fast path: two Strings can use the JDK implementation directly, no copy involved
+		if (str1 instanceof String && str2 instanceof String) {
+			return ((String) str1).regionMatches(ignoreCase, offset1, (String) str2, offset2, length);
+		}
+		if (offset1 < 0 || offset2 < 0 || length < 0) {
+			return false;
+		}
+		if (str1.length() - offset1 < length || str2.length() - offset2 < length) {
+			return false;
+		}
+		for (int i = 0; i < length; i++) {
+			final char c1 = str1.charAt(offset1 + i);
+			final char c2 = str2.charAt(offset2 + i);
+			if (c1 == c2) {
+				continue;
+			}
+			if (ignoreCase) {
+				// same folding order as String.regionMatches: upper-case first, then lower-case
+				final char u1 = Character.toUpperCase(c1);
+				final char u2 = Character.toUpperCase(c2);
+				if (u1 == u2) {
+					continue;
+				}
+				if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
+					continue;
+				}
+			}
+			return false;
+		}
 
-		return str1.toString().regionMatches(ignoreCase, offset1, str2.toString(), offset2, length);
+		return true;
 	}
 
 	// endregion

From 3f15fdd44c05710ce016b3472aab6c2ef888e88e Mon Sep 17 00:00:00 2001
From: LettuceLeaves <2878506229@qq.com>
Date: Wed, 19 Nov 2025 01:03:42 +0800
Subject: [PATCH 3/4] =?UTF-8?q?perf:=E4=BD=BF=E7=94=A8Sunday=E7=AE=97?=
 =?UTF-8?q?=E6=B3=95=EF=BC=8C=E4=BC=98=E5=8C=96=E5=AD=97=E4=B8=B2=E6=9F=A5?=
 =?UTF-8?q?=E8=AF=A2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../hutool/v7/core/text/finder/StrFinder.java | 84 +++++++++++++++++--
 1 file changed, 81 insertions(+), 3 deletions(-)

diff --git a/hutool-core/src/main/java/cn/hutool/v7/core/text/finder/StrFinder.java b/hutool-core/src/main/java/cn/hutool/v7/core/text/finder/StrFinder.java
index 7c89619e32..f19b3a17df 100644
--- a/hutool-core/src/main/java/cn/hutool/v7/core/text/finder/StrFinder.java
+++ b/hutool-core/src/main/java/cn/hutool/v7/core/text/finder/StrFinder.java
@@ -20,6 +20,8 @@ import cn.hutool.v7.core.lang.Assert;
 import cn.hutool.v7.core.text.CharSequenceUtil;
 
 import java.io.Serial;
+import java.util.HashMap;
+import java.util.Map;
 
 /**
  * 字符串查找器
@@ -44,6 +46,9 @@ public class StrFinder extends TextFinder {
 
 	private final CharSequence strToFind;
 	private final boolean caseInsensitive;
+	// Sunday shift tables, built on first use for the direction actually searched
+	private Map<Character, Integer> forwardOffsetMap;
+	private Map<Character, Integer> reverseOffsetMap;
 
 	/**
 	 * 构造
@@ -61,23 +66,45 @@ public class StrFinder extends TextFinder {
 	public int start(int from) {
 		Assert.notNull(this.text, "Text to find must be not null!");
 		final int subLen = strToFind.length();
+		final int textLen = text.length();
 
 		if (from < 0) {
 			from = 0;
 		}
 		int endLimit = getValidEndIndex();
+		// Sunday search: on a mismatch, the character just outside the window decides the shift
 		if (negative) {
-			for (int i = from; i > endLimit; i--) {
+			if (this.reverseOffsetMap == null) {
+				this.reverseOffsetMap = buildReverseOffsetMap(strToFind, caseInsensitive);
+			}
+			// clamp so that text.charAt(i - 1) below is never reached with i - 1 < 0
+			endLimit = Math.max(endLimit, -1);
+			// a window starting beyond textLen - subLen cannot fit inside the text
+			int i = Math.min(from, textLen - subLen);
+			while (i > endLimit) {
 				if (CharSequenceUtil.isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
 					return i;
 				}
+				if (i - 1 <= endLimit) {
+					break;
+				}
+				i -= reverseOffsetMap.getOrDefault(foldCase(text.charAt(i - 1), caseInsensitive), subLen + 1);
 			}
 		} else {
-			endLimit = endLimit - subLen + 1;
-			for (int i = from; i < endLimit; i++) {
+			if (this.forwardOffsetMap == null) {
+				this.forwardOffsetMap = buildForwardOffsetMap(strToFind, caseInsensitive);
+			}
+			// last index at which a full window still fits within the allowed range
+			endLimit = Math.min(endLimit, textLen) - subLen;
+			int i = from;
+			while (i <= endLimit) {
 				if (CharSequenceUtil.isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
 					return i;
 				}
+				if (i + subLen >= textLen) {
+					break;
+				}
+				i += forwardOffsetMap.getOrDefault(foldCase(text.charAt(i + subLen), caseInsensitive), subLen + 1);
 			}
 		}
 
@@ -91,4 +118,55 @@ public class StrFinder extends TextFinder {
 		}
 		return start + strToFind.length();
 	}
+
+	/**
+	 * Builds the shift table for the forward (left-to-right) Sunday search.
+	 * For each pattern character the table holds the distance from its right-most
+	 * occurrence to the position just past the end of the pattern.
+	 *
+	 * @param pattern         the string to find
+	 * @param caseInsensitive whether characters are case-folded before being used as keys
+	 * @return map from (folded) character to shift distance
+	 */
+	private static Map<Character, Integer> buildForwardOffsetMap(final CharSequence pattern, final boolean caseInsensitive) {
+		final int m = pattern.length();
+		final Map<Character, Integer> map = new HashMap<>(Math.min(m, 128));
+		for (int i = 0; i < m; i++) {
+			// ascending scan: the right-most occurrence is written last and wins
+			map.put(foldCase(pattern.charAt(i), caseInsensitive), m - i);
+		}
+		return map;
+	}
+
+	/**
+	 * Builds the shift table for the reverse (right-to-left) Sunday search.
+	 * For each pattern character the table holds the distance from its left-most
+	 * occurrence to the position just before the start of the pattern.
+	 *
+	 * @param pattern         the string to find
+	 * @param caseInsensitive whether characters are case-folded before being used as keys
+	 * @return map from (folded) character to shift distance
+	 */
+	private static Map<Character, Integer> buildReverseOffsetMap(final CharSequence pattern, final boolean caseInsensitive) {
+		final int m = pattern.length();
+		final Map<Character, Integer> map = new HashMap<>(Math.min(m, 128));
+		for (int i = m - 1; i >= 0; i--) {
+			// descending scan: the left-most occurrence is written last and wins
+			map.put(foldCase(pattern.charAt(i), caseInsensitive), i + 1);
+		}
+		return map;
+	}
+
+	/**
+	 * Folds a character the same way {@link String#regionMatches(boolean, int, String, int, int)}
+	 * compares characters when ignoring case (upper-case first, then lower-case), so that the
+	 * shift tables never skip a position that {@code isSubEquals} would accept.
+	 *
+	 * @param c               the character
+	 * @param caseInsensitive whether to fold; if {@code false} the character is returned unchanged
+	 * @return the table key for {@code c}
+	 */
+	private static char foldCase(final char c, final boolean caseInsensitive) {
+		return caseInsensitive ? Character.toLowerCase(Character.toUpperCase(c)) : c;
+	}
 }

From dbed4a4d0fba8de2b08bebcad2c9830911ecbd85 Mon Sep 17 00:00:00 2001
From: LettuceLeaves <2878506229@qq.com>
Date: Wed, 19 Nov 2025 01:05:44 +0800
Subject: [PATCH 4/4] =?UTF-8?q?test(core):=E6=B7=BB=E5=8A=A0Sunday?=
 =?UTF-8?q?=E7=AE=97=E6=B3=95=E6=B5=8B=E8=AF=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../v7/core/text/finder/StrFinderTest.java    | 49 +++++++++++--------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/hutool-core/src/test/java/cn/hutool/v7/core/text/finder/StrFinderTest.java b/hutool-core/src/test/java/cn/hutool/v7/core/text/finder/StrFinderTest.java
index 64cf009486..b874db482f 100644
--- a/hutool-core/src/test/java/cn/hutool/v7/core/text/finder/StrFinderTest.java
+++ b/hutool-core/src/test/java/cn/hutool/v7/core/text/finder/StrFinderTest.java
@@ -54,29 +54,32 @@ public class StrFinderTest {
 		Assertions.assertEquals(8, finder.start(text.length() - 1));
 	}
 
-//	@Test
-//	public void testAlgorithmEdgeCase() {
-//		// 5. Algorithm edge case: verify that the Sunday shift tables are built correctly
-//		// Scenario: the pattern starts and ends with the same character, "aba"
-//		// The forward table should keep the right-most 'a', the reverse table the left-most 'a'
-//
-//		String text = "ababa";
-//		// index:      01234
-//
-//		// --- forward ---
-//		StrFinder forward = StrFinder.of("aba", false);
-//		forward.setText(text);
-//		Assertions.assertEquals(0, forward.start(0));
-//		// starting from 1 should skip the first 'a' and match "aba" at index 2
-//		Assertions.assertEquals(2, forward.start(1));
-//
-//		// --- reverse ---
-//		StrFinder reverse = StrFinder.of("aba", false);
-//		reverse.setNegative(true);
-//		reverse.setText(text);
-//		Assertions.assertEquals(2, reverse.start(4));
-//		Assertions.assertEquals(0, reverse.start(1));
-//	}
+	@Test
+	public void testAlgorithmEdgeCase() {
+
+		String text = "ababa";
+
+		StrFinder forward = StrFinder.of("aba", false);
+		forward.setText(text);
+		Assertions.assertEquals(0, forward.start(0));
+		Assertions.assertEquals(2, forward.start(1));
+
+		StrFinder reverse = StrFinder.of("aba", false);
+		reverse.setNegative(true);
+		reverse.setText(text);
+		Assertions.assertEquals(2, reverse.start(4));
+		Assertions.assertEquals(0, reverse.start(1));
+	}
+
+	@Test
+	public void testIgnoreCaseFolding() {
+		// U+0131 (dotless i) equals 'i' only after upper-casing, which is how String.regionMatches folds
+		final String text = "a\u0131";
+		final StrFinder finder = StrFinder.of("i", true);
+		finder.setText(text);
+
+		Assertions.assertEquals(1, finder.start(0));
+	}
 
 	@Test
 	public void testZeroCopy() {