diff --git a/hutool-core/src/main/java/cn/hutool/v7/core/text/CharSequenceUtil.java b/hutool-core/src/main/java/cn/hutool/v7/core/text/CharSequenceUtil.java
index e443dd4a02..a72f101c41 100644
--- a/hutool-core/src/main/java/cn/hutool/v7/core/text/CharSequenceUtil.java
+++ b/hutool-core/src/main/java/cn/hutool/v7/core/text/CharSequenceUtil.java
@@ -2416,8 +2416,34 @@ public class CharSequenceUtil extends StrValidator {
 		if (null == str1 || null == str2) {
 			return false;
 		}
+		if (str1 instanceof String && str2 instanceof String) {
+			return ((String) str1).regionMatches(ignoreCase, offset1, (String) str2, offset2, length);
+		}
+		// Compare in place so that non-String sequences are not copied via toString().
+		// Same bounds rule as String.regionMatches; long arithmetic cannot overflow.
+		if (offset1 < 0 || offset2 < 0
+			|| offset1 > (long) str1.length() - length
+			|| offset2 > (long) str2.length() - length) {
+			return false;
+		}
+		for (int i = 0; i < length; i++) {
+			final char c1 = str1.charAt(offset1 + i);
+			final char c2 = str2.charAt(offset2 + i);
+			if (c1 == c2) {
+				continue;
+			}
+			if (ignoreCase) {
+				// Same folding as String.regionMatches: upper-case first, then lower-case.
+				final char u1 = Character.toUpperCase(c1);
+				final char u2 = Character.toUpperCase(c2);
+				if (u1 == u2 || Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
+					continue;
+				}
+			}
+			return false;
+		}
 
-		return str1.toString().regionMatches(ignoreCase, offset1, str2.toString(), offset2, length);
+		return true;
 	}
 
 	// endregion
diff --git a/hutool-core/src/main/java/cn/hutool/v7/core/text/finder/StrFinder.java b/hutool-core/src/main/java/cn/hutool/v7/core/text/finder/StrFinder.java
index 7c89619e32..f19b3a17df 100644
--- a/hutool-core/src/main/java/cn/hutool/v7/core/text/finder/StrFinder.java
+++ b/hutool-core/src/main/java/cn/hutool/v7/core/text/finder/StrFinder.java
@@ -20,6 +20,8 @@ import cn.hutool.v7.core.lang.Assert;
 import cn.hutool.v7.core.text.CharSequenceUtil;
 
 import java.io.Serial;
+import java.util.HashMap;
+import java.util.Map;
 
 /**
  * 字符串查找器
@@ -44,6 +46,9 @@ public class StrFinder extends TextFinder {
 
 	private final CharSequence strToFind;
 	private final boolean caseInsensitive;
+	// Sunday shift tables: derived from the pattern, built lazily on first use, never serialized.
+	private transient Map<Character, Integer> forwardOffsetMap;
+	private transient Map<Character, Integer> reverseOffsetMap;
 
 	/**
 	 * 构造
@@ -61,23 +66,45 @@ public class StrFinder extends TextFinder {
 	public int start(int from) {
 		Assert.notNull(this.text, "Text to find must be not null!");
 		final int subLen = strToFind.length();
+		final int textLen = text.length();
 
 		if (from < 0) {
 			from = 0;
 		}
-		int endLimit = getValidEndIndex();
+		// Candidate positions stay bounded by the configured end index, as in the linear scan this replaces.
+		final int validEnd = getValidEndIndex();
+		// Sunday algorithm: after a mismatch, the character just outside the window decides the jump.
 		if (negative) {
-			for (int i = from; i > endLimit; i--) {
+			if (this.reverseOffsetMap == null) {
+				this.reverseOffsetMap = buildReverseOffsetMap(strToFind, caseInsensitive);
+			}
+			// Start no later than the last position where the pattern still fits into the text.
+			int i = Math.min(from, textLen - subLen);
+			while (i >= 0 && i > validEnd) {
 				if (CharSequenceUtil.isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
 					return i;
 				}
+				if (i == 0) {
+					break;
+				}
+				final char preChar = text.charAt(i - 1);
+				i -= reverseOffsetMap.getOrDefault(caseInsensitive ? foldCase(preChar) : preChar, subLen + 1);
 			}
 		} else {
-			endLimit = endLimit - subLen + 1;
-			for (int i = from; i < endLimit; i++) {
+			if (this.forwardOffsetMap == null) {
+				this.forwardOffsetMap = buildForwardOffsetMap(strToFind, caseInsensitive);
+			}
+			final int endLimit = validEnd - subLen;
+			int i = from;
+			while (i <= endLimit) {
 				if (CharSequenceUtil.isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
 					return i;
 				}
+				if (i + subLen >= textLen) {
+					break;
+				}
+				final char nextChar = text.charAt(i + subLen);
+				i += forwardOffsetMap.getOrDefault(caseInsensitive ? foldCase(nextChar) : nextChar, subLen + 1);
 			}
 		}
 
@@ -91,4 +118,57 @@ public class StrFinder extends TextFinder {
 		}
 		return start + strToFind.length();
 	}
+
+	/**
+	 * Builds the Sunday shift table used by forward search.
+	 * <p>
+	 * Every pattern character maps to the distance between its right-most occurrence and the position just
+	 * past the pattern end. Characters missing from the table shift by {@code pattern.length() + 1}.
+	 *
+	 * @param pattern         the string being searched for
+	 * @param caseInsensitive whether keys are case-folded with {@link #foldCase(char)}
+	 * @return shift distance per pattern character
+	 */
+	private static Map<Character, Integer> buildForwardOffsetMap(final CharSequence pattern, final boolean caseInsensitive) {
+		final int m = pattern.length();
+		final Map<Character, Integer> map = new HashMap<>(Math.min(m, 128));
+		// Ascending scan: a later (right-most) occurrence overwrites earlier ones, keeping the smallest shift.
+		for (int i = 0; i < m; i++) {
+			final char c = pattern.charAt(i);
+			map.put(caseInsensitive ? foldCase(c) : c, m - i);
+		}
+		return map;
+	}
+
+	/**
+	 * Builds the Sunday shift table used by reverse search.
+	 * <p>
+	 * Every pattern character maps to the distance between its left-most occurrence and the position just
+	 * before the pattern start. Characters missing from the table shift by {@code pattern.length() + 1}.
+	 *
+	 * @param pattern         the string being searched for
+	 * @param caseInsensitive whether keys are case-folded with {@link #foldCase(char)}
+	 * @return shift distance per pattern character
+	 */
+	private static Map<Character, Integer> buildReverseOffsetMap(final CharSequence pattern, final boolean caseInsensitive) {
+		final int m = pattern.length();
+		final Map<Character, Integer> map = new HashMap<>(Math.min(m, 128));
+		// Descending scan: an earlier (left-most) occurrence overwrites later ones, keeping the smallest shift.
+		for (int i = m - 1; i >= 0; i--) {
+			final char c = pattern.charAt(i);
+			map.put(caseInsensitive ? foldCase(c) : c, i + 1);
+		}
+		return map;
+	}
+
+	/**
+	 * Folds a character the way {@link String#regionMatches(boolean, int, String, int, int)} documents for
+	 * case-insensitive comparison: upper-case first, then lower-case.
+	 *
+	 * @param c the character to fold
+	 * @return the folded character
+	 */
+	private static char foldCase(final char c) {
+		return Character.toLowerCase(Character.toUpperCase(c));
+	}
 }
diff --git a/hutool-core/src/test/java/cn/hutool/v7/core/text/finder/StrFinderTest.java b/hutool-core/src/test/java/cn/hutool/v7/core/text/finder/StrFinderTest.java
new file mode 100644
index 0000000000..b874db482f
--- /dev/null
+++ b/hutool-core/src/test/java/cn/hutool/v7/core/text/finder/StrFinderTest.java
@@ -0,0 +1,159 @@
+package cn.hutool.v7.core.text.finder;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests for {@link StrFinder}.
+ *
+ * @author Lettuceleaves
+ */
+public class StrFinderTest {
+
+	@Test
+	public void testForward() {
+		// Forward search
+		String text = "Hello Hutool World";
+		StrFinder finder = StrFinder.of("Hutool", false);
+		finder.setText(text);
+
+		Assertions.assertEquals(6, finder.start(0));
+		Assertions.assertEquals(-1, finder.start(7));
+	}
+
+	@Test
+	public void testForwardIgnoreCase() {
+		// Forward search, ignoring case
+		String text = "Hello HUTOOL World";
+		StrFinder finder = StrFinder.of("hutool", true);
+		finder.setText(text);
+
+		Assertions.assertEquals(6, finder.start(0));
+	}
+
+	@Test
+	public void testReverseBasic() {
+		// Reverse search
+		String text = "abc abc abc";
+		StrFinder finder = StrFinder.of("abc", false);
+		finder.setNegative(true);
+		finder.setText(text);
+
+		Assertions.assertEquals(8, finder.start(text.length() - 1));
+		Assertions.assertEquals(4, finder.start(7));
+		Assertions.assertEquals(0, finder.start(2));
+	}
+
+	@Test
+	public void testReverseIgnoreCase() {
+		// Reverse search, ignoring case
+		String text = "ABC abc Abc";
+		StrFinder finder = StrFinder.of("abc", true);
+		finder.setNegative(true);
+		finder.setText(text);
+
+		Assertions.assertEquals(8, finder.start(text.length() - 1));
+	}
+
+	@Test
+	public void testIgnoreCaseFoldsLikeString() {
+		// U+0131 (dotless i) upper-cases to 'I', so String.regionMatches treats it as equal to 'i'.
+		String text = "xb\u0131a";
+
+		Assertions.assertEquals(2, StrFinder.of("ia", true).setText(text).start(0));
+		Assertions.assertEquals(2, StrFinder.of("ia", true).setText(new StringBuilder(text)).start(0));
+	}
+
+	@Test
+	public void testAlgorithmEdgeCase() {
+		// Overlapping occurrences must not be skipped by the shift table
+		String text = "ababa";
+
+		StrFinder forward = StrFinder.of("aba", false);
+		forward.setText(text);
+		Assertions.assertEquals(0, forward.start(0));
+		Assertions.assertEquals(2, forward.start(1));
+
+		StrFinder reverse = StrFinder.of("aba", false);
+		reverse.setNegative(true);
+		reverse.setText(text);
+		Assertions.assertEquals(2, reverse.start(4));
+		Assertions.assertEquals(0, reverse.start(1));
+	}
+
+	@Test
+	public void testZeroCopy() {
+		// Searching a StringBuilder must work without converting it via toString()
+		StringBuilder bigText = new StringBuilder();
+		bigText.append("ignore-".repeat(1000));
+		bigText.append("TARGET");
+		bigText.append("-ignore");
+
+		StrFinder finder = StrFinder.of("TARGET", false);
+		finder.setText(bigText);
+
+		Assertions.assertEquals(7000, finder.start(0));
+	}
+
+	@Test
+	public void testChinese() {
+		// Non-Latin text
+		String text = "希望pr能够通过";
+		StrFinder finder = StrFinder.of("通过", false);
+		finder.setText(text);
+
+		Assertions.assertEquals(6, finder.start(0));
+	}
+
+	@Test
+	public void testNotFound() {
+		// The text does not contain the pattern
+		StrFinder finder = StrFinder.of("NotExists", false);
+		finder.setText("Hello World");
+		Assertions.assertEquals(-1, finder.start(0));
+	}
+
+	@Test
+	@Disabled("Manual micro-benchmark: prints timings and asserts nothing about them")
+	public void benchmark() {
+		System.out.println("正在生成测试数据...");
+		StringBuilder sb = new StringBuilder();
+		String base = "abcdefghijklmnopqrstuvwxyz0123456789-";
+		sb.append(base.repeat(500));
+		String target = "HUTOOL_TARGET";
+		sb.append(target);
+
+		String textStr = sb.toString();
+
+		int loop = 100;
+		// Warm-up
+		for (int i = 0; i < loop; i++) {
+			StrFinder.of(target, false).setText(textStr).start(0);
+		}
+
+		long start = System.currentTimeMillis();
+		for (int i = 0; i < loop; i++) {
+			int index = textStr.indexOf(target);
+			if (index == -1) throw new RuntimeException("Bug!");
+		}
+		long end = System.currentTimeMillis();
+		System.out.println("1. JDK String.indexOf耗时: " + (end - start) + "ms");
+
+		start = System.currentTimeMillis();
+		for (int i = 0; i < loop; i++) {
+			int index = StrFinder.of(target, false).setText(textStr).start(0);
+			if (index == -1) throw new RuntimeException("Bug!");
+		}
+		end = System.currentTimeMillis();
+		System.out.println("2. StrFinder (String) 耗时: " + (end - start) + "ms");
+
+		start = System.currentTimeMillis();
+		for (int i = 0; i < loop; i++) {
+			int index = StrFinder.of(target, false).setText(sb).start(0);
+			if (index == -1) throw new RuntimeException("Bug!");
+		}
+		end = System.currentTimeMillis();
+		System.out.println("3. StrFinder (Builder)耗时: " + (end - start) + "ms");
+	}
+}