Merge branch 'v7-dev' of github.com:chinabugotech/hutool into v7-dev

This commit is contained in:
Looly
2025-11-19 22:31:42 +08:00
3 changed files with 257 additions and 10 deletions

View File

@@ -2416,8 +2416,32 @@ public class CharSequenceUtil extends StrValidator {
if (null == str1 || null == str2) {
return false;
}
if (str1 instanceof String && str2 instanceof String) {
return ((String) str1).regionMatches(ignoreCase, offset1, (String) str2, offset2, length);
}
if (offset1 < 0 || offset2 < 0 || length < 0) {
return false;
}
if (str1.length() - offset1 < length || str2.length() - offset2 < length) {
return false;
}
for (int i = 0; i < length; i++) {
final char c1 = str1.charAt(offset1 + i);
final char c2 = str2.charAt(offset2 + i);
if (c1 == c2) {
continue;
}
if (ignoreCase) {
final char u1 = Character.toLowerCase(c1);
final char u2 = Character.toLowerCase(c2);
if (u1 == u2) {
continue;
}
}
return false;
}
return str1.toString().regionMatches(ignoreCase, offset1, str2.toString(), offset2, length);
return true;
}
// endregion

View File

@@ -20,6 +20,10 @@ import cn.hutool.v7.core.lang.Assert;
import cn.hutool.v7.core.text.CharSequenceUtil;
import java.io.Serial;
import java.util.HashMap;
import java.util.Map;
import static cn.hutool.v7.core.text.CharSequenceUtil.isSubEquals;
/**
* 字符串查找器
@@ -44,6 +48,8 @@ public class StrFinder extends TextFinder {
private final CharSequence strToFind;
private final boolean caseInsensitive;
private Map<Character, Integer> forwardOffsetMap;
private Map<Character, Integer> reverseOffsetMap;
/**
* 构造
@@ -61,23 +67,54 @@ public class StrFinder extends TextFinder {
public int start(int from) {
Assert.notNull(this.text, "Text to find must be not null!");
final int subLen = strToFind.length();
final int textLen = text.length();
if (from < 0) {
from = 0;
}
int endLimit = getValidEndIndex();
// 基于Sunday算法实现高效子串查询
if (negative) {
for (int i = from; i > endLimit; i--) {
if (CharSequenceUtil.isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
if (this.reverseOffsetMap == null) {
this.reverseOffsetMap = buildReverseOffsetMap(strToFind, caseInsensitive);
}
int maxIndex = textLen - subLen;
if (from > maxIndex) {
from = maxIndex;
}
int i = from;
while (i >= 0) {
if (isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
return i;
}
if (i - 1 < 0) {
break;
}
char preChar = text.charAt(i - 1);
int jump = reverseOffsetMap.getOrDefault(
caseInsensitive ? Character.toLowerCase(preChar) : preChar,
subLen + 1
);
i -= jump;
}
} else {
endLimit = endLimit - subLen + 1;
for (int i = from; i < endLimit; i++) {
if (CharSequenceUtil.isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
if (this.forwardOffsetMap == null) {
this.forwardOffsetMap = buildForwardOffsetMap(strToFind, caseInsensitive);
}
if (from < 0) {
from = 0;
}
int endLimit = textLen - subLen;
int i = from;
while (i <= endLimit) {
if (isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
return i;
}
if (i + subLen >= textLen) {
break;
}
char nextChar = text.charAt(i + subLen);
int jump = forwardOffsetMap.getOrDefault(
caseInsensitive ? Character.toLowerCase(nextChar) : nextChar,
subLen + 1
);
i += jump;
}
}
@@ -91,4 +128,44 @@ public class StrFinder extends TextFinder {
}
return start + strToFind.length();
}
/**
 * Builds the shift table used when searching from left to right.
 * <p>
 * Every character of {@code pattern} is mapped to the distance between its position and the
 * position just past the end of the pattern ({@code pattern.length() - index}). The pattern is
 * walked from the first character to the last, so when a character occurs more than once the
 * entry of its last occurrence (the smallest distance) is the one that remains in the table.
 *
 * @param pattern         the string being searched for
 * @param caseInsensitive if {@code true}, keys are stored as {@link Character#toLowerCase(char)}
 *                        of the pattern characters; otherwise the characters are stored as-is
 * @return a mutable map from character to shift distance; empty when the pattern is empty
 */
private static Map<Character, Integer> buildForwardOffsetMap(CharSequence pattern, boolean caseInsensitive) {
	final int patternLength = pattern.length();
	final Map<Character, Integer> shifts = new HashMap<>(Math.min(patternLength, 128));
	// 'distance' counts down from patternLength to 1 in lock-step with 'index'.
	int distance = patternLength;
	for (int index = 0; index < patternLength; index++, distance--) {
		final char raw = pattern.charAt(index);
		final char key = caseInsensitive ? Character.toLowerCase(raw) : raw;
		// A later occurrence of the same key overwrites the earlier, larger distance.
		shifts.put(key, distance);
	}
	return shifts;
}
/**
 * Builds the shift table used when searching from right to left.
 * <p>
 * Every character of {@code pattern} is mapped to its one-based position in the pattern
 * ({@code index + 1}). The pattern is walked from the last character to the first, so when a
 * character occurs more than once the entry of its first occurrence (the smallest value) is the
 * one that remains in the table.
 *
 * @param pattern         the string being searched for
 * @param caseInsensitive if {@code true}, keys are stored as {@link Character#toLowerCase(char)}
 *                        of the pattern characters; otherwise the characters are stored as-is
 * @return a mutable map from character to shift distance; empty when the pattern is empty
 */
private static Map<Character, Integer> buildReverseOffsetMap(CharSequence pattern, boolean caseInsensitive) {
	final int patternLength = pattern.length();
	final Map<Character, Integer> shifts = new HashMap<>(Math.min(patternLength, 128));
	// 'position' is the one-based position of the character, which is also its shift distance.
	for (int position = patternLength; position > 0; position--) {
		final char raw = pattern.charAt(position - 1);
		final char key = caseInsensitive ? Character.toLowerCase(raw) : raw;
		// An earlier occurrence of the same key overwrites the later, larger distance.
		shifts.put(key, position);
	}
	return shifts;
}
}

View File

@@ -0,0 +1,146 @@
package cn.hutool.v7.core.text.finder;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for {@link StrFinder}: forward and reverse search, case-insensitive search,
 * overlapping matches, non-String texts, non-ASCII text and the not-found case, plus a rough
 * timing comparison against {@link String#indexOf(String)}.
 *
 * @author Lettuceleaves
 */
public class StrFinderTest {

	@Test
	public void testForward() {
		// Forward search
		String text = "Hello Hutool World";
		StrFinder finder = StrFinder.of("Hutool", false);
		finder.setText(text);

		Assertions.assertEquals(6, finder.start(0));
		// Starting after the only occurrence must report "not found"
		Assertions.assertEquals(-1, finder.start(7));
	}

	@Test
	public void testForwardIgnoreCase() {
		// Forward search, ignoring case
		String text = "Hello HUTOOL World";
		StrFinder finder = StrFinder.of("hutool", true);
		finder.setText(text);

		Assertions.assertEquals(6, finder.start(0));
	}

	@Test
	public void testReverseBasic() {
		// Reverse search
		String text = "abc abc abc";
		StrFinder finder = StrFinder.of("abc", false);
		finder.setNegative(true);
		finder.setText(text);

		Assertions.assertEquals(8, finder.start(text.length() - 1));
		Assertions.assertEquals(4, finder.start(7));
		Assertions.assertEquals(0, finder.start(2));
	}

	@Test
	public void testReverseIgnoreCase() {
		// Reverse search, ignoring case
		String text = "ABC abc Abc";
		StrFinder finder = StrFinder.of("abc", true);
		finder.setNegative(true);
		finder.setText(text);

		Assertions.assertEquals(8, finder.start(text.length() - 1));
	}

	@Test
	public void testAlgorithmEdgeCase() {
		// Overlapping occurrences of the pattern inside the text
		String text = "ababa";

		StrFinder forward = StrFinder.of("aba", false);
		forward.setText(text);
		Assertions.assertEquals(0, forward.start(0));
		Assertions.assertEquals(2, forward.start(1));

		StrFinder reverse = StrFinder.of("aba", false);
		reverse.setNegative(true);
		reverse.setText(text);
		Assertions.assertEquals(2, reverse.start(4));
		Assertions.assertEquals(0, reverse.start(1));
	}

	@Test
	public void testZeroCopy() {
		// Verifies the toString() optimization: the text is a StringBuilder, not a String
		StringBuilder bigText = new StringBuilder();
		bigText.append("ignore-".repeat(1000));
		bigText.append("TARGET");
		bigText.append("-ignore");

		StrFinder finder = StrFinder.of("TARGET", false);
		finder.setText(bigText);

		// "ignore-" is 7 characters long and repeated 1000 times
		Assertions.assertEquals(7000, finder.start(0));
	}

	@Test
	public void testChinese() {
		// Chinese (non-ASCII) text
		String text = "希望pr能够通过";
		StrFinder finder = StrFinder.of("通过", false);
		finder.setText(text);

		Assertions.assertEquals(6, finder.start(0));
	}

	@Test
	public void testNotFound() {
		// The text does not contain the pattern
		StrFinder finder = StrFinder.of("NotExists", false);
		finder.setText("Hello World");

		Assertions.assertEquals(-1, finder.start(0));
	}

	/**
	 * Rough timing comparison between {@link String#indexOf(String)} and {@link StrFinder} on a
	 * String text and on a StringBuilder text. Every lookup is also checked against the known
	 * position of the target, so a wrong result fails the test with expected/actual values.
	 */
	@Test
	public void benchmark() {
		System.out.println("正在生成测试数据...");
		StringBuilder sb = new StringBuilder();
		String base = "abcdefghijklmnopqrstuvwxyz0123456789-";
		sb.append(base.repeat(500));
		String target = "HUTOOL_TARGET";
		sb.append(target);
		String textStr = sb.toString();

		// The target is appended last and none of its characters occur in 'base',
		// so its only occurrence starts where the padding ends.
		final int expectedIndex = textStr.length() - target.length();
		final int warmUpRounds = 100;
		final int loop = 100;

		// Warm-up so that the timed loops below are less dominated by JIT compilation
		for (int i = 0; i < warmUpRounds; i++) {
			StrFinder.of(target, false).setText(textStr).start(0);
		}

		// System.nanoTime() is monotonic and intended for measuring elapsed time
		long start = System.nanoTime();
		for (int i = 0; i < loop; i++) {
			Assertions.assertEquals(expectedIndex, textStr.indexOf(target));
		}
		long end = System.nanoTime();
		System.out.println("1. JDK String.indexOf耗时: " + ((end - start) / 1_000_000L) + "ms");

		start = System.nanoTime();
		for (int i = 0; i < loop; i++) {
			Assertions.assertEquals(expectedIndex, StrFinder.of(target, false).setText(textStr).start(0));
		}
		end = System.nanoTime();
		System.out.println("2. StrFinder (String) 耗时: " + ((end - start) / 1_000_000L) + "ms");

		start = System.nanoTime();
		for (int i = 0; i < loop; i++) {
			Assertions.assertEquals(expectedIndex, StrFinder.of(target, false).setText(sb).start(0));
		}
		end = System.nanoTime();
		System.out.println("3. StrFinder (Builder)耗时: " + ((end - start) / 1_000_000L) + "ms");
	}
}