mirror of
https://gitee.com/dromara/hutool.git
synced 2026-02-09 09:16:26 +08:00
Merge pull request #4134 from Lettuceleaves/perf/optimize-str-finder
Perf/optimize str finder
This commit is contained in:
@@ -2416,8 +2416,32 @@ public class CharSequenceUtil extends StrValidator {
|
||||
if (null == str1 || null == str2) {
|
||||
return false;
|
||||
}
|
||||
if (str1 instanceof String && str2 instanceof String) {
|
||||
return ((String) str1).regionMatches(ignoreCase, offset1, (String) str2, offset2, length);
|
||||
}
|
||||
if (offset1 < 0 || offset2 < 0 || length < 0) {
|
||||
return false;
|
||||
}
|
||||
if (str1.length() - offset1 < length || str2.length() - offset2 < length) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < length; i++) {
|
||||
final char c1 = str1.charAt(offset1 + i);
|
||||
final char c2 = str2.charAt(offset2 + i);
|
||||
if (c1 == c2) {
|
||||
continue;
|
||||
}
|
||||
if (ignoreCase) {
|
||||
final char u1 = Character.toLowerCase(c1);
|
||||
final char u2 = Character.toLowerCase(c2);
|
||||
if (u1 == u2) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
return str1.toString().regionMatches(ignoreCase, offset1, str2.toString(), offset2, length);
|
||||
return true;
|
||||
}
|
||||
// endregion
|
||||
|
||||
|
||||
@@ -20,6 +20,10 @@ import cn.hutool.v7.core.lang.Assert;
|
||||
import cn.hutool.v7.core.text.CharSequenceUtil;
|
||||
|
||||
import java.io.Serial;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static cn.hutool.v7.core.text.CharSequenceUtil.isSubEquals;
|
||||
|
||||
/**
|
||||
* 字符串查找器
|
||||
@@ -44,6 +48,8 @@ public class StrFinder extends TextFinder {
|
||||
|
||||
private final CharSequence strToFind;
|
||||
private final boolean caseInsensitive;
|
||||
private Map<Character, Integer> forwardOffsetMap;
|
||||
private Map<Character, Integer> reverseOffsetMap;
|
||||
|
||||
/**
|
||||
* 构造
|
||||
@@ -61,23 +67,54 @@ public class StrFinder extends TextFinder {
|
||||
public int start(int from) {
|
||||
Assert.notNull(this.text, "Text to find must be not null!");
|
||||
final int subLen = strToFind.length();
|
||||
final int textLen = text.length();
|
||||
|
||||
if (from < 0) {
|
||||
from = 0;
|
||||
}
|
||||
int endLimit = getValidEndIndex();
|
||||
// 基于Sunday算法实现高效子串查询
|
||||
if (negative) {
|
||||
for (int i = from; i > endLimit; i--) {
|
||||
if (CharSequenceUtil.isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
|
||||
if (this.reverseOffsetMap == null) {
|
||||
this.reverseOffsetMap = buildReverseOffsetMap(strToFind, caseInsensitive);
|
||||
}
|
||||
int maxIndex = textLen - subLen;
|
||||
if (from > maxIndex) {
|
||||
from = maxIndex;
|
||||
}
|
||||
int i = from;
|
||||
while (i >= 0) {
|
||||
if (isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
|
||||
return i;
|
||||
}
|
||||
if (i - 1 < 0) {
|
||||
break;
|
||||
}
|
||||
char preChar = text.charAt(i - 1);
|
||||
int jump = reverseOffsetMap.getOrDefault(
|
||||
caseInsensitive ? Character.toLowerCase(preChar) : preChar,
|
||||
subLen + 1
|
||||
);
|
||||
i -= jump;
|
||||
}
|
||||
} else {
|
||||
endLimit = endLimit - subLen + 1;
|
||||
for (int i = from; i < endLimit; i++) {
|
||||
if (CharSequenceUtil.isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
|
||||
if (this.forwardOffsetMap == null) {
|
||||
this.forwardOffsetMap = buildForwardOffsetMap(strToFind, caseInsensitive);
|
||||
}
|
||||
if (from < 0) {
|
||||
from = 0;
|
||||
}
|
||||
int endLimit = textLen - subLen;
|
||||
int i = from;
|
||||
while (i <= endLimit) {
|
||||
if (isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
|
||||
return i;
|
||||
}
|
||||
if (i + subLen >= textLen) {
|
||||
break;
|
||||
}
|
||||
char nextChar = text.charAt(i + subLen);
|
||||
int jump = forwardOffsetMap.getOrDefault(
|
||||
caseInsensitive ? Character.toLowerCase(nextChar) : nextChar,
|
||||
subLen + 1
|
||||
);
|
||||
i += jump;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -91,4 +128,44 @@ public class StrFinder extends TextFinder {
|
||||
}
|
||||
return start + strToFind.length();
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建正向偏移表
|
||||
*/
|
||||
private static Map<Character, Integer> buildForwardOffsetMap(CharSequence pattern, boolean caseInsensitive) {
|
||||
int m = pattern.length();
|
||||
Map<Character, Integer> map = new HashMap<>(Math.min(m, 128));
|
||||
|
||||
for (int i = 0; i < m; i++) {
|
||||
char c = pattern.charAt(i);
|
||||
int jump = m - i;
|
||||
|
||||
if (caseInsensitive) {
|
||||
map.put(Character.toLowerCase(c), jump);
|
||||
} else {
|
||||
map.put(c, jump);
|
||||
}
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建反向偏移表
|
||||
*/
|
||||
private static Map<Character, Integer> buildReverseOffsetMap(CharSequence pattern, boolean caseInsensitive) {
|
||||
int m = pattern.length();
|
||||
Map<Character, Integer> map = new HashMap<>(Math.min(m, 128));
|
||||
|
||||
for (int i = m - 1; i >= 0; i--) {
|
||||
char c = pattern.charAt(i);
|
||||
int jump = i + 1;
|
||||
|
||||
if (caseInsensitive) {
|
||||
map.put(Character.toLowerCase(c), jump);
|
||||
} else {
|
||||
map.put(c, jump);
|
||||
}
|
||||
}
|
||||
return map;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,146 @@
|
||||
|
||||
package cn.hutool.v7.core.text.finder;
|
||||
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
/**
|
||||
* @author Lettuceleaves
|
||||
*/
|
||||
public class StrFinderTest {
|
||||
|
||||
@Test
|
||||
public void testForward() {
|
||||
// 正向查找
|
||||
String text = "Hello Hutool World";
|
||||
StrFinder finder = StrFinder.of("Hutool", false);
|
||||
finder.setText(text);
|
||||
|
||||
Assertions.assertEquals(6, finder.start(0));
|
||||
Assertions.assertEquals(-1, finder.start(7));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForwardIgnoreCase() {
|
||||
// 正向查找,忽略大小写
|
||||
String text = "Hello HUTOOL World";
|
||||
StrFinder finder = StrFinder.of("hutool", true);
|
||||
finder.setText(text);
|
||||
|
||||
Assertions.assertEquals(6, finder.start(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReverseBasic() {
|
||||
// 反向查找
|
||||
String text = "abc abc abc";
|
||||
StrFinder finder = StrFinder.of("abc", false);
|
||||
finder.setNegative(true);
|
||||
finder.setText(text);
|
||||
|
||||
Assertions.assertEquals(8, finder.start(text.length() - 1));
|
||||
Assertions.assertEquals(4, finder.start(7));
|
||||
Assertions.assertEquals(0, finder.start(2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReverseIgnoreCase() {
|
||||
// 反向查找,忽略大小写
|
||||
String text = "ABC abc Abc";
|
||||
StrFinder finder = StrFinder.of("abc", true);
|
||||
finder.setNegative(true);
|
||||
finder.setText(text);
|
||||
|
||||
Assertions.assertEquals(8, finder.start(text.length() - 1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAlgorithmEdgeCase() {
|
||||
|
||||
String text = "ababa";
|
||||
|
||||
StrFinder forward = StrFinder.of("aba", false);
|
||||
forward.setText(text);
|
||||
Assertions.assertEquals(0, forward.start(0));
|
||||
Assertions.assertEquals(2, forward.start(1));
|
||||
|
||||
StrFinder reverse = StrFinder.of("aba", false);
|
||||
reverse.setNegative(true);
|
||||
reverse.setText(text);
|
||||
Assertions.assertEquals(2, reverse.start(4));
|
||||
Assertions.assertEquals(0, reverse.start(1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testZeroCopy() {
|
||||
// 验证toString()优化
|
||||
StringBuilder bigText = new StringBuilder();
|
||||
bigText.append("ignore-".repeat(1000));
|
||||
bigText.append("TARGET");
|
||||
bigText.append("-ignore");
|
||||
|
||||
StrFinder finder = StrFinder.of("TARGET", false);
|
||||
finder.setText(bigText);
|
||||
|
||||
Assertions.assertEquals(7000, finder.start(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testChinese() {
|
||||
// 中文测试
|
||||
String text = "希望pr能够通过";
|
||||
StrFinder finder = StrFinder.of("通过", false);
|
||||
finder.setText(text);
|
||||
|
||||
Assertions.assertEquals(6, finder.start(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNotFound() {
|
||||
// 不包含字符串
|
||||
StrFinder finder = StrFinder.of("NotExists", false);
|
||||
finder.setText("Hello World");
|
||||
Assertions.assertEquals(-1, finder.start(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void benchmark() {
|
||||
System.out.println("正在生成测试数据...");
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String base = "abcdefghijklmnopqrstuvwxyz0123456789-";
|
||||
sb.append(base.repeat(500));
|
||||
String target = "HUTOOL_TARGET";
|
||||
sb.append(target);
|
||||
|
||||
String textStr = sb.toString();
|
||||
|
||||
int loop = 100;
|
||||
for (int i = 0; i < 100; i++) {
|
||||
StrFinder.of(target, false).setText(textStr).start(0);
|
||||
}
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
for (int i = 0; i < loop; i++) {
|
||||
int index = textStr.indexOf(target);
|
||||
if (index == -1) throw new RuntimeException("Bug!");
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
System.out.println("1. JDK String.indexOf耗时: " + (end - start) + "ms");
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i = 0; i < loop; i++) {
|
||||
int index = StrFinder.of(target, false).setText(textStr).start(0);
|
||||
if (index == -1) throw new RuntimeException("Bug!");
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("2. StrFinder (String) 耗时: " + (end - start) + "ms");
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i = 0; i < loop; i++) {
|
||||
int index = StrFinder.of(target, false).setText(sb).start(0);
|
||||
if (index == -1) throw new RuntimeException("Bug!");
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("3. StrFinder (Builder)耗时: " + (end - start) + "ms");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user