mirror of
https://gitee.com/dromara/hutool.git
synced 2025-11-24 08:33:22 +08:00
fix(dfa): 修复WordTree.addWord在关键词以停顿词结尾时词尾标记错误的问题
- 修复:引入 lastAcceptedChar 变量,确保 setEnd 使用最后一个合法字符
- 新增单元测试验证修复效果

Fixes #4091
This commit is contained in:
@@ -102,23 +102,22 @@ public class WordTree extends HashMap<Character, WordTree> {
|
||||
WordTree parent = null;
|
||||
WordTree current = this;
|
||||
WordTree child;
|
||||
char currentChar = 0;
|
||||
Character lastAcceptedChar = null;
|
||||
|
||||
final int length = word.length();
|
||||
for (int i = 0; i < length; i++) {
|
||||
currentChar = word.charAt(i);
|
||||
char currentChar = word.charAt(i);
|
||||
if (charFilter.accept(currentChar)) {//只处理合法字符
|
||||
child = current.get(currentChar);
|
||||
if (child == null) {
|
||||
//无子类,新建一个子节点后存放下一个字符
|
||||
child = new WordTree();
|
||||
current.put(currentChar, child);
|
||||
}
|
||||
child = current.computeIfAbsent(currentChar, c -> new WordTree());
|
||||
parent = current;
|
||||
current = child;
|
||||
lastAcceptedChar = currentChar;
|
||||
}
|
||||
}
|
||||
// 仅当存在父节点且存在非停顿词时,才设置词尾标记
|
||||
// 当 null != parent 条件成立时,lastAcceptedChar != null 必然成立,故也可以省去
|
||||
if (null != parent) {
|
||||
parent.setEnd(currentChar);
|
||||
parent.setEnd(lastAcceptedChar);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -116,6 +116,34 @@ public class DfaTest {
|
||||
assertEquals(all, CollUtil.newArrayList("t-io"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Github Issue #4091
|
||||
* Verifies that when a keyword ends with a stop character (e.g. a bracket; this test uses a trailing space), its valid prefix is still matched.
|
||||
*/
|
||||
@Test
|
||||
public void addWordWithTrailingFilteredCharTest() {
|
||||
WordTree tree = new WordTree();
|
||||
tree.addWord("hello "); // keyword ends with a character the test expects to be filtered out
|
||||
|
||||
List<String> matches = tree.matchAll("hello", -1);
|
||||
assertEquals(1, matches.size());
|
||||
assertEquals("hello", matches.get(0));
|
||||
}
|
||||
|
||||
/**
|
||||
* Github Issue #4091
|
||||
* Verifies the case where a keyword contains a stop character in the middle.
|
||||
*/
|
||||
@Test
|
||||
public void addWordWithMiddleFilteredCharTest() {
|
||||
WordTree tree = new WordTree();
|
||||
tree.addWord("he llo"); // the space in the middle is expected to be filtered out
|
||||
|
||||
List<String> matches = tree.matchAll("hello", -1);
|
||||
assertEquals(1, matches.size());
|
||||
assertEquals("hello", matches.get(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void aTest(){
|
||||
WordTree tree = new WordTree();
|
||||
|
||||
Reference in New Issue
Block a user