fix(dfa): 修复WordTree.addWord在关键词以停顿词结尾时词尾标记错误的问题

- 修复:引入 lastAcceptedChar 变量,确保 setEnd 使用最后一个合法字符
- 新增单元测试验证修复效果

Fixes #4091
This commit is contained in:
kfkfka
2025-10-04 22:57:51 +08:00
parent e9a615691c
commit 08cc041a4d
2 changed files with 36 additions and 9 deletions

View File

@@ -102,23 +102,22 @@ public class WordTree extends HashMap<Character, WordTree> {
WordTree parent = null;
WordTree current = this;
WordTree child;
char currentChar = 0;
Character lastAcceptedChar = null;
final int length = word.length();
for (int i = 0; i < length; i++) {
currentChar = word.charAt(i);
char currentChar = word.charAt(i);
if (charFilter.accept(currentChar)) {//只处理合法字符
child = current.get(currentChar);
if (child == null) {
//无子类,新建一个子节点后存放下一个字符
child = new WordTree();
current.put(currentChar, child);
}
child = current.computeIfAbsent(currentChar, c -> new WordTree());
parent = current;
current = child;
lastAcceptedChar = currentChar;
}
}
// 仅当存在父节点且存在非停顿词时,才设置词尾标记
// 当 null != parent 成立时,lastAcceptedChar != null 必然成立,因此无需再对 lastAcceptedChar 额外判空
if (null != parent) {
parent.setEnd(currentChar);
parent.setEnd(lastAcceptedChar);
}
return this;
}

View File

@@ -116,6 +116,34 @@ public class DfaTest {
assertEquals(all, CollUtil.newArrayList("t-io"));
}
/**
 * Regression test for GitHub issue #4091.
 * <p>
 * Registers a keyword whose last character is, per the original note, a
 * filtered (stop) character, then checks that matching the bare prefix
 * {@code "hello"} yields exactly one hit equal to {@code "hello"}.
 * <p>
 * NOTE(review): the original description mentioned a bracket as the trailing
 * stop character, while the literal below ends with a space - confirm which
 * character is intended.
 */
@Test
public void addWordWithTrailingFilteredCharTest() {
	// Keyword ends with a character that is expected to be filtered out.
	final String keyword = "hello ";
	final String expected = "hello";

	final WordTree wordTree = new WordTree();
	wordTree.addWord(keyword);

	// The -1 argument is passed through unchanged; presumably "no limit" - confirm against matchAll.
	final List<String> found = wordTree.matchAll(expected, -1);

	assertEquals(1, found.size());
	assertEquals(expected, found.get(0));
}
/**
 * Regression test for GitHub issue #4091.
 * <p>
 * Registers a keyword that, per the original note, contains a filtered (stop)
 * character in the middle, then checks that matching {@code "hello"} yields
 * exactly one hit equal to {@code "hello"}.
 * <p>
 * NOTE(review): the original inline comment named {@code '('} as the filtered
 * character, while the literal below contains a space - confirm which
 * character is intended.
 */
@Test
public void addWordWithMiddleFilteredCharTest() {
	// Keyword with a character in the middle that is expected to be filtered out.
	final String keyword = "he llo";
	final String expected = "hello";

	final WordTree wordTree = new WordTree();
	wordTree.addWord(keyword);

	// The -1 argument is passed through unchanged; presumably "no limit" - confirm against matchAll.
	final List<String> found = wordTree.matchAll(expected, -1);

	assertEquals(1, found.size());
	assertEquals(expected, found.get(0));
}
@Test
public void aTest(){
WordTree tree = new WordTree();