mirror of
https://gitee.com/dromara/hutool.git
synced 2025-07-16 16:50:45 +08:00
fix:避免调用方显式调用 API 触发查找树优化;并通过内置锁,避免因并行树优化可能造成的不可预知结果和无效重复的树优化操作
This commit is contained in:
parent
6cd998f648
commit
00e9af4ffb
@ -4,19 +4,33 @@ import java.util.*;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
*
|
|
||||||
* 基于非确定性有穷自动机(NFA) 实现的多模匹配工具
|
* 基于非确定性有穷自动机(NFA) 实现的多模匹配工具
|
||||||
|
* </p>
|
||||||
*
|
*
|
||||||
* @author renyp
|
* @author renyp
|
||||||
*/
|
*/
|
||||||
public class Automaton {
|
public class Automaton {
|
||||||
|
/**
|
||||||
|
* AC树的根节点
|
||||||
|
*/
|
||||||
private final Node root;
|
private final Node root;
|
||||||
|
/**
|
||||||
|
* 标记是否需要构建AC自动机,做树优化
|
||||||
|
*/
|
||||||
|
private volatile boolean needBuildAC;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 内置锁,防止并发场景,并行建AC树,造成不可预知结果
|
||||||
|
*/
|
||||||
|
private final Object lock;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 默认构造
|
* 默认构造
|
||||||
*/
|
*/
|
||||||
public Automaton() {
|
public Automaton() {
|
||||||
this.root = new Node();
|
this.root = new Node();
|
||||||
|
this.needBuildAC = true;
|
||||||
|
this.lock = new Object();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -35,6 +49,7 @@ public class Automaton {
|
|||||||
* @param word 添加的新词
|
* @param word 添加的新词
|
||||||
*/
|
*/
|
||||||
public void insert(String word) {
|
public void insert(String word) {
|
||||||
|
needBuildAC = true;
|
||||||
Node p = root;
|
Node p = root;
|
||||||
for (char curr : word.toCharArray()) {
|
for (char curr : word.toCharArray()) {
|
||||||
int ind = curr;
|
int ind = curr;
|
||||||
@ -61,7 +76,7 @@ public class Automaton {
|
|||||||
/**
|
/**
|
||||||
* 构建基于NFA模型的 AC自动机
|
* 构建基于NFA模型的 AC自动机
|
||||||
*/
|
*/
|
||||||
public void buildAc() {
|
private void buildAc() {
|
||||||
Queue<Node> queue = new LinkedList<>();
|
Queue<Node> queue = new LinkedList<>();
|
||||||
Node p = root;
|
Node p = root;
|
||||||
for (Integer key : p.next.keySet()) {
|
for (Integer key : p.next.keySet()) {
|
||||||
@ -86,6 +101,7 @@ public class Automaton {
|
|||||||
queue.offer(curr.next.get(key));
|
queue.offer(curr.next.get(key));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
needBuildAC = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -100,6 +116,14 @@ public class Automaton {
|
|||||||
* @param isDensityMatch 是否密集匹配
|
* @param isDensityMatch 是否密集匹配
|
||||||
*/
|
*/
|
||||||
public List<FoundWord> find(String text, boolean isDensityMatch) {
|
public List<FoundWord> find(String text, boolean isDensityMatch) {
|
||||||
|
// double check,防止重复无用的 buildAC
|
||||||
|
if (needBuildAC) {
|
||||||
|
synchronized (lock) {
|
||||||
|
if (needBuildAC) {
|
||||||
|
this.buildAc();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
List<FoundWord> ans = new ArrayList<>();
|
List<FoundWord> ans = new ArrayList<>();
|
||||||
Node p = root, k = null;
|
Node p = root, k = null;
|
||||||
for (int i = 0, len = text.length(); i < len; i++) {
|
for (int i = 0, len = text.length(); i < len; i++) {
|
||||||
@ -131,9 +155,21 @@ public class Automaton {
|
|||||||
|
|
||||||
private static class Node {
|
private static class Node {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 当前节点是否是一个单词的结尾
|
||||||
|
*/
|
||||||
boolean flag;
|
boolean flag;
|
||||||
|
/**
|
||||||
|
* 指向 当前节点匹配失败应该跳转的下个节点
|
||||||
|
*/
|
||||||
Node fail;
|
Node fail;
|
||||||
|
/**
|
||||||
|
* 以当前节点结尾的单词
|
||||||
|
*/
|
||||||
String str;
|
String str;
|
||||||
|
/**
|
||||||
|
* 当前节点的子节点
|
||||||
|
*/
|
||||||
Map<Integer, Node> next;
|
Map<Integer, Node> next;
|
||||||
|
|
||||||
public Node() {
|
public Node() {
|
||||||
|
@ -17,7 +17,7 @@ public class AutomatonTest extends TestCase {
|
|||||||
Automaton automaton = new Automaton();
|
Automaton automaton = new Automaton();
|
||||||
WordTree wordTree = new WordTree();
|
WordTree wordTree = new WordTree();
|
||||||
automaton.insert("say", "her", "he", "she", "shr");
|
automaton.insert("say", "her", "he", "she", "shr");
|
||||||
automaton.buildAc();
|
// automaton.buildAc();
|
||||||
wordTree.addWords("say", "her", "he", "she", "shr");
|
wordTree.addWords("say", "her", "he", "she", "shr");
|
||||||
|
|
||||||
StopWatch stopWatch = new StopWatch();
|
StopWatch stopWatch = new StopWatch();
|
||||||
@ -51,7 +51,7 @@ public class AutomatonTest extends TestCase {
|
|||||||
Automaton automaton = new Automaton();
|
Automaton automaton = new Automaton();
|
||||||
WordTree wordTree = new WordTree();
|
WordTree wordTree = new WordTree();
|
||||||
automaton.insert("say", "her", "he", "she", "shr");
|
automaton.insert("say", "her", "he", "she", "shr");
|
||||||
automaton.buildAc();
|
// automaton.buildAc();
|
||||||
wordTree.addWords("say", "her", "he", "she", "shr");
|
wordTree.addWords("say", "her", "he", "she", "shr");
|
||||||
|
|
||||||
StopWatch stopWatch = new StopWatch();
|
StopWatch stopWatch = new StopWatch();
|
||||||
@ -84,7 +84,7 @@ public class AutomatonTest extends TestCase {
|
|||||||
stopWatch.start("automaton_char_buid_find");
|
stopWatch.start("automaton_char_buid_find");
|
||||||
Automaton automatonLocal = new Automaton();
|
Automaton automatonLocal = new Automaton();
|
||||||
automatonLocal.insert("say", "her", "he", "she", "shr");
|
automatonLocal.insert("say", "her", "he", "she", "shr");
|
||||||
automatonLocal.buildAc();
|
// automatonLocal.buildAc();
|
||||||
List<FoundWord> ans1 = automatonLocal.find(input);
|
List<FoundWord> ans1 = automatonLocal.find(input);
|
||||||
stopWatch.stop();
|
stopWatch.stop();
|
||||||
assertEquals("she,he,her,say", ans1.stream().map(FoundWord::getWord).collect(Collectors.joining(",")));
|
assertEquals("she,he,her,say", ans1.stream().map(FoundWord::getWord).collect(Collectors.joining(",")));
|
||||||
@ -118,7 +118,7 @@ public class AutomatonTest extends TestCase {
|
|||||||
stopWatch.start("automaton_cn_build_find");
|
stopWatch.start("automaton_cn_build_find");
|
||||||
Automaton automatonLocal = new Automaton();
|
Automaton automatonLocal = new Automaton();
|
||||||
automatonLocal.insert("赵", "赵啊", "赵啊三");
|
automatonLocal.insert("赵", "赵啊", "赵啊三");
|
||||||
automatonLocal.buildAc();
|
// automatonLocal.buildAc();
|
||||||
|
|
||||||
final List<FoundWord> result = automatonLocal.find(input);
|
final List<FoundWord> result = automatonLocal.find(input);
|
||||||
stopWatch.stop();
|
stopWatch.stop();
|
||||||
@ -156,7 +156,7 @@ public class AutomatonTest extends TestCase {
|
|||||||
|
|
||||||
Automaton automatonLocal = new Automaton();
|
Automaton automatonLocal = new Automaton();
|
||||||
automatonLocal.insert("赵", "赵啊", "赵啊三");
|
automatonLocal.insert("赵", "赵啊", "赵啊三");
|
||||||
automatonLocal.buildAc();
|
// automatonLocal.buildAc();
|
||||||
|
|
||||||
stopWatch.start("automaton_cn_find");
|
stopWatch.start("automaton_cn_find");
|
||||||
final List<FoundWord> result = automatonLocal.find(input);
|
final List<FoundWord> result = automatonLocal.find(input);
|
||||||
@ -196,7 +196,7 @@ public class AutomatonTest extends TestCase {
|
|||||||
|
|
||||||
Automaton automatonLocal = new Automaton();
|
Automaton automatonLocal = new Automaton();
|
||||||
automatonLocal.insert("赵", "赵啊", "赵啊三");
|
automatonLocal.insert("赵", "赵啊", "赵啊三");
|
||||||
automatonLocal.buildAc();
|
// automatonLocal.buildAc();
|
||||||
|
|
||||||
stopWatch.start("automaton_cn_find_not_density");
|
stopWatch.start("automaton_cn_find_not_density");
|
||||||
final List<FoundWord> result = automatonLocal.find(input, false);
|
final List<FoundWord> result = automatonLocal.find(input, false);
|
||||||
|
Loading…
Reference in New Issue
Block a user