文本及富文本支持自定义规则脱敏

2025-06-28 13:34:09 +08:00 · 2025-06-09 17:21:42 +08:00 · 2025-06-09 17:21:42 +08:00 · 28d1a8cdff
commit 28d1a8cdff
parent 8b7d621cd7
6 changed files with 997 additions and 3 deletions
--- a/hutool-core/src/main/java/cn/hutool/v7/core/data/masking/MaskingUtil.java
+++ b/hutool-core/src/main/java/cn/hutool/v7/core/data/masking/MaskingUtil.java
@ -14,10 +14,8 @@
 * limitations under the License.
 */

-package cn.hutool.v7.core.data;
+package cn.hutool.v7.core.data.masking;

-import cn.hutool.v7.core.data.masking.MaskingManager;
-import cn.hutool.v7.core.data.masking.MaskingType;
 import cn.hutool.v7.core.text.StrUtil;

 /**
--- a/hutool-core/src/main/java/cn/hutool/v7/core/data/masking/RichTextMaskingProcessor.java
+++ b/hutool-core/src/main/java/cn/hutool/v7/core/data/masking/RichTextMaskingProcessor.java
@ -0,0 +1,286 @@
+package cn.hutool.v7.core.data.masking;
+
+import cn.hutool.v7.core.text.StrUtil;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Rich-text masking processor: applies a list of {@link RichTextMaskingRule}s to plain text
+ * or to the text nodes of an HTML fragment.
+ *
+ * <p>When HTML tags are preserved, markup is copied to the output verbatim and only the text
+ * between tags is masked. Each text run is evaluated against the innermost element that is
+ * still open at that position, which is what the include/exclude tag filters of a rule refer to.
+ *
+ * @author xjf
+ */
+public class RichTextMaskingProcessor {
+
+	/**
+	 * Masking rules, applied in insertion order.
+	 */
+	private final List<RichTextMaskingRule> rules = new ArrayList<>();
+
+	/**
+	 * Whether HTML tags are kept verbatim (only text between tags is masked).
+	 */
+	private boolean preserveHtmlTags = true;
+
+	/**
+	 * Creates a processor that preserves HTML tags.
+	 */
+	public RichTextMaskingProcessor() {
+	}
+
+	/**
+	 * Creates a processor.
+	 *
+	 * @param preserveHtmlTags whether HTML tags are kept verbatim; when {@code false} the whole
+	 *                         input (markup included) is treated as plain text
+	 */
+	public RichTextMaskingProcessor(final boolean preserveHtmlTags) {
+		this.preserveHtmlTags = preserveHtmlTags;
+	}
+
+	/**
+	 * Appends a masking rule. Rules are applied in the order they were added.
+	 *
+	 * @param rule masking rule
+	 * @return this
+	 */
+	public RichTextMaskingProcessor addRule(final RichTextMaskingRule rule) {
+		this.rules.add(rule);
+		return this;
+	}
+
+	/**
+	 * Masks the given text.
+	 *
+	 * @param text plain text or HTML fragment
+	 * @return masked text; blank input is returned unchanged
+	 */
+	public String mask(final String text) {
+		if (StrUtil.isBlank(text)) {
+			return text;
+		}
+
+		// HTML needs tag-aware handling so that markup itself is never rewritten
+		if (preserveHtmlTags && isHtmlContent(text)) {
+			return maskHtmlContent(text);
+		}
+		return maskPlainText(text);
+	}
+
+	/**
+	 * Cheap heuristic for "looks like HTML": contains angle brackets and at least one
+	 * closing ({@code </}) or self-closing ({@code />}) marker.
+	 *
+	 * @param text text to inspect
+	 * @return {@code true} if the text is treated as HTML
+	 */
+	private boolean isHtmlContent(final String text) {
+		return text.contains("<") && text.contains(">") &&
+			(text.contains("</") || text.contains("/>"));
+	}
+
+	/**
+	 * Masks the text nodes of an HTML fragment while copying every tag verbatim.
+	 *
+	 * <p>A tag is everything from a {@code '<'} up to the next {@code '>'}. An unterminated
+	 * tag at the end of the input is copied as-is. No input characters are dropped.
+	 *
+	 * @param html HTML fragment
+	 * @return HTML with masked text nodes
+	 */
+	private String maskHtmlContent(final String html) {
+		final int length = html.length();
+		final StringBuilder result = new StringBuilder(length);
+		// Stack of currently open elements; the last entry is the innermost one
+		final List<String> openTags = new ArrayList<>();
+		int pos = 0;
+
+		while (pos < length) {
+			final int tagStart = html.indexOf('<', pos);
+			if (tagStart < 0) {
+				// No more tags: the remainder is text
+				result.append(processTextContentWithContext(html.substring(pos), innermostTag(openTags)));
+				break;
+			}
+
+			if (tagStart > pos) {
+				// Text in front of the tag belongs to the element that is open right now
+				result.append(processTextContentWithContext(html.substring(pos, tagStart), innermostTag(openTags)));
+			}
+
+			final int tagEnd = html.indexOf('>', tagStart);
+			if (tagEnd < 0) {
+				// Unterminated tag: keep the remainder untouched
+				result.append(html, tagStart, length);
+				break;
+			}
+
+			result.append(html, tagStart, tagEnd + 1);
+			trackTag(openTags, html, tagStart, tagEnd);
+			pos = tagEnd + 1;
+		}
+
+		return result.toString();
+	}
+
+	/**
+	 * Returns the innermost open element, or {@code null} when no element is open.
+	 *
+	 * @param openTags stack of open element names
+	 * @return lower-cased tag name or {@code null}
+	 */
+	private static String innermostTag(final List<String> openTags) {
+		return openTags.isEmpty() ? null : openTags.get(openTags.size() - 1);
+	}
+
+	/**
+	 * Updates the open-element stack for the tag located at {@code [tagStart, tagEnd]}.
+	 *
+	 * <p>The tag name ends at the first whitespace, {@code '/'} or the closing {@code '>'} of
+	 * this very tag, so text that follows the tag can never leak into the name.
+	 *
+	 * @param openTags stack of open element names, modified in place
+	 * @param html     full HTML input
+	 * @param tagStart index of the tag's {@code '<'}
+	 * @param tagEnd   index of the tag's {@code '>'}
+	 */
+	private static void trackTag(final List<String> openTags, final String html, final int tagStart, final int tagEnd) {
+		int nameStart = tagStart + 1;
+		final boolean closing = nameStart < tagEnd && html.charAt(nameStart) == '/';
+		if (closing) {
+			nameStart++;
+		}
+
+		int nameEnd = nameStart;
+		while (nameEnd < tagEnd) {
+			final char c = html.charAt(nameEnd);
+			if (Character.isWhitespace(c) || c == '/') {
+				break;
+			}
+			nameEnd++;
+		}
+		if (nameEnd == nameStart) {
+			// "<>" or "< ..." carries no tag name
+			return;
+		}
+
+		final String name = html.substring(nameStart, nameEnd).toLowerCase();
+		if (closing) {
+			// Close the nearest matching element together with anything left open inside it;
+			// a stray closing tag without a matching opener is ignored
+			final int index = openTags.lastIndexOf(name);
+			if (index >= 0) {
+				openTags.subList(index, openTags.size()).clear();
+			}
+			return;
+		}
+
+		final char first = name.charAt(0);
+		if (first == '!' || first == '?') {
+			// Comments, doctype and processing instructions do not open an element
+			return;
+		}
+		if (html.charAt(tagEnd - 1) == '/' || isVoidTag(name)) {
+			// Self-closing and void elements have no content
+			return;
+		}
+		openTags.add(name);
+	}
+
+	/**
+	 * Tells whether the given lower-cased name is an HTML void element (never has content
+	 * and never has a closing tag).
+	 *
+	 * @param name lower-cased tag name
+	 * @return {@code true} for void elements
+	 */
+	private static boolean isVoidTag(final String name) {
+		return switch (name) {
+			case "area", "base", "br", "col", "embed", "hr", "img", "input",
+				"link", "meta", "param", "source", "track", "wbr" -> true;
+			default -> false;
+		};
+	}
+
+	/**
+	 * Applies all rules whose tag filters accept the given element to a text run.
+	 *
+	 * @param text    text run
+	 * @param tagName lower-cased name of the innermost open element, or {@code null} when the
+	 *                text is not inside any element (then no tag filter is evaluated)
+	 * @return masked text; blank text is returned unchanged
+	 */
+	private String processTextContentWithContext(final String text, final String tagName) {
+		if (StrUtil.isBlank(text)) {
+			return text;
+		}
+
+		String result = text;
+
+		for (final RichTextMaskingRule rule : rules) {
+			if (tagName != null) {
+				// Null tag sets are treated like empty ones
+				final var includeTags = rule.getIncludeTags();
+				if (includeTags != null && !includeTags.isEmpty() && !includeTags.contains(tagName)) {
+					// Rule is restricted to other elements
+					continue;
+				}
+
+				final var excludeTags = rule.getExcludeTags();
+				if (excludeTags != null && excludeTags.contains(tagName)) {
+					// Rule explicitly skips this element
+					continue;
+				}
+			}
+
+			result = applyMaskingRule(result, rule);
+		}
+
+		return result;
+	}
+
+	/**
+	 * Applies every rule to plain text; tag filters are not evaluated.
+	 *
+	 * @param text plain text
+	 * @return masked text
+	 */
+	private String maskPlainText(final String text) {
+		String result = text;
+
+		for (final RichTextMaskingRule rule : rules) {
+			result = applyMaskingRule(result, rule);
+		}
+
+		return result;
+	}
+
+	/**
+	 * Replaces every match of the rule's pattern according to the rule's mask type.
+	 *
+	 * @param text text to mask
+	 * @param rule masking rule
+	 * @return masked text; returned unchanged when the text or pattern is blank or the rule has
+	 * no mask type
+	 */
+	private String applyMaskingRule(final String text, final RichTextMaskingRule rule) {
+		// A rule without mask type (e.g. built via the no-arg constructor) cannot be applied
+		if (StrUtil.isBlank(text) || StrUtil.isBlank(rule.getPattern()) || rule.getMaskType() == null) {
+			return text;
+		}
+
+		final Pattern pattern = Pattern.compile(rule.getPattern());
+		final Matcher matcher = pattern.matcher(text);
+
+		final StringBuilder sb = new StringBuilder();
+
+		while (matcher.find()) {
+			final String matched = matcher.group();
+			final String replacement = switch (rule.getMaskType()) {
+				case FULL ->
+					// Replace the whole match with mask characters
+					StrUtil.repeat(rule.getMaskChar(), matched.length());
+				case PARTIAL ->
+					// Keep the configured prefix/suffix, mask the middle
+					partialMask(matched, rule.getPreserveLeft(), rule.getPreserveRight(), rule.getMaskChar());
+				case REPLACE ->
+					// Substitute fixed text; a missing replacement removes the match instead of failing
+					rule.getReplacement() == null ? "" : rule.getReplacement();
+				default -> matched;
+			};
+
+			// Quote so that '$' and '\' in the replacement are taken literally
+			matcher.appendReplacement(sb, Matcher.quoteReplacement(replacement));
+		}
+
+		matcher.appendTail(sb);
+
+		return sb.toString();
+	}
+
+	/**
+	 * Masks the middle of a string while keeping a prefix and a suffix.
+	 *
+	 * @param text          original text
+	 * @param preserveLeft  number of leading characters to keep; negative values count as 0
+	 * @param preserveRight number of trailing characters to keep; negative values count as 0
+	 * @param maskChar      mask character
+	 * @return masked text, always of the same length as {@code text}; returned unchanged when
+	 * blank or when the preserved parts cover the whole text
+	 */
+	private String partialMask(final String text, int preserveLeft, int preserveRight, final char maskChar) {
+		if (StrUtil.isBlank(text)) {
+			return text;
+		}
+
+		final int length = text.length();
+
+		// Clamp into [0, length] so that negative or oversized settings cannot change the length
+		preserveLeft = Math.max(0, Math.min(preserveLeft, length));
+		preserveRight = Math.max(0, Math.min(preserveRight, length - preserveLeft));
+
+		final int maskLength = length - preserveLeft - preserveRight;
+
+		if (maskLength <= 0) {
+			return text;
+		}
+
+		final StringBuilder sb = new StringBuilder(length);
+
+		if (preserveLeft > 0) {
+			sb.append(text, 0, preserveLeft);
+		}
+
+		sb.append(StrUtil.repeat(maskChar, maskLength));
+
+		if (preserveRight > 0) {
+			sb.append(text, length - preserveRight, length);
+		}
+
+		return sb.toString();
+	}
+}
--- a/hutool-core/src/main/java/cn/hutool/v7/core/data/masking/RichTextMaskingRule.java
+++ b/hutool-core/src/main/java/cn/hutool/v7/core/data/masking/RichTextMaskingRule.java
@ -0,0 +1,346 @@
+package cn.hutool.v7.core.data.masking;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Rich-text masking rule: describes what to match (a regular expression) and how the
+ * matched content is masked, optionally restricted to or excluded from certain HTML tags.
+ *
+ * <p>Tag names are stored lower-cased, both when added one by one and when set as a whole.
+ *
+ * @author xjf
+ */
+public class RichTextMaskingRule {
+
+	/**
+	 * Masking strategy.
+	 */
+	public enum MaskType {
+		/**
+		 * Replace every character of the match with the mask character.
+		 */
+		FULL,
+
+		/**
+		 * Keep part of the original content and mask the rest.
+		 */
+		PARTIAL,
+
+		/**
+		 * Replace the match with a fixed replacement text.
+		 */
+		REPLACE
+	}
+
+	/**
+	 * Rule name.
+	 */
+	private String name;
+
+	/**
+	 * Match pattern (regular expression).
+	 */
+	private String pattern;
+
+	/**
+	 * Masking strategy.
+	 */
+	private MaskType maskType;
+
+	/**
+	 * Replacement text (used by {@link MaskType#REPLACE}).
+	 */
+	private String replacement;
+
+	/**
+	 * Number of leading characters to keep (used by {@link MaskType#PARTIAL}).
+	 */
+	private int preserveLeft;
+
+	/**
+	 * Number of trailing characters to keep (used by {@link MaskType#PARTIAL}).
+	 */
+	private int preserveRight;
+
+	/**
+	 * Mask character.
+	 */
+	private char maskChar = '*';
+
+	/**
+	 * Whether HTML tag content is processed.
+	 * NOTE(review): the processor added in this change does not appear to read this flag — confirm intended use.
+	 */
+	private boolean processHtmlTags = false;
+
+	/**
+	 * HTML tags (lower-cased) in which this rule is not applied.
+	 */
+	private Set<String> excludeTags = new HashSet<>();
+
+	/**
+	 * HTML tags (lower-cased) to which this rule is restricted; empty means no restriction.
+	 */
+	private Set<String> includeTags = new HashSet<>();
+
+	/**
+	 * Creates an empty rule; pattern and mask type must be set before the rule has any effect.
+	 */
+	public RichTextMaskingRule() {
+	}
+
+	/**
+	 * Creates a rule.
+	 *
+	 * @param name        rule name
+	 * @param pattern     match pattern (regular expression)
+	 * @param maskType    masking strategy
+	 * @param replacement replacement text
+	 */
+	public RichTextMaskingRule(final String name, final String pattern, final MaskType maskType, final String replacement) {
+		this.name = name;
+		this.pattern = pattern;
+		this.maskType = maskType;
+		this.replacement = replacement;
+	}
+
+	/**
+	 * Creates a {@link MaskType#PARTIAL} rule.
+	 *
+	 * @param name          rule name
+	 * @param pattern       match pattern (regular expression)
+	 * @param preserveLeft  number of leading characters to keep
+	 * @param preserveRight number of trailing characters to keep
+	 * @param maskChar      mask character
+	 */
+	public RichTextMaskingRule(final String name, final String pattern, final int preserveLeft, final int preserveRight, final char maskChar) {
+		this.name = name;
+		this.pattern = pattern;
+		this.maskType = MaskType.PARTIAL;
+		this.preserveLeft = preserveLeft;
+		this.preserveRight = preserveRight;
+		this.maskChar = maskChar;
+	}
+
+	// Getter and Setter methods
+
+	/**
+	 * Returns the rule name.
+	 *
+	 * @return rule name
+	 */
+	public String getName() {
+		return name;
+	}
+
+	/**
+	 * Sets the rule name.
+	 *
+	 * @param name rule name
+	 * @return this
+	 */
+	public RichTextMaskingRule setName(final String name) {
+		this.name = name;
+		return this;
+	}
+
+	/**
+	 * Returns the match pattern (regular expression).
+	 *
+	 * @return match pattern
+	 */
+	public String getPattern() {
+		return pattern;
+	}
+
+	/**
+	 * Sets the match pattern (regular expression).
+	 *
+	 * @param pattern match pattern
+	 * @return this
+	 */
+	public RichTextMaskingRule setPattern(final String pattern) {
+		this.pattern = pattern;
+		return this;
+	}
+
+
+	/**
+	 * Returns the masking strategy.
+	 *
+	 * @return masking strategy
+	 */
+	public MaskType getMaskType() {
+		return maskType;
+	}
+
+	/**
+	 * Sets the masking strategy.
+	 *
+	 * @param maskType masking strategy
+	 * @return this
+	 */
+	public RichTextMaskingRule setMaskType(final MaskType maskType) {
+		this.maskType = maskType;
+		return this;
+	}
+
+	/**
+	 * Returns the replacement text.
+	 *
+	 * @return replacement text
+	 */
+	public String getReplacement() {
+		return replacement;
+	}
+
+	/**
+	 * Sets the replacement text.
+	 *
+	 * @param replacement replacement text
+	 * @return this
+	 */
+	public RichTextMaskingRule setReplacement(final String replacement) {
+		this.replacement = replacement;
+		return this;
+	}
+
+	/**
+	 * Returns the number of leading characters to keep.
+	 *
+	 * @return number of leading characters to keep
+	 */
+	public int getPreserveLeft() {
+		return preserveLeft;
+	}
+
+	/**
+	 * Sets the number of leading characters to keep.
+	 *
+	 * @param preserveLeft number of leading characters to keep
+	 * @return this
+	 */
+	public RichTextMaskingRule setPreserveLeft(final int preserveLeft) {
+		this.preserveLeft = preserveLeft;
+		return this;
+	}
+
+	/**
+	 * Returns the number of trailing characters to keep.
+	 *
+	 * @return number of trailing characters to keep
+	 */
+	public int getPreserveRight() {
+		return preserveRight;
+	}
+
+	/**
+	 * Sets the number of trailing characters to keep.
+	 *
+	 * @param preserveRight number of trailing characters to keep
+	 * @return this
+	 */
+	public RichTextMaskingRule setPreserveRight(final int preserveRight) {
+		this.preserveRight = preserveRight;
+		return this;
+	}
+
+	/**
+	 * Returns the mask character.
+	 *
+	 * @return mask character
+	 */
+	public char getMaskChar() {
+		return maskChar;
+	}
+
+	/**
+	 * Sets the mask character.
+	 *
+	 * @param maskChar mask character
+	 * @return this
+	 */
+	public RichTextMaskingRule setMaskChar(final char maskChar) {
+		this.maskChar = maskChar;
+		return this;
+	}
+
+	/**
+	 * Returns whether HTML tag content is processed.
+	 *
+	 * @return whether HTML tag content is processed
+	 */
+	public boolean isProcessHtmlTags() {
+		return processHtmlTags;
+	}
+
+	/**
+	 * Sets whether HTML tag content is processed.
+	 *
+	 * @param processHtmlTags whether HTML tag content is processed
+	 * @return this
+	 */
+	public RichTextMaskingRule setProcessHtmlTags(final boolean processHtmlTags) {
+		this.processHtmlTags = processHtmlTags;
+		return this;
+	}
+
+	/**
+	 * Returns the HTML tags in which this rule is not applied.
+	 *
+	 * @return excluded tag names (lower-cased), never {@code null}
+	 */
+	public Set<String> getExcludeTags() {
+		return excludeTags;
+	}
+
+	/**
+	 * Replaces the set of excluded HTML tags. The given names are copied and lower-cased so
+	 * that they behave exactly like names added through {@link #addExcludeTag(String)}.
+	 *
+	 * @param excludeTags excluded tag names; {@code null} clears the set
+	 * @return this
+	 */
+	public RichTextMaskingRule setExcludeTags(final Set<String> excludeTags) {
+		this.excludeTags = normalizeTags(excludeTags);
+		return this;
+	}
+
+	/**
+	 * Adds an HTML tag in which this rule is not applied.
+	 *
+	 * @param tag tag name, stored lower-cased
+	 * @return this
+	 */
+	public RichTextMaskingRule addExcludeTag(final String tag) {
+		this.excludeTags.add(tag.toLowerCase());
+		return this;
+	}
+
+	/**
+	 * Returns the HTML tags to which this rule is restricted.
+	 *
+	 * @return included tag names (lower-cased), never {@code null}; empty means no restriction
+	 */
+	public Set<String> getIncludeTags() {
+		return includeTags;
+	}
+
+	/**
+	 * Replaces the set of HTML tags this rule is restricted to. The given names are copied and
+	 * lower-cased so that they behave exactly like names added through
+	 * {@link #addIncludeTag(String)}.
+	 *
+	 * @param includeTags included tag names; {@code null} clears the set
+	 * @return this
+	 */
+	public RichTextMaskingRule setIncludeTags(final Set<String> includeTags) {
+		this.includeTags = normalizeTags(includeTags);
+		return this;
+	}
+
+	/**
+	 * Adds an HTML tag to which this rule is restricted.
+	 *
+	 * @param tag tag name, stored lower-cased
+	 * @return this
+	 */
+	public RichTextMaskingRule addIncludeTag(final String tag) {
+		this.includeTags.add(tag.toLowerCase());
+		return this;
+	}
+
+	/**
+	 * Copies tag names into a fresh mutable set, lower-casing each and skipping {@code null}s.
+	 *
+	 * @param tags tag names, may be {@code null}
+	 * @return new set, never {@code null}
+	 */
+	private static Set<String> normalizeTags(final Set<String> tags) {
+		final Set<String> normalized = new HashSet<>();
+		if (tags != null) {
+			for (final String tag : tags) {
+				if (tag != null) {
+					normalized.add(tag.toLowerCase());
+				}
+			}
+		}
+		return normalized;
+	}
+}
--- a/hutool-core/src/main/java/cn/hutool/v7/core/data/masking/RichTextMaskingUtil.java
+++ b/hutool-core/src/main/java/cn/hutool/v7/core/data/masking/RichTextMaskingUtil.java
@ -0,0 +1,159 @@
+package cn.hutool.v7.core.data.masking;
+
+/**
+ * Rich-text masking utilities: a ready-made default processor plus factory methods for
+ * processors and common rules.
+ *
+ * @author xjf
+ */
+public class RichTextMaskingUtil {
+
+	/**
+	 * Regular expression used by the e-mail rule.
+	 */
+	private static final String EMAIL_PATTERN = "[\\w.-]+@[\\w.-]+\\.\\w+";
+
+	/**
+	 * Regular expression used by the URL rule.
+	 */
+	private static final String URL_PATTERN = "https?://[\\w.-]+(?:/[\\w.-]*)*";
+
+	/**
+	 * Sample sensitive words masked by the default processor.
+	 */
+	private static final String DEFAULT_SENSITIVE_WORD_PATTERN = "(机密|绝密|内部资料|秘密|保密)";
+
+	/**
+	 * Text that replaces URLs in the default processor.
+	 */
+	private static final String DEFAULT_URL_REPLACEMENT = "[网址已隐藏]";
+
+	/**
+	 * Default processor used by {@link #mask(String)}.
+	 */
+	private static final RichTextMaskingProcessor DEFAULT_PROCESSOR = createDefaultProcessor();
+
+	/**
+	 * Builds the default processor: HTML tags preserved, with e-mail, URL and sample
+	 * sensitive-word rules (in that order), created by the same factories exposed to callers.
+	 *
+	 * @return default processor
+	 */
+	private static RichTextMaskingProcessor createDefaultProcessor() {
+		return new RichTextMaskingProcessor(true)
+			.addRule(createEmailRule())
+			.addRule(createUrlRule(DEFAULT_URL_REPLACEMENT))
+			.addRule(createSensitiveWordRule(DEFAULT_SENSITIVE_WORD_PATTERN));
+	}
+
+	/**
+	 * Masks rich text with the default processor.
+	 *
+	 * @param text rich text
+	 * @return masked text
+	 */
+	public static String mask(final String text) {
+		return DEFAULT_PROCESSOR.mask(text);
+	}
+
+	/**
+	 * Masks rich text with a caller-supplied processor.
+	 *
+	 * @param text      rich text
+	 * @param processor processor to use
+	 * @return masked text
+	 */
+	public static String mask(final String text, final RichTextMaskingProcessor processor) {
+		return processor.mask(text);
+	}
+
+	/**
+	 * Creates a new processor without any rules.
+	 *
+	 * @param preserveHtmlTags whether HTML tags are preserved
+	 * @return new processor
+	 */
+	public static RichTextMaskingProcessor createProcessor(final boolean preserveHtmlTags) {
+		return new RichTextMaskingProcessor(preserveHtmlTags);
+	}
+
+	/**
+	 * Creates an e-mail rule: partial masking that keeps only the first character.
+	 *
+	 * @return e-mail rule
+	 */
+	public static RichTextMaskingRule createEmailRule() {
+		return new RichTextMaskingRule(
+			"邮箱",
+			EMAIL_PATTERN,
+			RichTextMaskingRule.MaskType.PARTIAL,
+			null)
+			.setPreserveLeft(1)
+			.setPreserveRight(0)
+			.setMaskChar('*');
+	}
+
+	/**
+	 * Creates a URL rule that replaces each http/https URL with the given text.
+	 *
+	 * @param replacement replacement text
+	 * @return URL rule
+	 */
+	public static RichTextMaskingRule createUrlRule(final String replacement) {
+		return new RichTextMaskingRule(
+			"网址",
+			URL_PATTERN,
+			RichTextMaskingRule.MaskType.REPLACE,
+			replacement);
+	}
+
+	/**
+	 * Creates a sensitive-word rule that fully masks every match with {@code '*'}.
+	 *
+	 * @param pattern regular expression matching the sensitive words
+	 * @return sensitive-word rule
+	 */
+	public static RichTextMaskingRule createSensitiveWordRule(final String pattern) {
+		return new RichTextMaskingRule(
+			"敏感词",
+			pattern,
+			RichTextMaskingRule.MaskType.FULL,
+			null)
+			.setMaskChar('*');
+	}
+
+	/**
+	 * Creates a custom rule.
+	 *
+	 * @param name        rule name
+	 * @param pattern     match pattern (regular expression)
+	 * @param maskType    masking strategy
+	 * @param replacement replacement text
+	 * @return custom rule
+	 */
+	public static RichTextMaskingRule createCustomRule(final String name, final String pattern,
+													   final RichTextMaskingRule.MaskType maskType,
+													   final String replacement) {
+		return new RichTextMaskingRule(name, pattern, maskType, replacement);
+	}
+
+	/**
+	 * Creates a partial-masking rule.
+	 *
+	 * @param name          rule name
+	 * @param pattern       match pattern (regular expression)
+	 * @param preserveLeft  number of leading characters to keep
+	 * @param preserveRight number of trailing characters to keep
+	 * @param maskChar      mask character
+	 * @return partial-masking rule
+	 */
+	public static RichTextMaskingRule createPartialMaskRule(final String name, final String pattern,
+															final int preserveLeft, final int preserveRight,
+															final char maskChar) {
+		return new RichTextMaskingRule(name, pattern, preserveLeft, preserveRight, maskChar);
+	}
+}
--- a/hutool-core/src/test/java/cn/hutool/v7/core/data/MaskingUtilTest.java
+++ b/hutool-core/src/test/java/cn/hutool/v7/core/data/MaskingUtilTest.java
@ -18,6 +18,7 @@ package cn.hutool.v7.core.data;

 import cn.hutool.v7.core.data.masking.MaskingManager;
 import cn.hutool.v7.core.data.masking.MaskingType;
+import cn.hutool.v7.core.data.masking.MaskingUtil;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;

--- a/hutool-core/src/test/java/cn/hutool/v7/core/masking/RichTextMaskingUtilTest.java
+++ b/hutool-core/src/test/java/cn/hutool/v7/core/masking/RichTextMaskingUtilTest.java
@ -0,0 +1,204 @@
+package cn.hutool.v7.core.masking;
+
+import cn.hutool.v7.core.data.masking.RichTextMaskingProcessor;
+import cn.hutool.v7.core.data.masking.RichTextMaskingRule;
+import cn.hutool.v7.core.data.masking.RichTextMaskingUtil;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Tests for the rich-text masking utilities.
+ *
+ * @author xjf
+ */
+public class RichTextMaskingUtilTest {
+
+	@Test
+	public void testDefaultMask() {
+		// Default processor on plain text
+		final String html = "这是一封邮件，联系人：test@example.com，网址：https://www.example.com，包含机密信息。";
+		final String masked = RichTextMaskingUtil.mask(html);
+
+		// E-mail is masked
+		Assertions.assertFalse(masked.contains("test@example.com"));
+		Assertions.assertTrue(masked.contains("t***"));
+
+		// URL is masked
+		Assertions.assertFalse(masked.contains("https://www.example.com"));
+		Assertions.assertTrue(masked.contains("[网址已隐藏]"));
+
+		// Sensitive word is masked in place (not merely "some stars somewhere")
+		Assertions.assertFalse(masked.contains("机密"));
+		Assertions.assertTrue(masked.contains("包含**信息"));
+	}
+
+	@Test
+	public void testHtmlContentMask() {
+		// Default processor on HTML
+		final String html = "<p>这是一封邮件，联系人：<a href='mailto:testA@example.com'>test@example.com</a>，" +
+			"网址：<a href='https://www.aexample.com'>https://www.example.com</a>，" +
+			"包含<span style='color:red'>机密</span>信息。</p>";
+		final String masked = RichTextMaskingUtil.mask(html);
+
+		// HTML tags are preserved
+		Assertions.assertTrue(masked.contains("<p>"));
+		Assertions.assertTrue(masked.contains("</p>"));
+		Assertions.assertTrue(masked.contains("<a href='mailto:"));
+		Assertions.assertTrue(masked.contains("<span style='color:red'>"));
+
+		// E-mail is masked
+		Assertions.assertFalse(masked.contains("test@example.com"));
+		Assertions.assertTrue(masked.contains("t***"));
+
+		// URL is masked
+		Assertions.assertFalse(masked.contains("https://www.example.com"));
+		Assertions.assertTrue(masked.contains("[网址已隐藏]"));
+
+		// Sensitive word is masked inside its element
+		Assertions.assertFalse(masked.contains("机密"));
+		Assertions.assertTrue(masked.contains("<span style='color:red'>**</span>"));
+	}
+
+	@Test
+	public void testCustomProcessor() {
+		// Custom processor
+		final RichTextMaskingProcessor processor = RichTextMaskingUtil.createProcessor(true);
+
+		// Custom rule: mobile phone number
+		processor.addRule(RichTextMaskingUtil.createPartialMaskRule(
+			"手机号",
+			"1[3-9]\\d{9}",
+			3,
+			4,
+			'*'));
+
+		// Custom rule: company name
+		processor.addRule(RichTextMaskingUtil.createCustomRule(
+			"公司名称",
+			"XX科技有限公司",
+			RichTextMaskingRule.MaskType.REPLACE,
+			"[公司名称已隐藏]"));
+
+		// Input text
+		final String text = "联系电话：13812345678，公司名称：XX科技有限公司";
+		final String masked = RichTextMaskingUtil.mask(text, processor);
+
+		// Phone number is masked
+		Assertions.assertFalse(masked.contains("13812345678"));
+		Assertions.assertTrue(masked.contains("138****5678"));
+
+		// Company name is masked
+		Assertions.assertFalse(masked.contains("XX科技有限公司"));
+		Assertions.assertTrue(masked.contains("[公司名称已隐藏]"));
+	}
+
+	@Test
+	public void testTagFiltering() {
+		// Custom processor
+		final RichTextMaskingProcessor processor = RichTextMaskingUtil.createProcessor(true);
+
+		// Rule that only applies inside specific tags
+		final RichTextMaskingRule rule = RichTextMaskingUtil.createCustomRule(
+			"标签内敏感信息",
+			"敏感信息",
+			RichTextMaskingRule.MaskType.REPLACE,
+			"[已隐藏]");
+
+		// Restrict the rule to div elements
+		final Set<String> includeTags = new HashSet<>();
+		includeTags.add("div");
+		rule.setIncludeTags(includeTags);
+
+		processor.addRule(rule);
+
+		// Input HTML
+		final String html = "<p>这是一段敏感信息</p><div>这也是一段敏感信息</div>";
+		final String masked = RichTextMaskingUtil.mask(html, processor);
+
+		// Only the text inside the div is masked
+		Assertions.assertTrue(masked.contains("<p>这是一段敏感信息</p>"));
+		Assertions.assertTrue(masked.contains("<div>这也是一段[已隐藏]</div>"));
+	}
+
+	@Test
+	public void testExcludeTags() {
+		// Custom processor
+		final RichTextMaskingProcessor processor = RichTextMaskingUtil.createProcessor(true);
+
+		// Rule that skips specific tags
+		final RichTextMaskingRule rule = RichTextMaskingUtil.createCustomRule(
+			"排除标签内敏感信息",
+			"敏感信息",
+			RichTextMaskingRule.MaskType.REPLACE,
+			"[已隐藏]");
+
+		// Exclude code elements
+		rule.addExcludeTag("code");
+
+		processor.addRule(rule);
+
+		// Input HTML
+		final String html = "<p>这是一段敏感信息</p><code>这是代码中的敏感信息</code>";
+		final String masked = RichTextMaskingUtil.mask(html, processor);
+
+		// Text inside code is left untouched
+		Assertions.assertTrue(masked.contains("<p>这是一段[已隐藏]</p>"));
+		Assertions.assertTrue(masked.contains("<code>这是代码中的敏感信息</code>"));
+	}
+
+	@Test
+	public void testComplexHtml() {
+		// Nested HTML with several kinds of sensitive content; the code element carries an
+		// attribute, as is typical for syntax-highlighted snippets
+		final String html = "<div class='content'>" +
+			"<h1>公司内部文档</h1>" +
+			"<p>联系人：张三 <a href='mailto:zhangsan@example.com'>zhangsan@example.com</a></p>" +
+			"<p>电话：13812345678</p>" +
+			"<div class='secret'>这是一段机密信息，请勿外传</div>" +
+			"<pre><code class='java'>// 这是一段代码\nString password = \"123456\";</code></pre>" +
+			"<p>公司网址：<a href='https://www.example.com'>https://www.example.com</a></p>" +
+			"</div>";
+
+		// Custom processor
+		final RichTextMaskingProcessor processor = RichTextMaskingUtil.createProcessor(true);
+
+		// E-mail rule
+		processor.addRule(RichTextMaskingUtil.createEmailRule());
+
+		// Mobile phone rule
+		processor.addRule(RichTextMaskingUtil.createPartialMaskRule(
+			"手机号",
+			"1[3-9]\\d{9}",
+			3,
+			4,
+			'*'));
+
+		// Sensitive-word rule
+		processor.addRule(RichTextMaskingUtil.createSensitiveWordRule("机密|内部"));
+
+		// URL rule
+		processor.addRule(RichTextMaskingUtil.createUrlRule("[网址已隐藏]"));
+
+		// Password rule that skips code elements
+		final RichTextMaskingRule passwordRule = RichTextMaskingUtil.createCustomRule(
+			"密码",
+			"password = \"[^\"]+\"",
+			RichTextMaskingRule.MaskType.REPLACE,
+			"password = \"******\"");
+		passwordRule.addExcludeTag("code");
+		processor.addRule(passwordRule);
+
+		final String masked = RichTextMaskingUtil.mask(html, processor);
+
+		// Verify the result
+		Assertions.assertTrue(masked.contains("<h1>公司**文档</h1>"));
+		Assertions.assertTrue(masked.contains("z***"));
+		Assertions.assertTrue(masked.contains("138****5678"));
+		Assertions.assertTrue(masked.contains("这是一段**信息"));
+		// code is excluded for the password rule, so the snippet must stay untouched
+		Assertions.assertTrue(masked.contains("String password = \"123456\""));
+		Assertions.assertTrue(masked.contains("[网址已隐藏]"));
+	}
+}
+