This commit is contained in:
Looly 2022-03-24 09:27:27 +08:00
parent f6feaeb2cd
commit d4a24180a4
3 changed files with 55 additions and 40 deletions

View File

@ -2,7 +2,7 @@
# 🚀Changelog
-------------------------------------------------------------------------------------------------------------
# 5.8.0 (2022-03-22)
# 5.8.0 (2022-03-24)
### ❌不兼容特性
* 【db 】 【不向下兼容 】增加MongoDB4.x支持，返回MongoClient变更（pr#568@Gitee）
@ -52,7 +52,8 @@
* 【json 】 修复JSON对Map.Entry的解析问题
* 【core 】 修复MapConverter中map与map转换兼容问题
* 【poi 】 解决sax读取时POI-5.2.x兼容性问题
* 【core 】 修复修复判断两段时间区间交集问题pr#2210@Github
* 【core 】 修复判断两段时间区间交集问题（pr#2210@Github）
* 【http 】 修复标签误删问题（issue#I4Z7BV@Gitee）
-------------------------------------------------------------------------------------------------------------
# 5.7.22 (2022-03-01)

View File

@ -127,10 +127,10 @@ public class HtmlUtil {
// (?i)表示其后面的表达式忽略大小写
if (withTagContent) {
// 标签及其包含内容
regex = StrUtil.format("(?i)<{}\\s*?[^>]*?/?>(.*?</{}>)?", tagName, tagName);
regex = StrUtil.format("(?i)<{}(\\s+[^>]*)?/?>(.*?</{}>)?", tagName, tagName);
} else {
// 标签不包含内容
regex = StrUtil.format("(?i)<{}\\s*?[^>]*?>|</{}>", tagName, tagName);
regex = StrUtil.format("(?i)<{}(\\s+[^>]*)?/?>|</?{}>", tagName, tagName);
}
content = ReUtil.delAll(regex, content); // 非自闭标签小写

View File

@ -99,6 +99,11 @@ public class HtmlUtilTest {
result = HtmlUtil.unwrapHtmlTag(str, "img");
Assert.assertEquals("pre", result);
//闭合标签
str = "pre<img/>";
result = HtmlUtil.unwrapHtmlTag(str, "img");
Assert.assertEquals("pre", result);
//包含内容标签
str = "pre<div class=\"test_div\">abc</div>";
result = HtmlUtil.unwrapHtmlTag(str, "div");
@ -110,6 +115,15 @@ public class HtmlUtilTest {
Assert.assertEquals("pre\r\n\t\tabc\r\n", result);
}
@Test
public void unwrapTest2() {
	// Regression check: removing the <i> tag must not also remove <img>,
	// whose tag name merely begins with the letter "i".
	final String[] tagsToRemove = "i,br".split(",");
	final String source = "<html><img src='aaa'><i>测试文本</i></html>";

	// The expected value keeps both the <img> tag and the text that was
	// wrapped by <i>; only the <i> markers themselves disappear.
	final String expected = "<html><img src='aaa'>测试文本</html>";

	Assert.assertEquals(expected, HtmlUtil.removeHtmlTag(source, false, tagsToRemove));
}
@Test
public void escapeTest() {
String html = "<html><body>123'123'</body></html>";