修复CsvParser中对正文中双引号处理逻辑问题

This commit is contained in:
Looly 2024-07-31 01:14:32 +08:00
parent c6fc880405
commit 469b433b53
4 changed files with 30 additions and 6 deletions

View File

@ -25,6 +25,7 @@
* 【core 】 修复FileTypeUtil.getType空指针问题issue#IAD5JM@Gitee * 【core 】 修复FileTypeUtil.getType空指针问题issue#IAD5JM@Gitee
* 【core 】 修复IdcardUtil.isValidHKCard校验问题issue#IAFOLI@Gitee * 【core 】 修复IdcardUtil.isValidHKCard校验问题issue#IAFOLI@Gitee
* 【core 】 修复Convert.digitToChinese(0)输出金额无`元整问题`issue#3662@Github * 【core 】 修复Convert.digitToChinese(0)输出金额无`元整问题`issue#3662@Github
* 【core 】 修复CsvParser中对正文中双引号处理逻辑问题pr#1244@Gitee
------------------------------------------------------------------------------------------------------------- -------------------------------------------------------------------------------------------------------------
# 5.8.29(2024-07-03) # 5.8.29(2024-07-03)

View File

@ -3236,7 +3236,7 @@ public class CharSequenceUtil {
* @return 是否被包装 * @return 是否被包装
*/ */
public static boolean isWrap(CharSequence str, char prefixChar, char suffixChar) { public static boolean isWrap(CharSequence str, char prefixChar, char suffixChar) {
if (null == str) { if (null == str || str.length() < 2) {
return false; return false;
} }

View File

@ -281,8 +281,8 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
} }
buf.mark(); buf.mark();
addField(currentFields, currentField.toStringAndReset()); addField(currentFields, currentField.toStringAndReset());
} else if (c == config.textDelimiter) { } else if (c == config.textDelimiter && isFieldBegin(preChar)) {
// 引号开始 // 引号开始且出现在字段开头
inQuotes = true; inQuotes = true;
copyLen++; copyLen++;
} else if (c == CharUtil.CR) { } else if (c == CharUtil.CR) {
@ -340,8 +340,13 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
// 忽略多余引号后的换行符 // 忽略多余引号后的换行符
field = StrUtil.trim(field, 1, (c-> c == CharUtil.LF || c == CharUtil.CR)); field = StrUtil.trim(field, 1, (c-> c == CharUtil.LF || c == CharUtil.CR));
field = StrUtil.unWrap(field, textDelimiter); if(StrUtil.isWrap(field, textDelimiter)){
field = StrUtil.replace(field, "" + textDelimiter + textDelimiter, textDelimiter + ""); field = StrUtil.sub(field, 1, field.length() - 1);
// https://datatracker.ietf.org/doc/html/rfc4180#section-2
// 第七条规则只有包装内的包装符需要转义
field = StrUtil.replace(field, String.valueOf(textDelimiter) + textDelimiter, String.valueOf(textDelimiter));
}
if(this.config.trimField){ if(this.config.trimField){
// issue#I49M0C@Gitee // issue#I49M0C@Gitee
field = StrUtil.trim(field); field = StrUtil.trim(field);
@ -361,6 +366,24 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR; return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR;
} }
/**
 * Decides, from the preceding character alone, whether the parser is at the start of a field.
 * A field is considered to begin when one of the following holds:
 * <ul>
 * <li>there is no preceding character (start of the text), signalled by {@code -1}</li>
 * <li>the preceding character is the configured field separator, i.e. the previous field just ended</li>
 * <li>the preceding character is a line break (LF or CR), i.e. a new line has started</li>
 * </ul>
 *
 * @param preChar the preceding character, or {@code -1} when there is none
 * @return {@code true} if the preceding character marks the start of a field
 */
private boolean isFieldBegin(final int preChar) {
	// Start of the text: nothing has been read before this character.
	if (preChar == -1) {
		return true;
	}
	// The previous field was just terminated by the separator.
	if (preChar == config.fieldSeparator) {
		return true;
	}
	// Otherwise only a line break can precede the first character of a field.
	return preChar == CharUtil.LF || preChar == CharUtil.CR;
}
/** /**
* 内部Buffer * 内部Buffer
* *

View File

@ -13,7 +13,7 @@ import static org.junit.Assert.assertEquals;
public class Pr1244Test { public class Pr1244Test {
@Test @Test
public void csvReadTest() { public void csvReadTest() {
final String csv = "a,q\"\"e,d,f"; final String csv = "a,q\"e,d,f";
final CsvReader reader = CsvUtil.getReader(new StringReader(csv)); final CsvReader reader = CsvUtil.getReader(new StringReader(csv));
final CsvData read = reader.read(); final CsvData read = reader.read();
assertEquals(4, read.getRow(0).size()); assertEquals(4, read.getRow(0).size());