This commit is contained in:
Looly
2025-11-26 21:15:01 +08:00
parent 4a8779ec42
commit 0461270e84
5 changed files with 58 additions and 110 deletions

View File

@@ -18,32 +18,17 @@ package cn.hutool.v7.poi.excel.sax;
import cn.hutool.v7.core.io.IoUtil;
import cn.hutool.v7.core.lang.Assert;
import cn.hutool.v7.core.util.ObjUtil;
import cn.hutool.v7.core.text.StrUtil;
import cn.hutool.v7.poi.excel.sax.handler.RowHandler;
import cn.hutool.v7.core.util.ObjUtil;
import cn.hutool.v7.poi.POIException;
import cn.hutool.v7.poi.excel.sax.handler.RowHandler;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.*;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.CellValueRecordInterface;
import org.apache.poi.hssf.record.EOFRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.*;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@@ -55,7 +40,7 @@ import java.util.List;
/**
* Excel2003格式的事件-用户模型方式读取器在Hutool中统一将此归类为Sax读取<br>
* 参考http://www.cnblogs.com/wshsdlau/p/5643862.html
* 参考:<a href="http://www.cnblogs.com/wshsdlau/p/5643862.html">POI Sax 事件驱动解析Excel2003文件</a>
*
* @author Looly
*/
@@ -148,7 +133,7 @@ public class Excel03SaxReader implements HSSFListener, ExcelSaxReader<Excel03Sax
* @throws POIException IO异常包装
*/
public Excel03SaxReader read(final POIFSFileSystem fs, final String idOrRidOrSheetName) throws POIException {
this.rid = getSheetIndex(idOrRidOrSheetName);
this.rid = getRid(idOrRidOrSheetName);
formatListener = new FormatTrackingHSSFListener(new MissingRecordAwareHSSFListener(this));
final HSSFRequest request = new HSSFRequest();
@@ -177,7 +162,7 @@ public class Excel03SaxReader implements HSSFListener, ExcelSaxReader<Excel03Sax
*
* @return sheet序号
*/
public int getSheetIndex() {
public int getRid() {
return this.rid;
}
@@ -396,7 +381,7 @@ public class Excel03SaxReader implements HSSFListener, ExcelSaxReader<Excel03Sax
* @return sheet索引从0开始
* @since 5.5.5
*/
private int getSheetIndex(final String idOrRidOrSheetName) {
private int getRid(final String idOrRidOrSheetName) {
Assert.notBlank(idOrRidOrSheetName, "id or rid or sheetName must be not blank!");
// rid直接处理

View File

@@ -21,8 +21,8 @@ import cn.hutool.v7.core.io.IoUtil;
import cn.hutool.v7.core.reflect.method.MethodUtil;
import cn.hutool.v7.core.text.StrUtil;
import cn.hutool.v7.core.util.ObjUtil;
import cn.hutool.v7.poi.excel.sax.handler.RowHandler;
import cn.hutool.v7.poi.POIException;
import cn.hutool.v7.poi.excel.sax.handler.RowHandler;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
@@ -76,11 +76,6 @@ public class Excel07SaxReader implements ExcelSaxReader<Excel07SaxReader> {
}
// ------------------------------------------------------------------------------ Read start
/**
 * Reads the sheet addressed by a numeric rid from the given file.
 * The number is turned into its prefixed form ({@code RID_PREFIX + rid}) and the
 * call is forwarded to the String-based overload.
 *
 * @param file Excel file to read
 * @param rid  numeric part of the sheet rid
 * @return the reader returned by the String-based overload
 * @throws POIException propagated from the String-based overload
 */
@Override
public Excel07SaxReader read(final File file, final int rid) throws POIException {
	// Build the prefixed rid reference first, then delegate.
	final String ridReference = RID_PREFIX + rid;
	return read(file, ridReference);
}
@Override
public Excel07SaxReader read(final File file, final String idOrRidOrSheetName) throws POIException {
try (final OPCPackage open = OPCPackage.open(file, PackageAccess.READ)) {
@@ -90,11 +85,6 @@ public class Excel07SaxReader implements ExcelSaxReader<Excel07SaxReader> {
}
}
/**
 * Reads the sheet addressed by a numeric rid from the given stream.
 * The number is turned into its prefixed form ({@code RID_PREFIX + rid}) and the
 * call is forwarded to the String-based overload.
 *
 * @param in  stream containing the Excel document
 * @param rid numeric part of the sheet rid
 * @return the reader returned by the String-based overload
 * @throws POIException propagated from the String-based overload
 */
@Override
public Excel07SaxReader read(final InputStream in, final int rid) throws POIException {
	// Build the prefixed rid reference first, then delegate.
	final String ridReference = RID_PREFIX + rid;
	return read(in, ridReference);
}
@Override
public Excel07SaxReader read(final InputStream in, final String idOrRidOrSheetName) throws POIException {
try (final OPCPackage opcPackage = OPCPackage.open(in)) {
@@ -115,7 +105,7 @@ public class Excel07SaxReader implements ExcelSaxReader<Excel07SaxReader> {
* @throws POIException POI异常
*/
public Excel07SaxReader read(final OPCPackage opcPackage, final int rid) throws POIException {
return read(opcPackage, RID_PREFIX + rid);
return read(opcPackage, String.valueOf(rid));
}
/**
@@ -174,22 +164,22 @@ public class Excel07SaxReader implements ExcelSaxReader<Excel07SaxReader> {
* @since 5.4.4
*/
private Excel07SaxReader readSheets(final XSSFReader xssfReader, final String idOrRidOrSheetName) throws POIException {
this.handler.sheetIndex = getSheetIndex(xssfReader, idOrRidOrSheetName);
this.handler.rid = getRid(xssfReader, idOrRidOrSheetName);
InputStream sheetInputStream = null;
try {
if (this.handler.sheetIndex > -1) {
if (this.handler.rid > -1) {
// 根据 rId# 或 rSheet# 查找sheet
sheetInputStream = xssfReader.getSheet(RID_PREFIX + (this.handler.sheetIndex + 1));
sheetInputStream = xssfReader.getSheet(RID_PREFIX + this.handler.rid);
ExcelSaxUtil.readFrom(sheetInputStream, this.handler);
this.handler.rowHandler.doAfterAllAnalysed();
} else {
this.handler.sheetIndex = -1;
this.handler.rid = -1;
// 遍历所有sheet
final Iterator<InputStream> sheetInputStreams = xssfReader.getSheetsData();
while (sheetInputStreams.hasNext()) {
// 重新读取一个sheet时行归零
this.handler.index = 0;
this.handler.sheetIndex++;
this.handler.rid++;
sheetInputStream = sheetInputStreams.next();
ExcelSaxUtil.readFrom(sheetInputStream, this.handler);
this.handler.rowHandler.doAfterAllAnalysed();
@@ -206,19 +196,20 @@ public class Excel07SaxReader implements ExcelSaxReader<Excel07SaxReader> {
}
/**
* 获取sheet索引从0开始
* Resolves the sheet rid (the number that follows the rId prefix), which starts at 1
* <ul>
* <li>传入'rId'开头直接去除rId前缀</li>
* <li>传入纯数字表示sheetIndex通过{@link SheetRidReader}转换为rId</li>
* <li>传入纯数字表示sheetIndex通过{@link SheetRidReader}转换为rId如果找不到则认为此数字本身为rid</li>
* <li>传入'sheetName:'开头表示sheetName通过{@link SheetRidReader}转换为rId</li>
* <li>传入其它字符串表示sheetName通过{@link SheetRidReader}转换为rId</li>
* </ul>
*
* @param xssfReader {@link XSSFReader}Excel读取器
* @param idOrRidOrSheetName Excel中的sheet id或者rid编号或sheet名称0开始rid必须加rId前缀例如rId0,如果为-1处理所有编号的sheet
* @return sheet索引,从0开始
* @param idOrRidOrSheetName Excel中的sheet id或者rid编号或sheet名称1开始rid必须加rId前缀例如rId1,如果为-1处理所有编号的sheet
* @return rid,从1开始
* @since 5.5.5
*/
private int getSheetIndex(final XSSFReader xssfReader, String idOrRidOrSheetName) {
private int getRid(final XSSFReader xssfReader, String idOrRidOrSheetName) {
// rid直接处理
if (StrUtil.startWithIgnoreCase(idOrRidOrSheetName, RID_PREFIX)) {
return Integer.parseInt(StrUtil.removePrefixIgnoreCase(idOrRidOrSheetName, RID_PREFIX));
@@ -228,24 +219,24 @@ public class Excel07SaxReader implements ExcelSaxReader<Excel07SaxReader> {
final SheetRidReader ridReader = SheetRidReader.parse(xssfReader);
if (StrUtil.startWithIgnoreCase(idOrRidOrSheetName, SHEET_NAME_PREFIX)) {
// name:开头的被认为是sheet名称直接处理
// sheetName:开头的被认为是sheet名称直接处理
idOrRidOrSheetName = StrUtil.removePrefixIgnoreCase(idOrRidOrSheetName, SHEET_NAME_PREFIX);
final Integer rid = ridReader.getRidByNameBase0(idOrRidOrSheetName);
final Integer rid = ridReader.getRidByName(idOrRidOrSheetName);
if (null != rid) {
return rid;
}
} else {
// 尝试查找名称
Integer rid = ridReader.getRidByNameBase0(idOrRidOrSheetName);
Integer rid = ridReader.getRidByName(idOrRidOrSheetName);
if (null != rid) {
return rid;
}
try {
final int sheetIndex = Integer.parseInt(idOrRidOrSheetName);
rid = ridReader.getRidBySheetIdBase0(sheetIndex);
final int sheetId = Integer.parseInt(idOrRidOrSheetName);
rid = ridReader.getRidBySheetId(sheetId);
// 如果查找不到对应index则认为用户传入的直接是rid
return ObjUtil.defaultIfNull(rid, sheetIndex);
return ObjUtil.defaultIfNull(rid, sheetId);
} catch (final NumberFormatException ignore) {
// 非数字说明非index且没有对应名称抛出异常
}

View File

@@ -33,9 +33,9 @@ import java.util.List;
/**
* sheetData标签内容读取处理器
*
* <pre>
* &lt;sheetData&gt;&lt;/sheetData&gt;
* </pre>
* <pre>{@code
* <sheetData></sheetData>
* }</pre>
*
* @since 5.5.3
*/
@@ -60,7 +60,7 @@ public class SheetDataSaxHandler extends DefaultHandler {
/**
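* Sheet identifier passed to the row handler: the rid number when a single sheet is read, or a 0-based running counter when all sheets are iterated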
*/
protected int sheetIndex;
protected int rid;
/**
* 当前非空行索引从0开始
*/
@@ -274,7 +274,7 @@ public class SheetDataSaxHandler extends DefaultHandler {
padCell(curCoordinate, maxCellCoordinate, true);
}
rowHandler.handle(sheetIndex, rowNumber, rowCellList);
rowHandler.handle(rid, rowNumber, rowCellList);
// 一行结束
// 新建一个新列之前的列抛弃可能被回收或rowHandler处理
@@ -319,7 +319,7 @@ public class SheetDataSaxHandler extends DefaultHandler {
*/
private void addCellValue(final int index, final Object value) {
this.rowCellList.add(index, value);
this.rowHandler.handleCell(this.sheetIndex, this.rowNumber, index, value, this.xssfCellStyle);
this.rowHandler.handleCell(this.rid, this.rowNumber, index, value, this.xssfCellStyle);
}
/**

View File

@@ -35,9 +35,9 @@ import java.util.Map;
/**
* 在Sax方式读取Excel时读取sheet标签中sheetId和rid的对应关系类似于:
* <pre>
* &lt;sheet name="Sheet6" sheetId="4" r:id="rId6"/&gt;
* </pre>
* <pre>{@code
* <sheet name="Sheet6" sheetId="4" r:id="rId6"/>
* }</pre>
* <p>
* 读取结果为:
*
@@ -100,21 +100,6 @@ public class SheetRidReader extends DefaultHandler {
return ID_RID_MAP.get(sheetId);
}
/**
 * Looks up the rid for a 0-based sheet id and returns it as a 0-based value.
 * The id is shifted up by one before delegating to {@code getRidBySheetId}, and a
 * non-null result is shifted back down by one.
 *
 * @param sheetId sheet id, starting at 0
 * @return rid starting at 0, or {@code null} when the delegate returns {@code null}
 * @since 5.5.5
 */
public Integer getRidBySheetIdBase0(final int sheetId) {
	final Integer ridBase1 = getRidBySheetId(sheetId + 1);
	if (null == ridBase1) {
		// No mapping for this sheet id.
		return null;
	}
	return ridBase1 - 1;
}
/**
* 根据sheet name获取rid从1开始
*
@@ -125,21 +110,6 @@ public class SheetRidReader extends DefaultHandler {
return NAME_RID_MAP.get(sheetName);
}
/**
 * Looks up the rid for a sheet name and returns it as a 0-based value.
 * Delegates to {@code getRidByName} and shifts a non-null result down by one.
 *
 * @param sheetName name of the sheet
 * @return rid starting at 0, or {@code null} when the delegate returns {@code null}
 * @since 5.5.5
 */
public Integer getRidByNameBase0(final String sheetName) {
	final Integer ridBase1 = getRidByName(sheetName);
	if (null == ridBase1) {
		// No mapping for this sheet name.
		return null;
	}
	return ridBase1 - 1;
}
/**
* 通过sheet的序号获取rid
*

View File

@@ -39,6 +39,8 @@ import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
* Excel sax方式读取
*
@@ -49,12 +51,12 @@ public class ExcelSaxReadTest {
@Test
public void excel07Test() {
// 工具化快速读取
ExcelUtil.readBySax("aaa.xlsx", 0, createRowHandler());
ExcelUtil.readBySax("aaa.xlsx", 1, createRowHandler());
}
@Test
void readEndByExceptionTest(){
ExcelUtil.readBySax("aaa.xlsx", 0, (sheetIndex, rowIndex, rowList) -> {
ExcelUtil.readBySax("aaa.xlsx", 1, (sheetIndex, rowIndex, rowList) -> {
if (rowIndex == 1) {
throw new StopReadException();
}
@@ -84,7 +86,7 @@ public class ExcelSaxReadTest {
@Test
public void excel07FromStreamTest() {
// issue#1225 非markSupport的流读取会错误
ExcelUtil.readBySax(IoUtil.toStream(FileUtil.file("aaa.xlsx")), 0, createRowHandler());
ExcelUtil.readBySax(IoUtil.toStream(FileUtil.file("aaa.xlsx")), 1, createRowHandler());
}
@Test
@@ -125,12 +127,12 @@ public class ExcelSaxReadTest {
@Test
public void readBySaxTest() {
ExcelUtil.readBySax("blankAndDateTest.xlsx", "0", createRowHandler());
ExcelUtil.readBySax("blankAndDateTest.xlsx", "1", createRowHandler());
}
@Test
public void readBySaxByRidTest() {
ExcelUtil.readBySax("blankAndDateTest.xlsx", 0, createRowHandler());
ExcelUtil.readBySax("blankAndDateTest.xlsx", 1, createRowHandler());
}
@Test
@@ -198,7 +200,7 @@ public class ExcelSaxReadTest {
rows.add("");
}
});
Assertions.assertEquals(50L, rows.get(3));
assertEquals(50L, rows.get(3));
}
@Test
@@ -206,14 +208,14 @@ public class ExcelSaxReadTest {
// since 6.0.0修改
// 默认不在行尾对齐单元格,因此只读取了有第二个值的行
final List<Object> rows = new ArrayList<>();
ExcelUtil.readBySax("data_for_sax_test.xlsx", 0, (i, i1, list) -> {
ExcelUtil.readBySax("data_for_sax_test.xlsx", 1, (i, i1, list) -> {
if(list.size() > 1){
rows.add(list.get(1));
}
});
final FormulaCellValue value = (FormulaCellValue) rows.get(1);
Assertions.assertEquals(50L, value.getResult());
assertEquals(50L, value.getResult());
}
@Test
@@ -223,26 +225,26 @@ public class ExcelSaxReadTest {
(i, i1, list) -> rows.add(StrUtil.toString(list.get(0)))
);
Assertions.assertEquals("2020-10-09 00:00:00", rows.get(1));
assertEquals("2020-10-09 00:00:00", rows.get(1));
// 非日期格式不做转换
Assertions.assertEquals("112233", rows.get(2));
Assertions.assertEquals("1000.0", rows.get(3));
Assertions.assertEquals("2012-12-21 00:00:00", rows.get(4));
assertEquals("112233", rows.get(2));
assertEquals("1000.0", rows.get(3));
assertEquals("2012-12-21 00:00:00", rows.get(4));
}
@Test
public void dateReadXlsxTest() {
final List<String> rows = new ArrayList<>();
ExcelUtil.readBySax("data_for_sax_test.xlsx", 0,
ExcelUtil.readBySax("data_for_sax_test.xlsx", 1,
(i, i1, list) -> rows.add(StrUtil.toString(list.get(0)))
);
Assertions.assertEquals("2020-10-09 00:00:00", rows.get(1));
assertEquals("2020-10-09 00:00:00", rows.get(1));
// 非日期格式不做转换
Assertions.assertEquals("112233", rows.get(2));
assertEquals("112233", rows.get(2));
// 读取实际值,而非带有格式处理过的值
Assertions.assertEquals("1000.0", rows.get(3));
Assertions.assertEquals("2012-12-21 00:00:00", rows.get(4));
assertEquals("1000.0", rows.get(3));
assertEquals("2012-12-21 00:00:00", rows.get(4));
}
@Test
@@ -258,9 +260,9 @@ public class ExcelSaxReadTest {
final File file = FileUtil.file("aaa.xlsx");
final List<List<Object>> list = ListUtil.of();
ExcelUtil.readBySax(file, 0, (sheetIndex, rowIndex, rowList) -> list.add(rowList));
ExcelUtil.readBySax(file, 1, (sheetIndex, rowIndex, rowList) -> list.add(rowList));
Assertions.assertEquals("[, 女, , 43.22]", list.get(3).toString());
assertEquals("[, 女, , 43.22]", list.get(3).toString());
}
@Test
@@ -286,6 +288,6 @@ public class ExcelSaxReadTest {
}
});
//总共2个sheet页读取所有sheet时一共执行doAfterAllAnalysed2次。
Assertions.assertEquals(2, doAfterAllAnalysedTime.intValue());
assertEquals(2, doAfterAllAnalysedTime.intValue());
}
}