fix io bug

This commit is contained in:
Looly 2021-06-02 08:40:50 +08:00
parent c3912edeb5
commit d815b0f64e
3 changed files with 62 additions and 33 deletions

View File

@ -3,13 +3,14 @@
-------------------------------------------------------------------------------------------------------------
# 5.6.7 (2021-05-29)
# 5.6.7 (2021-06-02)
### 🐣新特性
* 【core 】 CharSequenceUtil增加join重载（issue#I3TFJ5@Gitee）
### 🐞Bug修复
* 【core 】 修复FileUtil.normalize去掉末尾空格问题（issue#1603@Github）
* 【core 】 修复CharsetDetector流关闭问题（issue#1603@Github）
-------------------------------------------------------------------------------------------------------------

View File

@ -1,17 +1,15 @@
package cn.hutool.core.io;
import cn.hutool.core.convert.Convert;
import cn.hutool.core.util.ArrayUtil;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.List;
/**
* 编码探测器
@ -28,29 +26,34 @@ public class CharsetDetector {
static {
String[] names = {
"US-ASCII",
"UTF-8",
"GBK",
"GB2312",
"BIG5",
"GB18030",
"UTF-16BE",
"UTF-16LE",
"UTF-16",
"UNICODE"};
final List<Charset> list = new ArrayList<>();
for (String name : names) {
try {
list.add(Charset.forName(name));
} catch (UnsupportedCharsetException ignore) {
//ignore
}
}
DEFAULT_CHARSETS = list.toArray(new Charset[0]);
"BIG5",
"UNICODE",
"US-ASCII"};
DEFAULT_CHARSETS = Convert.convert(Charset[].class, names);
}
/**
* 探测编码
* 探测文件编码
*
* @param file 文件
* @param charsets 需要测试用的编码，null或空使用默认的编码数组
* @return 编码
* @since 5.6.7
*/
public static Charset detect(File file, Charset... charsets) {
	// Open the file as a stream, then let the stream-based overload do the probing.
	final InputStream fileStream = FileUtil.getInputStream(file);
	return detect(fileStream, charsets);
}
/**
* 探测编码<br>
* 注意：此方法会读取流的一部分，然后关闭流，如重复使用流，请使用支持reset方法的流
*
* @param in 使用后关闭此流
* @param charsets 需要测试用的编码，null或空使用默认的编码数组
@ -60,34 +63,40 @@ public class CharsetDetector {
if (ArrayUtil.isEmpty(charsets)) {
charsets = DEFAULT_CHARSETS;
}
for (Charset charset : charsets) {
charset = detectCharset(in, charset);
if (null != charset) {
return charset;
final byte[] buffer = new byte[512];
try {
while (in.read(buffer) > -1) {
for (Charset charset : charsets) {
final CharsetDecoder decoder = charset.newDecoder();
if (identify(buffer, decoder)) {
return charset;
}
}
}
} catch (IOException e) {
throw new IORuntimeException(e);
} finally {
IoUtil.close(in);
}
return null;
}
/**
* 判断编码
* 判断编码判断后会关闭流
*
* @param in
* @param charset 编码
* @return 编码
*/
private static Charset detectCharset(InputStream in, Charset charset) {
try (BufferedInputStream input = IoUtil.toBuffered(in)) {
CharsetDecoder decoder = charset.newDecoder();
private static Charset detectCharset(InputStream in, Charset charset) throws IOException {
CharsetDecoder decoder = charset.newDecoder();
byte[] buffer = new byte[512];
while (input.read(buffer) > -1) {
if (identify(buffer, decoder)) {
return charset;
}
final byte[] buffer = new byte[512];
while (in.read(buffer) > -1) {
if (identify(buffer, decoder)) {
return charset;
}
} catch (IOException e) {
throw new IORuntimeException(e);
}
return null;
}

View File

@ -0,0 +1,19 @@
package cn.hutool.core.io;
import cn.hutool.core.io.resource.ResourceUtil;
import cn.hutool.core.util.CharsetUtil;
import org.junit.Assert;
import org.junit.Test;
import java.nio.charset.Charset;
public class CharsetDetectorTest {

	/**
	 * Checks that several candidate charsets can be probed against one and the same stream.
	 * The candidates are passed in the order GBK, UTF-8, and UTF-8 is the expected result
	 * for the {@code test.xml} resource.
	 */
	@Test
	public void detectTest() {
		final Charset expected = CharsetUtil.CHARSET_UTF_8;
		final Charset actual = CharsetDetector.detect(
				ResourceUtil.getStream("test.xml"),
				CharsetUtil.CHARSET_GBK,
				CharsetUtil.CHARSET_UTF_8);
		Assert.assertEquals(expected, actual);
	}
}