Java实现excel大数据量导入

网友投稿 378 2022-11-25


Java实现excel大数据量导入

本文实例为大家分享了java实现excel大数据量导入的具体代码,供大家参考,具体内容如下

情景分析:

通常我们通过poi读取excel文件时,若在用户模式下,由于数据量较大、Sheet较多,很容易出现内存溢出的情况

用户模式读取excel的典型代码如下:

FileInputStream file = new FileInputStream("c:\\test.xlsx");

Workbook wb=new XSSFWorkbook(file);

而03版(xls)excel文件每个sheet最大长度为 65536行,07版(xlsx)excel文件每个sheet最大长度为 1048576行,因此我们主要解决07版(xlsx)excel大数据量导入出现内存溢出的问题

由于读取cvs文件效率高且占用内存少,所以我们采用xlsx转换为cvs的方式来解决

读取xlsx格式数据:

import java.io.File;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.InputStream;

import java.io.OutputStream;

import java.io.PrintStream;

import java.text.SimpleDateFormat;

import java.util.ArrayList;

import java.util.Date;

import java.util.HashMap;

import java.util.List;

import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.parsers.SAXParser;

import javax.xml.parsers.SAXParserFactory;

import org.apache.poi.hssf.usermodel.HSSFDateUtil;

import org.apache.poi.openxml4j.exceptions.OpenXML4JException;

import org.apache.poi.openxml4j.opc.OPCPackage;

import org.apache.poi.openxml4j.opc.PackageAccess;

import org.apache.poi.ss.usermodel.BuiltinFormats;

import org.apache.poi.ss.usermodel.DataFormatter;

import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;

import org.apache.poi.xssf.eventusermodel.XSSFReader;

import org.apache.poi.xssf.model.StylesTable;

import org.apache.poi.xssf.usermodel.XSSFCellStyle;

import org.apache.poi.xssf.usermodel.XSSFRichTextString;

import org.xml.sax.Attributes;

import org.xml.sax.InputSource;

import org.xml.sax.SAXException;

import org.xml.sax.XMLReader;

import org.xml.sax.helpers.DefaultHandler;

public class XLSX2CSV {

enum xssfDataType {

BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,

}

/**

* 使用xssf_sax_API处理Excel,请参考: http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api

*/

class MyXSSFSheetHandler extends DefaultHandler {

private StylesTable stylesTable;

private ReadOnlySharedStringsTable sharedStringsTable;

private final PrintStream output;

private final int minColumnCount;

private boolean vIsOpen;

private xssfDataType nextDataType;

private short formatIndex;

private String formatString;

private final DataFormatter formatter;

private int thisColumn = -1;

private int lastColumnNumber = -1;

private StringBuffer value;

private String[] record;

private List rows = new ArrayList();

private boolean isCellNull = false;

public MyXSSFSheetHandler(StylesTable styles,ReadOnlySharedStringsTable strings, int cols, PrintStream target) {

this.stylesTable = styles;

this.sharedStringsTable = strings;

this.minColumnCount = cols;

this.output = target;

this.value = new StringBuffer();

this.nextDataType = xssfDataType.NUMBER;

this.formatter = new DataFormatter();

record = new String[this.minColumnCount];

rows.clear();// 每次读取都清空行集合

}

public void startElement(String uri, String localName, String name,Attributes attributes) throws SAXException {

if ("inlineStr".equals(name) || "v".equals(name)) {

vIsOpen = true;

// Clear contents cache

value.setLength(0);

}

// c => cell

else if ("c".equals(name)) {

// Get the cell reference

String r = attributes.getValue("r");

int firstDigit = -1;

for (int c = 0; c < r.length(); ++c) {

if (Character.isDigit(r.charAt(c))) {

firstDigit = c;

break;

}

}

thisColumn = nameToColumn(r.substring(0, firstDigit));

// Set up defaults.

this.nextDataType = xssfDataType.NUMBER;

this.formatIndex = -1;

this.formatString = null;

String cellType = attributes.getValue("t");

String cellStyleStr = attributes.getValue("s");

if ("b".equals(cellType))

nextDataType = xssfDataType.BOOL;

else if ("e".equals(cellType))

nextDataType = xssfDataType.ERROR;

else if ("inlineStr".equals(cellType))

nextDataType = xssfDataType.INLINESTR;

else if ("s".equals(cellType))

nextDataType = xssfDataType.SSTINDEX;

else if ("str".equals(cellType))

nextDataType = xssfDataType.FORMULA;

else if (cellStyleStr != null) {

// It's a number, but almost certainly one

// with a special style or format

int styleIndex = Integer.parseInt(cellStyleStr);

XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);

this.formatIndex = style.getDataFormat();

this.formatString = style.getDataFormatString();

if (this.formatString == null)

this.formatString = BuiltinFormats

.getBuiltinFormat(this.formatIndex);

}

}

}

public void endElement(String uri, String localName, String name)throws SAXException {

String thisStr = null;

// v => contents of a cell

if ("v".equals(name)) {

// Process the value contents as required.

// Do now, as characters() may be called more than once

switch (nextDataType) {

case BOOL:

char first = value.charAt(0);

thisStr = first == '0' ? "FALSE" : "TRUE";

break;

case ERROR:

thisStr = "\"ERROR:" + value.toString() + '"';

break;

case FORMULA:

// A formula could result in a string value,

// so always add double-quote characters.

thisStr = '"' + value.toString() + '"';

break;

case INLINESTR:

// TODO: have seen an example of this, so it's untested.

XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());

thisStr = '"' + rtsi.toString() + '"';

break;

case SSTINDEX:

String sstIndex = value.toString();

try {

int idx = Integer.parseInt(sstIndex);

XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx));

thisStr = rtss.toString();

} catch (NumberFormatException ex) {

output.println("Failed to parse SST index '" + sstIndex

+ "': " + ex.toString());

}

break;

case NUMBER:

String n = value.toString();

// 判断是否是日期格式

if (HSSFDateUtil.isADateFormat(this.formatIndex, n)) {

Double d = Double.parseDouble(n);

Date date=HSSFDateUtil.getJavaDate(d);

thisStr=formateDateToString(date);

} else if (this.formatString != null)

thisStr = formatter.formatRawCellContents(

Double.parseDouble(n), this.formatIndex,

this.formatString);

else

thisStr = n;

break;

default:

thisStr = "(TODO: Unexpected type: " + nextDataType + ")";

break;

}

// Output after we've seen the string contents

// Emit commas for any fields that were missing on this row

if (lastColumnNumber == -1) {

lastColumnNumber = 0;

}

//判断单元格的值是否为空

if (thisStr == null || "".equals(isCellNull)) {

isCellNull = true;// 设置单元格是否为空值

}

record[thisColumn] = thisStr;

// Update column

if (thisColumn > -1)

lastColumnNumber = thisColumn;

} else if ("row".equals(name)) {

// Print out any missing commas if needed

if (minColumns > 0) {

// Columns are 0 based

if (lastColumnNumber == -1) {

lastColumnNumber = 0;

}

if (isCellNull == false && record[0] != null

&& record[1] != null)// 判断是否空行

{

rows.add(record.clone());

isCellNull = false;

for (int i = 0; i < record.length; i++) {

record[i] = null;

}

}

}

lastColumnNumber = -1;

}

}

public List getRows() {

return rows;

}

public void setRows(List rows) {

this.rows = rows;

}

/**

* Captures characters only if a suitable element is open. Originally

* was just "v"; extended for inlineStr also.

*/

public void characters(char[] ch, int start, int length)throws SAXException {

if (vIsOpen)

value.append(ch, start, length);

}

/**

* Converts an Excel column name like "C" to a zero-based index.

* @param name

* @return Index corresponding to the specified name

*/

private int nameToColumn(String name) {

int column = -1;

for (int i = 0; i < name.length(); ++i) {

int c = name.charAt(i);

column = (column + 1) * 26 + c - 'A';

}

return column;

}

private String formateDateToString(Date date) {

SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");//格式化日期

return sdf.format(date);

}

}

private OPCPackage xlsxPackage;

private int minColumns;

private PrintStream output;

/**

* Creates a new XLSX -> CSV converter

*

* @param pkg

* The XLSX package to process

* @param output

* The PrintStream to output the CSV to

* @param minColumns

* The minimum number of columns to output, or -1 for no minimum

*/

public XLSX2CSV(OPCPackage pkg, PrintStream output,int minColumns) {

this.xlsxPackage = pkg;

this.output = output;

this.minColumns = minColumns;

}

/**

* Parses and shows the content of one sheet using the specified styles and

* shared-strings tables.

* @param styles

* @param strings

* @param sheetInputStream

*/

public List processSheet(StylesTable styles,

ReadOnlySharedStringsTable strings, InputStream sheetInputStream)

throws IOException, ParserConfigurationException, SAXException {

InputSource sheetSource = new InputSource(sheetInputStream);

SAXParserFactory saxFactory = SAXParserFactory.newInstance();

SAXParser saxParser = saxFactory.newSAXParser();

XMLReader sheetParser = saxParser.getXMLReader();

MyXSSFSheetHandler handler = new MyXSSFSheetHandler(styles, strings,this.minColumns, this.output);

sheetParser.setContentHandler(handler);

sheetParser.parse(sheetSource);

return handler.getRows();

}

/**

* 初始化这个处理程序

* @throws IOException

* @throws OpenXML4JException

* @throws ParserConfigurationException

* @throws SAXException

*/

public List process() throws IOException, OpenXML4JException,ParserConfigurationException, SAXException {

ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);

XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);

List list = null;

StylesTable styles = xssfReader.getStylesTable();

XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

int index = 0;

while (iter.hasNext()) {

InputStream stream = iter.next();

index++;

//默认取第一个工作薄

if(index == 1){

list = processSheet(styles, strings, stream);

stream.close();

}

}

return list;

}

/**

* 读取Excel

*

* @param path

* 文件路径

* @param sheetName

* sheet名称

* @param minColumns

* 列总数

* @return

* @throws SAXException

* @throws ParserConfigurationException

* @throws OpenXML4JException

* @throws IOException

*/

private static List readerExcel(String path,int minColumns) throws IOException, OpenXML4JException,ParserConfigurationException, SAXException {

OPCPackage p = OPCPackage.open(path, PackageAccess.READ);

XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns);

List list = xlsx2csv.process();

p.close();

return list;

}

/**

* 读取Excel

*

* @param file

* File

* @param sheetName

* sheet名称

* @param minColumns

* 列总数

* @return

* @throws SAXException

* @throws ParserConfigurationException

* @throws OpenXML4JException

* @throws IOException

*/

private static List readerExcelInputStream(File file,int minColumns) throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {

OPCPackage p = OPCPackage.openOrCreate(file);

XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns);

List list = xlsx2csv.process();

p.close();

return list;

}

//获取表头及数据内容,可在业务层调用

public static Map getTableDate(InputStream in) throws IOException, OpenXML4JException, ParserConfigurationException, SAXException{

File out = new File("C:\\date.xlsx");//默认在C盘生成date.xlsx文件

OutputStream oSyjbxftCpVutput = null;

try {

output = new FileOutputStream(out);

byte[] buf = new byte[1024];

int bytesRead;

while ((bytesRead = in.read(buf)) > 0) {

output.write(buf, 0, bytesRead);

}

} finally {

in.close();

output.close();

}

Map map = new HashMap();

List headresult = new ArrayList();//表头集合

List> dataresult = new ArrayList>();//表数据集合

List list = readerExcelInputStream(out,30);//默认读取30列

for(int i=0;i

if(i == 0){

for(String cell : list.get(i)){

if(cell != null){

headresult.add(cell);

}

}

}else{

List dataList = new ArrayList();

for(String cell : list.get(i)){

dataList.add(cell == null ? "" : cell);

}

dataresult.add(dataList);

}

}

map.put("headresult", headresult);

map.put("dataresult", dataresult);

return map;

}

public static void main(String[] args) throws Exception {

File file = new File("C:/Users/Administrator/Desktop/测试.xlsx");

System.out.println("开始读取...");

List list = XLSX2CSV.readerExcelInputStream(file, 30);//默认读取30列

System.out.println("数据量:"+list.size());

for(int i=0;i

if(i > 0){

for (String cell : list.get(i)) {

System.out.print(cell + ",");

}

System.out.println();

}

}

}

}

说明:

代码中的加载excel的核心方法:

OPCPackage pkg = OPCPackage.open(path);//文件路径

OPCPackage pkg = OPCPackage.openOrCreate(file);//文件

OPCPackage pkg = OPCPackage.open(InputStream);//文件流

文件流方式对内存依赖极大,所以实际应用时,如果只能获取文件流的话,可以先将文件通过流拷贝到本地再解析代码中的

若上传文件大小超过限制,可在配置文件中设置,SpringBoot2项目application.properties中设置:

spring.servlet.multipart.maxFileSize=50MB

spring.servlet.multipart.maxRequestSize=100MB

实际业务数据入库的话也可采用异步任务@Async的方式来提高入库效率:

import java.util.Map;

import org.springframework.beans.factory.annotation.Autowired;

import org.springframework.scheduling.annotation.Async;

import org.springframework.stereotype.Service;

import cn.com.app.dao.JkconfigDao;

@Service

public class JkrzService {

@Autowired

private JkconfigDao jkconfigDao;

@Async("myTaskAsyncPool")

public void transJkrz(Map m) {

jkconfigDao.insert(m);

}

}

线程池配置:

import java.util.concurrent.Executor;

import java.util.concurrent.ThreadPoolExecutor;

import org.springframework.beans.factory.annotation.Autowired;

import org.springframework.context.annotation.Bean;

import org.springframework.context.annotation.Configuration;

import org.springframework.scheduling.annotation.EnableAsync;

import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;

@Configuration

@EnableAsync

public class AsyncTaskConfig {

@Autowired

private TaskThreadPoolConfig config;

// ThredPoolTaskExcutor的处理流程

// 当池子大小小于corePoolSize,就新建线程,并处理请求

// 当池子大小等于corePoolSize,把请求放入workQueue中,池子里的空闲线程就去workQueue中取任务并处理

// 当workQueue放不下任务时,就新建线程入池,并处理请求,如果池子大小撑到了maximumPoolSize,就用RejectedExecutionHandler来做拒绝处理

// 当池子的线程数大于corePoolSize时,多余的线程会等待keepAliveTime长时间,如果无请求可处理就自行销毁

// 当threadNamePrefix设置为true,则核心线程也会超时关闭

@Bean

public Executor myTaskAsyncPool() {

ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();

executor.setCorePoolSize(config.getCorePoolSize());

executor.setMaxPoolSize(config.getMaxPoolSize());

executor.setQueueCapacity(config.getQueueCapacity());

executor.setKeepAliveSeconds(config.getKeepAliveSeconds());

executor.setAllowCoreThreadTimeOut(true);

executor.setThreadNamePrefix("MyExecutor-");

// rejection-policy:当pool已经达到max size的时候,如何处理新任务

// CALLER_RUNS:不在新线程中执行任务,而是由调用者所在的线程来执行

executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());

executor.initialize();

return executor;

}

}

参数配置:

#核心线程数,当线程数小于核心线程数时,即使有线程空闲,线程池也会优先创建新线程,设置allowCoreThreadTimeout=true(默认false)时,核心线程会超时退出

spring.task.pool.corePoolSize=20

#最大线程数,当线程数大于等于corePoolSize,且任务队列已满时,线程池会创建新线程来处理任务

spring.task.pool.maxPoolSize=60

#线程空闲时间,当线程空闲时间达到keepAliveSeconds(秒)时,线程会退出,直到线程数量等于corePoolSize,如果allowCoreThreadTimeout=true,则会直到线程数量等于0

spring.task.pool.keepAliveSeconds=1

#任务队列容量,当核心线程数达到最大时,新任务会放在队列中排队等待执行

spring.task.pool.queueCapacity=400

读取xls格式数据:

import org.apache.poi.hssf.eventusermodel.*;

import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;

import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;

import org.apache.poi.hssf.model.HSSFFormulaParser;

import org.apache.poi.hssf.record.*;

import org.apache.poi.hssf.usermodel.HSSFDataFormatter;

import org.apache.poi.hssf.usermodel.HSSFWorkbook;

import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import java.io.FileInputStream;

import java.io.InputStream;

import java.util.ArrayList;

import java.util.List;

/**

* @desc 用于解决.xls2003版本大数据量问题

**/

public class ExcelXlsReader implements HSSFListener {

//存储所有数据

private List> dataList = new ArrayList>();

private int minColums = -1;

private POIFSFileSystem fs;

/**

* 总行数

*/

private int totalRows=0;

/**

* 上一行row的序号

*/

private int lastRowNumber;

/**

* 上一单元格的序号

*/

private int lastColumnNumber;

/**

* 是否输出formula,还是它对应的值

*/

private boolean outputFormulaValues = true;

/**

* 用于转换formulas

*/

private EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener;

//excel2003工作簿

private HSSFWorkbook stubWorkbook;

private SSTRecord sstRecord;

private FormatTrackingHSSFListener formatListener;

private final HSSFDataFormatter formatter = new HSSFDataFormatter();

/**

* 文件的绝对路径

*/

private String filePath = "";

//表索引

private int sheetIndex = 0;

private BoundSheetRecord[] orderedBSRs;

@SuppressWarnings("unchecked")

private ArrayList boundSheetRecords = new ArrayList();

private int nextRow;

private int nextColumn;

private boolean outputNextStringRecord;

//当前行

private int curRow = 0;

//存储一行记录所有单元格的容器

private List cellList = new ArrayList();

/**

* 判断整行是否为空行的标记

*/

private boolean flag = false;

@SuppressWarnings("unused")

private String sheetName;

/**

* 遍历excel下所有的sheet

*

* @param fileName

* @throws Exception

*/

public int process(String fileName) throws Exception {

filePath = fileName;

this.fs = new POIFSFileSystem(new FileInputStream(fileName));

MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);

formatListener = new FormatTrackingHSSFListener(listener);

HSSFEventFactory factory = new HSSFEventFactory();

HSSFRequest request = new HSSFRequest();

if (outputFormulaValues) {

request.addListenerForAllRecords(formatListener);

} else {

workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener);

request.addListenerForAllRecords(workbookBuildingListener);

}

factory.processWorkbookEvents(request, fs);

return totalRows; //返回该excel文件的总行数,不包括首列和空行

}

public List> process(InputStream in) throws Exception {

//filePath = fileName;

this.fs = new POIFSFileSystem(in);

MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);

formatListener = new FormatTrackingHSSFListener(listener);

HSSFEventFactory factory = new HSSFEventFactory();

HSSFRequest request = new HSSFRequest();

if (outputFormulaValues) {

request.addListenerForAllRecords(formatListener);

} else {

workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener);

request.addListenerForAllRecords(workbookBuildingListener);

}

factory.processWorkbookEvents(request, fs);

return dataList; //返回该excel文件的总行数,不包括首列和空行

}

/**

* HSSFListener 监听方法,处理Record

* 处理每个单元格

* @param record

*/

@SuppressWarnings("unchecked")

public void processRecord(Record record) {

int thisRow = -1;

int thisColumn = -1;

String thisStr = null;

String value = null;

switch (record.getSid()) {

case BoundSheetRecord.sid:

boundSheetRecords.add(record);

break;

case BOFRecord.sid: //开始处理每个sheet

BOFRecord br = (BOFRecord) record;

if (br.getType() == BOFRecord.TYPE_WORKSHEET) {

//如果有需要,则建立子工作簿

if (workbookBuildingListener != null && stubWorkbook == null) {

stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();

}

if (orderedBSRs == null) {

orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);

}

sheetName = orderedBSRs[sheetIndex].getSheetname();

sheetIndex++;

}

break;

case SSTRecord.sid:

sstRecord = (SSTRecord) record;

break;

case BlankRecord.sid: //单元格为空白

BlankRecord brec = (BlankRecord) record;

thisRow = brec.getRow();

thisColumn = brec.getColumn();

thisStr = "";

cellList.add(thisColumn, thisStr);

break;

case BoolErrRecord.sid: //单元格为布尔类型

BoolErrRecord berec = (BoolErrRecord) record;

thisRow = berec.getRow();

thisColumn = berec.getColumn();

thisStr = berec.getBooleanValue() + "";

cellList.add(thisColumn, thisStr);

checkRowIsNull(thisStr); //如果里面某个单元格含有值,则标识该行不为空行

break;

case FormulaRecord.sid://单元格为公式类型

FormulaRecord frec = (FormulaRecord) record;

thisRow = frec.getRow();

thisColumn = frec.getColumn();

if (outputFormulaValues) {

if (Double.isNaN(frec.getValue())) {

outputNextStringRecord = true;

nextRow = frec.getRow();

nextColumn = frec.getColumn();

} else {

thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';

}

} else {

thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';

}

cellList.add(thisColumn, thisStr);

checkRowIsNull(thisStr); //如果里面某个单元格含有值,则标识该行不为空行

break;

case StringRecord.sid: //单元格中公式的字符串

if (outputNextStringRecord) {

StringRecord srec = (StringRecord) record;

thisStr = srec.getString();

thisRow = nextRow;

thisColumn = nextColumn;

outputNextStringRecord = false;

}

break;

case LabelRecord.sid:

LabelRecord lrec = (LabelRecord) record;

curRow = thisRow = lrec.getRow();

thisColumn = lrec.getColumn();

value = lrec.getValue().trim();

value = value.equals("") ? "" : value;

cellList.add(thisColumn, value);

checkRowIsNull(value); //如果里面某个单元格含有值,则标识该行不为空行

break;

case LabelSSTRecord.sid: //单元格为字符串类型

LabelSSTRecord lsrec = (LabelSSTRecord) record;

curRow = thisRow = lsrec.getRow();

thisColumn = lsrec.getColumn();

if (sstRecord == null) {

cellList.add(thisColumn, "");

} else {

value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim();

value = value.equals("") ? "" : value;

cellList.add(thisColumn, value);

checkRowIsNull(value); //如果里面某个单元格含有值,则标识该行不为空行

}

break;

case NumberRecord.sid: //单元格为数字类型

NumberRecord numrec = (NumberRecord) record;

curRow = thisRow = numrec.getRow();

thisColumn = numrec.getColumn();

//第一种方式

//value = formatListener.formatNumberDateCell(numrec).trim();//这个被写死,采用的m/d/yy h:mm格式,不符合要求

//第二种方式,参照formatNumberDateCell里面的实现方法编写

Double valueDouble=((NumberRecord)numrec).getValue();

String formatString=formatListener.getFormatString(numrec);

if (formatString.contains("m/d/yy")){

formatString="yyyy-MM-dd hh:mm:ss";

}

int formatIndex=formatListener.getFormatIndex(numrec);

value=formatter.formatRawCellContents(valueDouble, formatIndex, formatString).trim();

value = value.equals("") ? "" : value;

//向容器加入列值

cellList.add(thisColumn, value);

checkRowIsNull(value); //如果里面某个单元格含有值,则标识该行不为空行

break;

default:

break;

}

//遇到新行的操作

if (thisRow != -1 && thisRow != lastRowNumber) {

lastColumnNumber = -1;

}

//空值的操作

if (record instanceof MissingCellDummyRecord) {

MissingCellDummyRecord mc = (MissingCellDummyRecord) record;

curRow = thisRow = mc.getRow();

thisColumn = mc.getColumn();

cellList.add(thisColumn, "");

}

//更新行和列的值

if (thisRow > -1)

lastRowNumber = thisRow;

if (thisColumn > -1)

lastColumnNumber = thisColumn;

//行结束时的操作

if (record instanceof LastCellOfRowDummyRecord) {

if (minColums > 0) {

//列值重新置空

if (lastColumnNumber == -1) {

lastColumnNumber = 0;

}

}

lastColumnNumber = -1;

if (flag&&curRow!=0) { //该行不为空行且该行不是第一行,发送(第一行为列名,不需要)

//ExcelReaderUtil.sendRows(filePath, sheetName, sheetIndex, curRow + 1, cellList); //每行结束时,调用sendRows()方法

totalRows++;

//添加到数据集合中

dataList.add(cellList);

}

//清空容器

cellList = new ArrayList();

//cellList.clear();

flag=false;

}

}

/**

* 如果里面某个单元格含有值,则标识该行不为空行

* @param value

*/

public void checkRowIsNull(String value){

if (value != null && !"".equals(value)) {

flag = true;

}

}

}

读取xlsx格式数据(自动获取表头长度方式):

import org.apache.poi.openxml4j.opc.OPCPackage;

import org.apache.poi.ss.usermodel.BuiltinFormats;

import org.apache.poi.ss.usermodel.DataFormatter;

import org.apache.poi.xssf.eventusermodel.XSSFReader;

import org.apache.poi.xssf.model.SharedStringsTable;

import org.apache.poi.xssf.model.StylesTable;

import org.apache.poi.xssf.usermodel.XSSFCellStyle;

import org.apache.poi.xssf.usermodel.XSSFRichTextString;

import org.xml.sax.Attributes;

import org.xml.sax.InputSource;

import org.xml.sax.SAXException;

import org.xml.sax.XMLReader;

import org.xml.sax.helpers.DefaultHandler;

import org.xml.sax.helpers.XMLReaderFactory;

import java.io.InputStream;

import java.util.ArrayList;

import java.util.List;

/**

* @desc POI读取excel有两种模式,一种是用户模式,一种是事件驱动模式

* 采用SAX事件驱动模式解决XLSX文件,可以有效解决用户模式内存溢出的问题,

* 该模式是POI官方推荐的读取大数据的模式,

* 在用户模式下,数据量较大,Sheet较多,或者是有很多无用的空行的情况下,容易出现内存溢出

*

* 用于解决.xlsx2007版本大数据量问题

**/

public class ExcelXlsxReader extends DefaultHandler {

//存储所有数据

private List> dataList = new ArrayList>();

/**

* 单元格中的数据可能的数据类型

*/

enum CellDataType {

BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, DATE, NULL

}

/**

* 共享字符串表

*/

private SharedStringsTable sst;

/**

* 上一次的索引值

*/

private String lastIndex;

/**

* 文件的绝对路径

*/

private String filePath = "";

/**

* 工作表索引

*/

private int sheetIndex = 0;

/**

* sheet名

*/

private String sheetName = "";

/**

* 总行数

*/

private int totalRows=0;

/**

* 一行内cell集合

*/

private List cellList = new ArrayList();

/**

* 判断整行是否为空行的标记

*/

private boolean flag = false;

/**

* 当前行

*/

private int curRow = 1;

/**

* 当前列

*/

private int curCol = 0;

/**

* T元素标识

*/

private boolean isTElement;

/**

* 判断上一单元格是否为文本空单元格

*/

private boolean startElementFlag = true;

private boolean endElementFlag = false;

private boolean charactersFlag = false;

/**

* 异常信息,如果为空则表示没有异常

*/

private String exceptionMessage;

/**

* 单元格数据类型,默认为字符串类型

*/

private CellDataType nextDataType = CellDataType.SSTINDEX;

private final DataFormatter formatter = new DataFormatter();

/**

* 单元格日期格式的索引

*/

private short formatIndex;

/**

* 日期格式字符串

*/

private String formatString;

//定义前一个元素和当前元素的位置,用来计算其中空的单元格数量,如A6和A8等

private String prePreRef = "A", preRef = null, ref = null;

//定义该文档一行最大的单元格数,用来补全一行最后可能缺失的单元格

private String maxRef = null;

/**

* 单元格

*/

private StylesTable stylesTable;

/**

* 遍历工作簿中所有的电子表格

* 并缓存在mySheetList中

*

* @param filename

* @throws Exception

*/

public int process(String filename) throws Exception {

filePath = filename;

OPCPackage pkg = OPCPackage.open(filename);

XSSFReader xssfReader = new XSSFReader(pkg);

stylesTable = xssfReader.getStylesTable();

SharedStringsTable sst = xssfReader.getSharedStringsTable();

XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");

this.sst = sst;

parser.setContentHandler(this);

XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

while (sheets.hasNext()) { //遍历sheet

curRow = 1; //标记初始行为第一行

sheetIndex++;

InputStream sheet = sheets.next(); //sheets.next()和sheets.getSheetName()不能换位置,否则sheetName报错

sheetName = sheets.getSheetName();

InputSource sheetSource = new InputSource(sheet);

parser.parse(sheetSource); //解析excel的每条记录,在这个过程中startElement()、characters()、endElement()这三个函数会依次执行

sheet.close();

}

return totalRows; //返回该excel文件的总行数,不包括首列和空行

}

public List> process(InputStream in) throws Exception {

//filePath = filename;

OPCPackage pkg = OPCPackage.open(in);

XSSFReader xssfReader = new XSSFReader(pkg);

stylesTable = xssfReader.getStylesTable();

SharedStringsTable sst = xssfReader.getSharedStringsTable();

//XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");

XMLReader parser = XMLReaderFactory.createXMLReader();

this.sst = sst;

parser.setContentHandler(this);

XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

while (sheets.hasNext()) { //遍历sheet

curRow = 1; //标记初始行为第一行

sheetIndex++;

InputStream sheet = sheets.next(); //sheets.next()和sheets.getSheetName()不能换位置,否则sheetName报错

sheetName = sheets.getSheetName();

InputSource sheetSource = new InputSource(sheet);

parser.parse(sheetSource); //解析excel的每条记录,在这个过程中startElement()、characters()、endElement()这三个函数会依次执行

sheet.close();

}

return dataList;

}

/**

* 第一个执行

*

* @param uri

* @param localName

* @param name

* @param attributes

* @throws SAXException

*/

@Override

public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {

//c => 单元格

if ("c".equals(name)) {

//前一个单元格的位置

if (preRef == null) {

preRef = attributes.getValue("r");

} else {

//中部文本空单元格标识 ‘endElementFlag' 判断前一次是否为文本空字符串,true则表明不是文本空字符串,false表明是文本空字符串跳过把空字符串的位置赋予preRef

if (endElementFlag){

preRef = ref;

}

}

//当前单元格的位置

ref = attributes.getValue("r");

//首部文本空单元格标识 ‘startElementFlag' 判断前一次,即首部是否为文本空字符串,true则表明不是文本空字符串,false表明是文本空字符串, 且已知当前格,即第二格带“B”标志,则ref赋予preRef

if (!startElementFlag && !flag){ //上一个单元格为文本空单元格,执行下面的,使ref=preRef;flag为true表明该单元格之前有数据值,即该单元格不是首部空单元格,则跳过

// 这里只有上一个单元格为文本空单元格,且之前的几个单元格都没有值才会执行

preRef = ref;

}

//设定单元格类型

this.setNextDataType(attributes);

endElementFlag = false;

charactersFlag = false;

startElementFlag = false;

}

//当元素为t时

if ("t".equals(name)) {

isTElement = true;

} else {

isTElement = false;

}

//置空

lastIndex = "";

}

/**

* 第二个执行

* 得到单元格对应的索引值或是内容值

* 如果单元格类型是字符串、INLINESTR、数字、日期,lastIndex则是索引值

* 如果单元格类型是布尔值、错误、公式,lastIndex则是内容值

* @param ch

* @param start

* @param length

* @throws SAXException

*/

@Override

public void characters(char[] ch, int start, int length) throws SAXException {

startElementFlag = true;

charactersFlag = true;

lastIndex += new String(ch, start, length);

}

/**

* 第三个执行

*

* @param uri

* @param localName

* @param name

* @throws SAXException

*/

@Override

public void endElement(String uri, String localName, String name) throws SAXException {

//t元素也包含字符串

if (isTElement) {

//将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符

String value = lastIndex.trim();

cellList.add(curCol, value);

endElementFlag = true;

curCol++;

isTElement = false;

//如果里面某个单元格含有值,则标识该行不为空行

if (value != null && !"".equals(value)) {

flag = true;

}

} else if ("v".equals(name)) {

//v => 单元格的值,如果单元格是字符串,则v标签的值为该字符串在SST中的索引

String value = this.getDataValue(lastIndex.trim(), "");//根据索引值获取对应的单元格值

//补全单元格之间的空单元格

if (!ref.equals(preRef)) {

int len = countNullCell(ref, preRef);

for (int i = 0; i < len; i++) {

cellList.add(curCol, "");

curCol++;

}

} else if (ref.equals(preRef) && !ref.startsWith("A")){ //ref等于preRef,且以B或者C...开头,表明首部为空格

int len = countNullCell(ref, "A");

for (int i = 0; i <= len; i++) {

cellList.add(curCol, "");

curCol++;

}

}

cellList.add(curCol, value);

curCol++;

endElementFlag = true;

//如果里面某个单元格含有值,则标识该行不为空行

if (value != null && !"".equals(value)) {

flag = true;

}

} else {

//如果标签名称为row,这说明已到行尾,调用optRows()方法

if ("row".equals(name)) {

//默认第一行为表头,以该行单元格数目为最大数目

if (curRow == 1) {

maxRef = ref;

}

//补全一行尾部可能缺失的单元格

if (maxRef != null) {

int len = -1;

//前一单元格,true则不是文本空字符串,false则是文本空字符串

if (charactersFlag){

len = countNullCell(maxRef, ref);

}else {

len = countNullCell(maxRef, preRef);

}

for (int i = 0; i <= len; i++) {

cellList.add(curCol, "");

curCol++;

}

}

if (flag&&curRow!=1){ //该行不为空行且该行不是第一行,则发送(第一行为列名,不需要)

//ExcelReaderUtil.sendRows(filePath, sheetName, sheetIndex, curRow, cellList);

totalRows++;

//添加到数据集合中

dataList.add(cellList);

}

//清空容器

cellList = new ArrayList();

//cellList.clear();

curRow++;

curCol = 0;

preRef = null;

prePreRef = null;

ref = null;

flag=false;

}

}

}

/**

* 处理数据类型

*

* @param attributes

*/

public void setNextDataType(Attributes attributes) {

nextDataType = CellDataType.NUMBER; //cellType为空,则表示该单元格类型为数字

formatIndex = -1;

formatString = null;

String cellType = attributes.getValue("t"); //单元格类型

String cellStyleStr = attributes.getValue("s"); //

String columnData = attributes.getValue("r"); //获取单元格的位置,如A1,B1

if ("b".equals(cellType)) { //处理布尔值

nextDataType = CellDataType.BOOL;

} else if ("e".equals(cellType)) { //处理错误

nextDataType = CellDataType.ERROR;

} else if ("inlineStr".equals(cellType)) {

nextDataType = CellDataType.INLINESTR;

} else if ("s".equals(cellType)) { //处理字符串

nextDataType = CellDataType.SSTINDEX;

} else if ("str".equals(cellType)) {

nextDataType = CellDataType.FORMULA;

}

if (cellStyleStr != null) { //处理日期

int styleIndex = Integer.parseInt(cellStyleStr);

XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);

formatIndex = style.getDataFormat();

formatString = style.getDataFormatString();

if (formatString.contains("m/d/yyyy") || formatString.contains("yyyy/mm/dd")|| formatString.contains("yyyy/m/d") ) {

nextDataType = CellDataType.DATE;

formatString = "yyyy-MM-dd hh:mm:ss";

}

if (formatString == null) {

nextDataType = CellDataType.NULL;

formatString = BuiltinFormats.getBuiltinFormat(formatIndex);

}

}

}

/**

* 对解析出来的数据进行类型处理

* @param value 单元格的值,

* value代表解析:BOOL的为0或1, ERROR的为内容值,FORMULA的为内容值,INLINESTR的为索引值需转换为内容值,

* SSTINDEX的为索引值需转换为内容值, NUMBER为内容值,DATE为内容值

* @param thisStr 一个空字符串

* @return

*/

@SuppressWarnings("deprecation")

public String getDataValue(String value, String thisStr) {

switch (nextDataType) {

// 这几个的顺序不能随便交换,交换了很可能会导致数据错误

case BOOL: //布尔值

char first = value.charAt(0);

thisStr = first == '0' ? "FALSE" : "TRUE";

break;

case ERROR: //错误

thisStr = "\"ERROR:" + value.toString() + '"';

break;

case FORMULA: //公式

thisStr = '"' + value.toString() + '"';

break;

case INLINESTR:

XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());

thisStr = rtsi.toString();

rtsi = null;

break;

case SSTINDEX: //字符串

String sstIndex = value.toString();

try {

int idx = Integer.parseInt(sstIndex);

XSSFRichTextString rtss = new XSSFRichTextString(sst.getEntryAt(idx));//根据idx索引值获取内容值

thisStr = rtss.toString();

//System.out.println(thisStr);

//有些字符串是文本格式的,但内容却是日期

rtss = null;

} catch (NumberFormatException ex) {

thisStr = value.toString();

}

break;

case NUMBER: //数字

if (formatString != null) {

thisStr = formatter.formatRawCellContents(Double.parseDouble(value), formatIndex, formatString).trim();

} else {

thisStr = value;

}

thisStr = thisStr.replace("_", "").trim();

break;

case DATE: //日期

thisStr = formatter.formatRawCellContents(Double.parseDouble(value), formatIndex, formatString);

// 对日期字符串作特殊处理,去掉T

thisStr = thisStr.replace("T", " ");

break;

default:

thisStr = " ";

break;

}

return thisStr;

}

public int countNullCell(String ref, String preRef) {

//excel2007最大行数是1048576,最大列数是16384,最后一列列名是XFD

String xfd = ref.replaceAll("\\d+", "");

String xfd_1 = preRef.replaceAll("\\d+", "");

xfd = fillChar(xfd, 3, '@', true);

xfd_1 = fillChar(xfd_1, 3, '@', true);

char[] letter = xfd.toCharArray();

char[] letter_1 = xfd_1.toCharArray();

int res = (letter[0] - letter_1[0]) * 26 * 26 + (letter[1] - letter_1[1]) * 26 + (letter[2] - letter_1[2]);

return res - 1;

}

public String fillChar(String str, int len, char let, boolean isPre) {

int len_1 = str.length();

if (len_1 < len) {

if (isPre) {

for (int i = 0; i < (len - len_1); i++) {

str = let + str;

}

} else {

for (int i = 0; i < (len - len_1); i++) {

str = str + let;

}

}

}

return str;

}

/**

* @return the exceptionMessage

*/

public String getExceptionMessage() {

return exceptionMessage;

}

}

读取工具类:

import java.io.InputStream;

import java.util.List;

public class ExcelReaderUtil {

//excel2003扩展名

public static final String EXCEL03_EXTENSION = ".xls";

//excel2007扩展名

public static final String EXCEL07_EXTENSION = ".xlsx";

//读取xls格式

public static List> readExcelXls(InputStream in) throws Exception {

ExcelXlsReader excelXls=new ExcelXlsReader();

List> dataList =excelXls.process(in);

return dataList;

}

//读取xlsx格式

public static List> readExcelXlsx(InputStream in) throws Exception {

ExcelXlsxReader excelXls=new ExcelXlsxReader();

List> dataList =excelXls.process(in);

return dataList;

}

public static void readExcel(String fileName) throws Exception {

int totalRows =0;

if (fileName.endsWith(EXCEL03_EXTENSION)) { //处理excel2003文件

ExcelXlsReader excelXls=new ExcelXlsReader();

totalRows =excelXls.process(fileName);

} else if (fileName.endsWith(EXCEL07_EXTENSION)) {//处理excel2007文件

ExcelXlsxReader excelXlsxReader = new ExcelXlsxReader();

totalRows = excelXlsxReader.process(fileName);

} else {

throw new Exception("文件格式错误,fileName的扩展名只能是xls或xlsx。");

}

System.out.println("发送的总行数:" + totalRows);

}

public static void main(String[] args) throws Exception {

String path="C:\\test.xlsx";

ExcelReaderUtil.readExcel(path);

}

}


版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:java中concat()方法的使用说明
下一篇:Java读写文件,在文件中搜索内容,并输出含有该内容的所有行方式
相关文章

 发表评论

暂时没有评论,来抢沙发吧~