前端时间写了注解方式Excel的读取和写入,它是根据注解完成Excel的操作,虽说支持大数据,但对于超大数据就无能为力了,因为它的读写期间都是将所有数据放入系统内存的,除非你有超大的内存。
因项目需要对超大数据的Excel读写操作,于是网上找了个超大数据的读写代码,这个不需要太大内存。并对此进行了简单的修改。
原理如下:
Excel超大数据读取:抽象Excel2007读取器,excel2007的底层数据结构是xml文件,采用SAX的事件驱动的方法解析 xml,需要继承DefaultHandler,在遇到文件内容时,事件会触发,这种做法可以大大降低内存的耗费,特别使用于大数据量的文件。
Excel超大数据写入:抽象excel2007读入器,先构建.xlsx一张模板,改写模板中的sheet.xml, 使用这种方法 写入.xlsx文件,不需要太大的内存。
先看调用示例:
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | String file = "E:/导入测试数据.xlsx";ExcelReader reader = newExcelReader() {publicvoidgetRows(intsheetIndex, intcurRow, List<String> rowList) {System.out.println("Sheet:"+ sheetIndex + ", Row:"+ curRow + ", Data:"+rowList);}};reader.process(file, 1);String file = "E:/导出测试数据.xlsx";ExcelWriter writer = newExcelWriter() {publicvoidgenerate() throwsException {// 电子表格开始this.beginSheet();for(intrownum = 0; rownum < 100; rownum++) {// 插入新行this.insertRow(rownum);// 建立新单元格,索引值从0开始,表示第一列this.createCell(0, "第 "+ rownum + " 行");this.createCell(1, 34343.123456789);this.createCell(2, "23.67%");this.createCell(3, "12:12:23");this.createCell(4, "2014-10-11 12:12:23");this.createCell(5, "true");this.createCell(6, "false");// 结束行this.endRow();}// 电子表格结束this.endSheet();}};writer.process(file);} | 
这里只展示了对数据的读取和写入,如果正式保存到数据库时建议读取一部分(如100条)再写入一次数据库,尽量不要读取一条就写入一条,这样会非常耗费资源。?
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 | importjava.io.InputStream;importjava.math.BigDecimal;importjava.text.SimpleDateFormat;importjava.util.ArrayList;importjava.util.Date;importjava.util.Iterator;importjava.util.List;importorg.apache.poi.hssf.usermodel.HSSFDateUtil;importorg.apache.poi.openxml4j.opc.OPCPackage;importorg.apache.poi.xssf.eventusermodel.XSSFReader;importorg.apache.poi.xssf.model.SharedStringsTable;importorg.apache.poi.xssf.usermodel.XSSFRichTextString;importorg.xml.sax.Attributes;importorg.xml.sax.InputSource;importorg.xml.sax.SAXException;importorg.xml.sax.XMLReader;importorg.xml.sax.helpers.DefaultHandler;importorg.xml.sax.helpers.XMLReaderFactory;/*** Excel超大数据读取,抽象Excel2007读取器,excel2007的底层数据结构是xml文件,采用SAX的事件驱动的方法解析* xml,需要继承DefaultHandler,在遇到文件内容时,事件会触发,这种做法可以大大降低 内存的耗费,特别使用于大数据量的文件。* @version 2014-9-2*/publicabstractclassExcelReader extendsDefaultHandler {// 共享字符串表privateSharedStringsTable sst;// 上一次的内容privateString lastContents;privatebooleannextIsString;privateintsheetIndex = -1;privateList<String> rowList = newArrayList<String>();// 当前行privateintcurRow = 0;// 当前列privateintcurCol = 0;// 日期标志privatebooleandateFlag;// 数字标志privatebooleannumberFlag;privatebooleanisTElement;/*** 遍历工作簿中所有的电子表格* @param filename* @throws Exception*/publicvoidprocess(String filename) throwsException {OPCPackage pkg = OPCPackage.open(filename);XSSFReader r = newXSSFReader(pkg);SharedStringsTable sst = r.getSharedStringsTable();XMLReader parser = fetchSheetParser(sst);Iterator<InputStream> sheets = r.getSheetsData();while(sheets.hasNext()) {curRow = 0;sheetIndex++;InputStream sheet = sheets.next();InputSource sheetSource = newInputSource(sheet);parser.parse(sheetSource);sheet.close();}}/*** 只遍历一个电子表格,其中sheetId为要遍历的sheet索引,从1开始,1-3* @param filename* @param sheetId* @throws Exception*/publicvoidprocess(String filename, intsheetId) throwsException {OPCPackage pkg = OPCPackage.open(filename);XSSFReader r = newXSSFReader(pkg);SharedStringsTable sst = r.getSharedStringsTable();XMLReader parser = fetchSheetParser(sst);// 根据 rId# 或 rSheet# 查找sheetInputStream sheet2 = r.getSheet("rId"+ sheetId);sheetIndex++;InputSource sheetSource = newInputSource(sheet2);parser.parse(sheetSource);sheet2.close();}publicXMLReader fetchSheetParser(SharedStringsTable sst)throwsSAXException {XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");this.sst = sst;parser.setContentHandler(this);returnparser;}publicvoidstartElement(String uri, String localName, String name,Attributes attributes) throwsSAXException {// System.out.println("startElement: " + localName + ", " + name + ", " + attributes);// c => 单元格if("c".equals(name)) {// 如果下一个元素是 SST 的索引,则将nextIsString标记为trueString cellType = attributes.getValue("t");if("s".equals(cellType)) {nextIsString = true;} else{nextIsString = false;}// 日期格式String cellDateType = attributes.getValue("s");if("1".equals(cellDateType)) {dateFlag = true;} else{dateFlag = false;}String cellNumberType = attributes.getValue("s");if("2".equals(cellNumberType)) {numberFlag = true;} else{numberFlag = false;}}// 当元素为t时if("t".equals(name)) {isTElement = true;} else{isTElement = false;}// 置空lastContents = "";}publicvoidendElement(String uri, String localName, String name)throwsSAXException {// System.out.println("endElement: " + localName + ", " + name);// 根据SST的索引值的到单元格的真正要存储的字符串// 这时characters()方法可能会被调用多次if(nextIsString) {try{intidx = Integer.parseInt(lastContents);lastContents = newXSSFRichTextString(sst.getEntryAt(idx)).toString();} catch(Exception e) {}}// t元素也包含字符串if(isTElement) {String value = lastContents.trim();rowList.add(curCol, value);curCol++;isTElement = false;// v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引// 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符} elseif("v".equals(name)) {String value = lastContents.trim();value = value.equals("") ? " ": value;try{// 日期格式处理if(dateFlag) {Date date = HSSFDateUtil.getJavaDate(Double.valueOf(value));SimpleDateFormat dateFormat = newSimpleDateFormat("dd/MM/yyyy");value = dateFormat.format(date);}// 数字类型处理if(numberFlag) {BigDecimal bd = newBigDecimal(value);value = bd.setScale(3, BigDecimal.ROUND_UP).toString();}} catch(Exception e) {// 转换失败仍用读出来的值}rowList.add(curCol, value);curCol++;} else{// 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法if(name.equals("row")) {getRows(sheetIndex + 1, curRow, rowList);rowList.clear();curRow++;curCol = 0;}}}publicvoidcharacters(char[] ch, intstart, intlength)throwsSAXException {// 得到单元格内容的值lastContents += newString(ch, start, length);}/*** 获取行数据回调* @param sheetIndex* @param curRow* @param rowList*/publicabstractvoidgetRows(intsheetIndex, intcurRow, List<String> rowList);/*** 测试方法*/publicstaticvoidmain(String[] args) throwsException {String file = "E:/导入测试数据.xlsx";ExcelReader reader = newExcelReader() {publicvoidgetRows(intsheetIndex, intcurRow, List<String> rowList) {System.out.println("Sheet:"+ sheetIndex + ", Row:"+ curRow + ", Data:"+rowList);}};reader.process(file, 1);}} | 
以下为写入程序
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 | importjava.io.File;importjava.io.FileInputStream;importjava.io.FileOutputStream;importjava.io.FileWriter;importjava.io.IOException;importjava.io.InputStream;importjava.io.OutputStream;importjava.io.Writer;importjava.util.Calendar;importjava.util.Enumeration;importjava.util.zip.ZipEntry;importjava.util.zip.ZipFile;importjava.util.zip.ZipOutputStream;importorg.apache.poi.hssf.util.CellReference;importorg.apache.poi.ss.usermodel.DateUtil;importorg.apache.poi.xssf.usermodel.XSSFSheet;importorg.apache.poi.xssf.usermodel.XSSFWorkbook;/*** Excel超大数据写入,抽象excel2007读入器,先构建.xlsx一张模板,改写模板中的sheet.xml,* 使用这种方法 写入.xlsx文件,不需要太大的内存* @version 2014-9-2*/publicabstractclassExcelWriter {privateSpreadsheetWriter sw;/*** 写入电子表格的主要流程** @param fileName* @throws Exception*/publicvoidprocess(String fileName) throwsException {// 建立工作簿和电子表格对象XSSFWorkbook wb = newXSSFWorkbook();XSSFSheet sheet = wb.createSheet("sheet1");// 持有电子表格数据的xml文件名 例如 /xl/worksheets/sheet1.xmlString sheetRef = sheet.getPackagePart().getPartName().getName();// 保存模板FileOutputStream os = newFileOutputStream("template.xlsx");wb.write(os);os.close();// 生成xml文件File tmp = File.createTempFile("sheet", ".xml");Writer fw = newFileWriter(tmp);sw = newSpreadsheetWriter(fw);generate();fw.close();// 使用产生的数据替换模板File templateFile = newFile("template.xlsx");FileOutputStream out = newFileOutputStream(fileName);substitute(templateFile, tmp, sheetRef.substring(1), out);out.close();// 删除文件之前调用一下垃圾回收器,否则无法删除模板文件System.gc();// 删除临时模板文件if(templateFile.isFile() && templateFile.exists()) {templateFile.delete();}}/*** 类使用者应该使用此方法进行写操作** @throws Exception*/publicabstractvoidgenerate() throwsException;publicvoidbeginSheet() throwsIOException {sw.beginSheet();}publicvoidinsertRow(introwNum) throwsIOException {sw.insertRow(rowNum);}publicvoidcreateCell(intcolumnIndex, String value) throwsIOException {sw.createCell(columnIndex, value, -1);}publicvoidcreateCell(intcolumnIndex, doublevalue) throwsIOException {sw.createCell(columnIndex, value, -1);}publicvoidendRow() throwsIOException {sw.endRow();}publicvoidendSheet() throwsIOException {sw.endSheet();}/**** @param zipfile* the template file* @param tmpfile* the XML file with the sheet data* @param entry* the name of the sheet entry to substitute, e.g.* xl/worksheets/sheet1.xml* @param out* the stream to write the result to*/privatestaticvoidsubstitute(File zipfile, File tmpfile, String entry,OutputStream out) throwsIOException {ZipFile zip = newZipFile(zipfile);ZipOutputStream zos = newZipOutputStream(out);@SuppressWarnings("unchecked")Enumeration<ZipEntry> en = (Enumeration<ZipEntry>) zip.entries();while(en.hasMoreElements()) {ZipEntry ze = en.nextElement();if(!ze.getName().equals(entry)) {zos.putNextEntry(newZipEntry(ze.getName()));InputStream is = zip.getInputStream(ze);copyStream(is, zos);is.close();}}zos.putNextEntry(newZipEntry(entry));InputStream is = newFileInputStream(tmpfile);copyStream(is, zos);is.close();zos.close();}privatestaticvoidcopyStream(InputStream in, OutputStream out)throwsIOException {byte[] chunk = newbyte[1024];intcount;while((count = in.read(chunk)) >= 0) {out.write(chunk, 0, count);}}/*** 在写入器中写入电子表格**/publicstaticclassSpreadsheetWriter {privatefinalWriter _out;privateint_rownum;privatestaticString LINE_SEPARATOR = System.getProperty("line.separator");publicSpreadsheetWriter(Writer out) {_out = out;}publicvoidbeginSheet() throwsIOException {_out.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"_out.write("<sheetData>"+ LINE_SEPARATOR);}publicvoidendSheet() throwsIOException {_out.write("</sheetData>");_out.write("</worksheet>");}/*** 插入新行** @param rownum* 以0开始*/publicvoidinsertRow(intrownum) throwsIOException {_out.write("<row r=\""+ (rownum + 1) + "\">"+ LINE_SEPARATOR);this._rownum = rownum;}/*** 插入行结束标志*/publicvoidendRow() throwsIOException {_out.write("</row>"+ LINE_SEPARATOR);}/*** 插入新列** @param columnIndex* @param value* @param styleIndex* @throws IOException*/publicvoidcreateCell(intcolumnIndex, String value, intstyleIndex)throwsIOException {String ref = newCellReference(_rownum, columnIndex).formatAsString();_out.write("<c r=\""+ ref + "\" t=\"inlineStr\"");if(styleIndex != -1)_out.write(" s=\""+ styleIndex + "\"");_out.write(">");_out.write("<is><t>"+ encoderXML(value) + "</t></is>");_out.write("</c>");}publicvoidcreateCell(intcolumnIndex, String value)throwsIOException {createCell(columnIndex, value, -1);}publicvoidcreateCell(intcolumnIndex, doublevalue, intstyleIndex)throwsIOException {String ref = newCellReference(_rownum, columnIndex).formatAsString();_out.write("<c r=\""+ ref + "\" t=\"n\"");if(styleIndex != -1)_out.write(" s=\""+ styleIndex + "\"");_out.write(">");_out.write("<v>"+ value + "</v>");_out.write("</c>");}publicvoidcreateCell(intcolumnIndex, doublevalue)throwsIOException {createCell(columnIndex, value, -1);}publicvoidcreateCell(intcolumnIndex, Calendar value, intstyleIndex)throwsIOException {createCell(columnIndex, DateUtil.getExcelDate(value, false),styleIndex);}}// XML EncodeprivatestaticfinalString[] xmlCode = newString[256];static{// Special charactersxmlCode[‘\‘‘] = "‘";xmlCode[‘\"‘] = "\""; // double quotexmlCode[‘&‘] = "&"; // ampersandxmlCode[‘<‘] = "<"; // lower thanxmlCode[‘>‘] = ">"; // greater than}/*** <p>* Encode the given text into xml.* </p>** @param string* the text to encode* @return the encoded string*/publicstaticString encoderXML(String string) {if(string == null)return"";intn = string.length();charcharacter;String xmlchar;StringBuffer buffer = newStringBuffer();// loop over all the characters of the String.for(inti = 0; i < n; i++) {character = string.charAt(i);// the xmlcode of these characters are added to a StringBuffer// one by onetry{xmlchar = xmlCode[character];if(xmlchar == null) {buffer.append(character);} else{buffer.append(xmlCode[character]);}} catch(ArrayIndexOutOfBoundsException aioobe) {buffer.append(character);}}returnbuffer.toString();}/*** 测试方法*/publicstaticvoidmain(String[] args) throwsException {String file = "E:/导出测试数据.xlsx";ExcelWriter writer = newExcelWriter() {publicvoidgenerate() throwsException {// 电子表格开始this.beginSheet();for(intrownum = 0; rownum < 100; rownum++) {// 插入新行this.insertRow(rownum);// 建立新单元格,索引值从0开始,表示第一列this.createCell(0, "第 "+ rownum + " 行");this.createCell(1, 34343.123456789);this.createCell(2, "23.67%");this.createCell(3, "12:12:23");this.createCell(4, "2014-10-11 12:12:23");this.createCell(5, "true");this.createCell(6, "false");// 结束行this.endRow();}// 电子表格结束this.endSheet();}};writer.process(file);}} | 
原文:http://www.cnblogs.com/apescode/p/5873687.html