public class TextActivity extends Activity implements OnClickListener, FileSelectCallbacks { private final static String TAG = "TextActivity"; private TextView tv_content; @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.activity_text); findViewById(R.id.btn_open).setOnClickListener(this); tv_content = (TextView) findViewById(R.id.tv_content); } @Override public void onClick(View v) { if (v.getId() == R.id.btn_open) { FileSelectFragment.show(this, new String[] {"doc"}, null); } } @Override public void onConfirmSelect(String absolutePath, String fileName, Map<String, Object> map_param) { String path = String.format("%s/%s", absolutePath, fileName); Log.d(TAG, "path="+path); //tm-extractors-0.4.jar与poi的包在编译时会冲突,二者只能同时导入一个 String content = readWord(path).trim(); Log.d(TAG, "content="+content); tv_content.setText(content); } @Override public boolean isFileValid(String absolutePath, String fileName, Map<String, Object> map_param) { return true; } private String readWord(String file) { String text = ""; try { FileInputStream in = new FileInputStream(new File(file)); WordExtractor extractor = new WordExtractor(); text = extractor.extractText(in); } catch (Exception e) { e.printStackTrace(); } return text; } }
public class HtmlActivity extends Activity implements OnClickListener, FileSelectCallbacks { private final static String TAG = "HtmlActivity"; private WebView wv_content; @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.activity_html); findViewById(R.id.btn_open).setOnClickListener(this); wv_content = (WebView) findViewById(R.id.wv_content); } @Override public void onClick(View v) { if (v.getId() == R.id.btn_open) { FileSelectFragment.show(this, new String[] {"doc", "docx"}, null); } } @Override public void onConfirmSelect(String absolutePath, String fileName, Map<String, Object> map_param) { String path = String.format("%s/%s", absolutePath, fileName); Log.d(TAG, "path="+path); //tm-extractors-0.4.jar与poi的包在编译时会冲突,二者只能同时导入一个 WordUtil wu = new WordUtil(path); Log.d(TAG, "htmlPath="+wu.htmlPath); wv_content.loadUrl("file:///" + wu.htmlPath); } @Override public boolean isFileValid(String absolutePath, String fileName, Map<String, Object> map_param) { return true; } }
public class WordUtil { private final static String TAG = "WordUtil"; public String htmlPath; private String docPath; private String picturePath; private List<Picture> pictures; private TableIterator tableIterator; private int presentPicture = 0; private FileOutputStream output; private String htmlBegin = "<html><meta charset=\"utf-8\"><body>"; private String htmlEnd = "</body></html>"; private String tableBegin = "<table style=\"border-collapse:collapse\" border=1 bordercolor=\"black\">"; private String tableEnd = "</table>"; private String rowBegin = "<tr>", rowEnd = "</tr>"; private String columnBegin = "<td>", columnEnd = "</td>"; private String lineBegin = "<p>", lineEnd = "</p>"; private String centerBegin = "<center>", centerEnd = "</center>"; private String boldBegin = "<b>", boldEnd = "</b>"; private String underlineBegin = "<u>", underlineEnd = "</u>"; private String italicBegin = "<i>", italicEnd = "</i>"; private String fontSizeTag = "<font size=\"%d\">"; private String fontColorTag = "<font color=\"%s\">"; private String fontEnd = "</font>"; private String spanColor = "<span style=\"color:%s;\">", spanEnd = "</span>"; private String divRight = "<div align=\"right\">", divEnd = "</div>"; private String imgBegin = "<img src=\"%s\" >"; public WordUtil(String doc_name) { docPath = doc_name; htmlPath = FileUtil.createFile("html", FileUtil.getFileName(docPath) + ".html"); Log.d(TAG, "htmlPath=" + htmlPath); try { output = new FileOutputStream(new File(htmlPath)); presentPicture = 0; output.write(htmlBegin.getBytes()); if (docPath.endsWith(".doc")) { readDOC(); } else if (docPath.endsWith(".docx")) { readDOCX(); } output.write(htmlEnd.getBytes()); output.close(); } catch (Exception e) { e.printStackTrace(); } } //读取word中的内容并写到sd卡上的html文件中 private void readDOC() { try { FileInputStream in = new FileInputStream(docPath); POIFSFileSystem pfs = new POIFSFileSystem(in); HWPFDocument hwpf = new HWPFDocument(pfs); Range range = hwpf.getRange(); pictures = hwpf.getPicturesTable().getAllPictures(); tableIterator = new TableIterator(range); int numParagraphs = range.numParagraphs();// 得到页面所有的段落数 for (int i = 0; i < numParagraphs; i++) { // 遍历段落数 Paragraph p = range.getParagraph(i); // 得到文档中的每一个段落 if (p.isInTable()) { int temp = i; if (tableIterator.hasNext()) { Table table = tableIterator.next(); output.write(tableBegin.getBytes()); int rows = table.numRows(); for (int r = 0; r < rows; r++) { output.write(rowBegin.getBytes()); TableRow row = table.getRow(r); int cols = row.numCells(); int rowNumParagraphs = row.numParagraphs(); int colsNumParagraphs = 0; for (int c = 0; c < cols; c++) { output.write(columnBegin.getBytes()); TableCell cell = row.getCell(c); int max = temp + cell.numParagraphs(); colsNumParagraphs = colsNumParagraphs + cell.numParagraphs(); for (int cp = temp; cp < max; cp++) { Paragraph p1 = range.getParagraph(cp); output.write(lineBegin.getBytes()); writeParagraphContent(p1); output.write(lineEnd.getBytes()); temp++; } output.write(columnEnd.getBytes()); } int max1 = temp + rowNumParagraphs; for (int m = temp + colsNumParagraphs; m < max1; m++) { temp++; } output.write(rowEnd.getBytes()); } output.write(tableEnd.getBytes()); } i = temp; } else { output.write(lineBegin.getBytes()); writeParagraphContent(p); output.write(lineEnd.getBytes()); } } } catch (Exception e) { e.printStackTrace(); } } private void readDOCX() { try { ZipFile docxFile = new ZipFile(new File(docPath)); ZipEntry sharedStringXML = docxFile.getEntry("word/document.xml"); InputStream inputStream = docxFile.getInputStream(sharedStringXML); XmlPullParser xmlParser = Xml.newPullParser(); xmlParser.setInput(inputStream, "utf-8"); boolean isTable = false; // 表格 boolean isSize = false; // 文字大小 boolean isColor = false; // 文字颜色 boolean isCenter = false; // 居中对齐 boolean isRight = false; // 靠右对齐 boolean isItalic = false; // 斜体 boolean isUnderline = false; // 下划线 boolean isBold = false; // 加粗 boolean isRegion = false; // 在那个区域中 int pic_ndex = 1; // docx中的图片名从image1开始,所以索引从1开始 int event_type = xmlParser.getEventType(); while (event_type != XmlPullParser.END_DOCUMENT) { switch (event_type) { case XmlPullParser.START_TAG: // 开始标签 String tagBegin = xmlParser.getName(); if (tagBegin.equalsIgnoreCase("r")) { isRegion = true; } if (tagBegin.equalsIgnoreCase("jc")) { // 判断对齐方式 String align = xmlParser.getAttributeValue(0); if (align.equals("center")) { output.write(centerBegin.getBytes()); isCenter = true; } if (align.equals("right")) { output.write(divRight.getBytes()); isRight = true; } } if (tagBegin.equalsIgnoreCase("color")) { // 判断文字颜色 String color = xmlParser.getAttributeValue(0); output.write(String.format(spanColor, color).getBytes()); isColor = true; } if (tagBegin.equalsIgnoreCase("sz")) { // 判断文字大小 if (isRegion == true) { int size = getSize(Integer.valueOf(xmlParser.getAttributeValue(0))); output.write(String.format(fontSizeTag, size).getBytes()); isSize = true; } } if (tagBegin.equalsIgnoreCase("tbl")) { // 检测到表格 output.write(tableBegin.getBytes()); isTable = true; } else if (tagBegin.equalsIgnoreCase("tr")) { // 表格行 output.write(rowBegin.getBytes()); } else if (tagBegin.equalsIgnoreCase("tc")) { // 表格列 output.write(columnBegin.getBytes()); } if (tagBegin.equalsIgnoreCase("pic")) { // 检测到图片 ZipEntry pic_entry = FileUtil.getPicEntry(docxFile, pic_ndex); if (pic_entry != null) { byte[] pictureBytes = FileUtil.getPictureBytes(docxFile, pic_entry); writeDocumentPicture(pictureBytes); } pic_ndex++; // 转换一张后,索引+1 } if (tagBegin.equalsIgnoreCase("p") && !isTable) {// 检测到段落,如果在表格中就无视 output.write(lineBegin.getBytes()); } if (tagBegin.equalsIgnoreCase("b")) { // 检测到加粗 isBold = true; } if (tagBegin.equalsIgnoreCase("u")) { // 检测到下划线 isUnderline = true; } if (tagBegin.equalsIgnoreCase("i")) { // 检测到斜体 isItalic = true; } // 检测到文本 if (tagBegin.equalsIgnoreCase("t")) { if (isBold == true) { // 加粗 output.write(boldBegin.getBytes()); } if (isUnderline == true) { // 检测到下划线,输入<u> output.write(underlineBegin.getBytes()); } if (isItalic == true) { // 检测到斜体,输入<i> output.write(italicBegin.getBytes()); } String text = xmlParser.nextText(); output.write(text.getBytes()); // 写入文本 if (isItalic == true) { // 输入斜体结束标签</i> output.write(italicEnd.getBytes()); isItalic = false; } if (isUnderline == true) { // 输入下划线结束标签</u> output.write(underlineEnd.getBytes()); isUnderline = false; } if (isBold == true) { // 输入加粗结束标签</b> output.write(boldEnd.getBytes()); isBold = false; } if (isSize == true) { // 输入字体结束标签</font> output.write(fontEnd.getBytes()); isSize = false; } if (isColor == true) { // 输入跨度结束标签</span> output.write(spanEnd.getBytes()); isColor = false; } // if (isCenter == true) { // 输入居中结束标签</center>。要在段落结束之前再输入该标签,因为该标签会强制换行 // output.write(centerEnd.getBytes()); // isCenter = false; // } if (isRight == true) { // 输入区块结束标签</div> output.write(divEnd.getBytes()); isRight = false; } } break; // 结束标签 case XmlPullParser.END_TAG: String tagEnd = xmlParser.getName(); if (tagEnd.equalsIgnoreCase("tbl")) { // 输入表格结束标签</table> output.write(tableEnd.getBytes()); isTable = false; } if (tagEnd.equalsIgnoreCase("tr")) { // 输入表格行结束标签</tr> output.write(rowEnd.getBytes()); } if (tagEnd.equalsIgnoreCase("tc")) { // 输入表格列结束标签</td> output.write(columnEnd.getBytes()); } if (tagEnd.equalsIgnoreCase("p")) { // 输入段落结束标签</p>,如果在表格中就无视 if (isTable == false) { if (isCenter == true) { // 输入居中结束标签</center> output.write(centerEnd.getBytes()); isCenter = false; } output.write(lineEnd.getBytes()); } } if (tagEnd.equalsIgnoreCase("r")) { isRegion = false; } break; default: break; } event_type = xmlParser.next(); } } catch (Exception e) { e.printStackTrace(); } } private int getSize(int sizeType) { if (sizeType >= 1 && sizeType <= 8) { return 1; } else if (sizeType >= 9 && sizeType <= 11) { return 2; } else if (sizeType >= 12 && sizeType <= 14) { return 3; } else if (sizeType >= 15 && sizeType <= 19) { return 4; } else if (sizeType >= 20 && sizeType <= 29) { return 5; } else if (sizeType >= 30 && sizeType <= 39) { return 6; } else if (sizeType >= 40) { return 7; } else { return 3; } } private String getColor(int colorType) { if (colorType == 1) { return "#000000"; } else if (colorType == 2) { return "#0000FF"; } else if (colorType == 3 || colorType == 4) { return "#00FF00"; } else if (colorType == 5 || colorType == 6) { return "#FF0000"; } else if (colorType == 7) { return "#FFFF00"; } else if (colorType == 8) { return "#FFFFFF"; } else if (colorType == 9 || colorType == 15) { return "#CCCCCC"; } else if (colorType == 10 || colorType == 11) { return "#00FF00"; } else if (colorType == 12 || colorType == 16) { return "#080808"; } else if (colorType == 13 || colorType == 14) { return "#FFFF00"; } else { return "#000000"; } } public void writeDocumentPicture(byte[] pictureBytes) { picturePath = FileUtil.createFile("html", FileUtil.getFileName(docPath) + presentPicture + ".jpg"); FileUtil.writePicture(picturePath, pictureBytes); presentPicture++; String imageString = String.format(imgBegin, picturePath); try { output.write(imageString.getBytes()); } catch (Exception e) { e.printStackTrace(); } } public void writeParagraphContent(Paragraph paragraph) { Paragraph p = paragraph; int pnumCharacterRuns = p.numCharacterRuns(); for (int j = 0; j < pnumCharacterRuns; j++) { CharacterRun run = p.getCharacterRun(j); if (run.getPicOffset() == 0 || run.getPicOffset() >= 1000) { if (presentPicture < pictures.size()) { writeDocumentPicture(pictures.get(presentPicture).getContent()); } } else { try { String text = run.text(); if (text.length() >= 2 && pnumCharacterRuns < 2) { output.write(text.getBytes()); } else { String fontSizeBegin = String.format(fontSizeTag, getSize(run.getFontSize())); String fontColorBegin = String.format(fontColorTag, getColor(run.getColor())); output.write(fontSizeBegin.getBytes()); output.write(fontColorBegin.getBytes()); if (run.isBold()) { output.write(boldBegin.getBytes()); } if (run.isItalic()) { output.write(italicBegin.getBytes()); } output.write(text.getBytes()); if (run.isBold()) { output.write(boldEnd.getBytes()); } if (run.isItalic()) { output.write(italicEnd.getBytes()); } output.write(fontEnd.getBytes()); output.write(fontEnd.getBytes()); } } catch (Exception e) { e.printStackTrace(); } } } } }
原文:http://blog.csdn.net/aqi00/article/details/69942521