java实现word文件转html文件-eolink官网

java实现word文件转html文件

最近在项目开发中，用户提出要在电脑上没有安装 Office 时也能在浏览器中打开 Word 文件。最后确定的逻辑如下：用户选择想要查看的文件，页面 JS 判断文件是否为 Word 文件；如果不是，则直接执行下载；如果是，则由后端根据 Word 文件后缀调用对应的转换方法。若该文件对应的 HTML 文件已存在，直接返回 HTML 文件地址；若不存在，则先生成对应的 HTML 文件再返回地址。JS 通过 open() 打开新的页签，展示 Word 文件内容。新人一枚，如果代码中存在错误或有更好的实现，万望指正！

相关jar包

代码

import java.io.ByteArrayOutputStream;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.InputStream;

import java.io.OutputStream;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.OutputKeys;

import javax.xml.transform.Transformer;

import javax.xml.transform.TransformerException;

import javax.xml.transform.TransformerFactory;

import javax.xml.transform.dom.DOMSource;

import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.converter.PicturesManager;

import org.apache.poi.hwpf.converter.WordToHtmlConverter;

import org.apache.poi.hwpf.usermodel.PictureType;

import org.apache.poi.xwpf.converter.core.BasicURIResolver;

import org.apache.poi.xwpf.converter.core.FileImageExtractor;

import org.apache.poi.xwpf.converter.core.FileURIResolver;

import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;

import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;

import org.apache.poi.xwpf.usermodel.XWPFDocument;

import org.w3c.dom.Document;

/**
 * Converts Word documents to HTML files. 2017-2-27
 */

public class WordToHtml {

/**

* 将word2003转换为html文件 2017-2-27

* @param wordPath word文件路径

* @param wordName word文件名称无后缀

* @param suffix word文件后缀

* @throws IOException

* @throws TransformerException

* @throws ParserConfigurationException

public String Word2003ToHtml(String wordPath,String wordName,String suffix) throws IOException, TransformerException, ParserConfigurationException {

String htmlPath = wordPath + File.separator + wordName + "_show" + File.separator;

String htmlName = wordName + ".html";

final String imagePath = htmlPath + "image" + File.separator;

//判断html文件是否存在

File htmlFile = new File(htmlPath + htmlName);

if(htmlFile.exists()){

return htmlFile.getAbsolutePath();

}

//原word文档

final String file = wordPath + File.separator + wordName + suffix;

InputStream input = new FileInputStream(new File(file));

HWPFDocument wordDocument = new HWPFDocument(input);

WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

//设置图片存放的位置

http:// wordToHtmlConverter.setPicturesManager(new PicturesManager() {

public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {

File imgPath = new File(imagePath);

if(!imgPath.exists()){http:////图片目录不存在则创建

imgPath.mkdirs();

}

File file = new File(imagePath + suggestedName);

try {

OutputStream os = new FileOutputStream(file);

os.write(content);

os.close();

} catch (FileNotFoundException e) {

e.printStackTrace();

} catch (IOException e) {

e.printStackTrace();

}

//图片在html文件上的路径相对路径

return "image/" + suggestedName;

}

});

//解析word文档

wordToHtmlConverter.processDocument(wordDocument);

Document htmlDocument = wordToHtmlConverter.getDocument();

//生成html文件上级文件夹

File folder = new File(htmlPath);

if(!folder.exists()){

folder.mkdirs();

}

//生成html文件地址

OutputStream outStream = new FileOutputStream(htmlFile);

DOMSource domSource = new DOMSource(htmlDocument);

StreamResult streamResult = new StreamResult(outStream);

TransformerFactory factory = TransformerFactory.newInstance();

Transformer serializer = factory.newTransformer();

serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");

serializer.setOutputProperty(OutputKeys.INDENT, "yes");

serializer.setOutputProperty(OutputKeys.METHOD, "html");

serializer.transform(domSource, streamResult);

outStream.close();

return htmlFile.getAbsolutePath();

}

/**

* 2007版本word转换成html 2017-2-27

* @param wordPath word文件路径

* @param wordName word文件名称无后缀

* @param suffix word文件后缀

* @return

* @throws IOException

public String Word2007ToHtml(String wordPath,String wordName,String suffix) throws IOException {

String htmlPath = wordPath + File.separator + wordName + "_show" + File.separator;

String htmlName = wordName + ".html";

String imagePath = htmlPath + "image" + File.separator;

//判断html文件是否存在

File htmlFile = new File(htmlPath + htmlName);

if(htmlFile.exists()){

return htmlFile.getAbsolutePath();

}

//word文件

File wordFile = new File(wordPath + File.separator + wordName + suffix);

// 1) 加载word文档生成 XWPFDocument对象

InputStream in = new FileInputStream(wordFile);

XWPFDocument document = new XWPFDocument(in);

// 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)

File imgFolder = new File(imagePath);

XHTMLOptions options = XHTMLOptions.create();

options.setExtractor(new FileImageExtractor(imgFolder));

//html中图片的路径相对路径

options.URIResolver(new BasicURIResolver("image"));

options.setIgnoreStylesIfUnused(false);

options.setFragment(true);

// 3) 将 XWPFDocument转换成XHTML

//生成html文件上级文件夹

File folder = new File(htmlPath);

if(!folder.exists()){

folder.mkdirs();

}

OutputStream out = new FileOutputStream(htmlFile);

XHTMLConverter.getInstance().convert(document, out, options);

return htmlFile.getAbsolutePath();

}

文件目录：

Flask接口签名sign原理与实例代码浅析

412 2023-05-31

java实现word文件转html文件

多平台统一管理软件接口，如何实现多平台统一管理软件接口

Flask接口签名sign原理与实例代码浅析

java中的接口是类吗

推荐文章

接口调用是什么意思？几种常用接口调用方式

接口设计原则

8款在线 API 接口文档管理工具

api管理系统是什么？

什么是接口调试？接口调试的步骤有哪些？

api 接口管理系统有哪些？

接口测试有几种测试方法

API文档生成工具有哪些？

微服务和api网关区别

交换机配置步骤

最近发表

热评文章

在线接口文档管理工具推荐，支持在线测试，HTTP接口

开源的在线接口文档wiki工具Mindoc的介绍与使

如何优雅的进行接口设计？接口设计的六大原则是什么？

什么是API测试,api检测公司

遇到百度网址安全中心提醒您该页面可能存在钓鱼欺诈信息

软件接口设计怎么做？前后端分离软件接口设计思路

java实现word文件转html文件

微信扫一扫：分享

推荐文章

最近发表

热评文章