Java实现Word/Pdf/TXT转html的示例

网友投稿 539 2022-11-14


Java实现Word/Pdf/TXT转html的示例

引言:

最近公司在做一个教育培训学习及在线考试的项目，本人主要负责网络课程模块，包括课程分类、课程、课件的创建，以及在线学习和统计功能。课件涉及多种类型，如视频、音频、图文、外部链接及文档类型，其中就涉及到一个问题：文档型课件如何在网页上展示和学习。因为要在线统计学习的课程、学习的人员和学习的时长，所以不能像传统做法那样将文档下载到本地学习，那样就不受系统控制了。最终的方案是：在上传文档型课件的时候，将文件转换成对应的HTML文件，以便在网页上浏览学习。

下面主要针对Word、PDF和TXT文本文件进行转换。

一:java实现将word转换为html

1:引入依赖

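<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.document</artifactId>
    <version>1.0.5</version>
</dependency>

<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.5</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.12</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.12</version>
</dependency>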

2:代码demo

package com.svse.controller;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.core.IURIResolver;
import org.apache.poi.xwpf.converter.core.IXWPFConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

/**

* word 转换成html

*/

public class TestWordToHtml {

public static final String STORAGEPATH="C://works//files//";

public static final String IP="192.168.30.222";

pubtdLVOkaDHlic static final String PORT="8010";

public static void main(String[] args) throws IOException, TransformerException, ParserConfigurationException {

TestWordToHtml wt=new TestWordToHtml();

//wt.Word2003ToHtml("甲骨文考证.doc");

wt.Word2007ToHtml("甲骨文考证.docx");

}

/**

* 2003版本word转换成html

* @throws IOException

* @throws TransformerException

* @throws ParserConfigurationException

*/

public void Word2003ToHtml(String fileName) throws IOException, TransformerException, ParserConfigurationException {

final String imagepath = STORAGEPATH+"fileImage/";//解析时候如果doc文件中有图片 图片会保存在此路径

final String strRanString=getRandomNum();

String filepath =STORAGEPATH;

String htmlName =fileName.substring(0, fileName.indexOf("."))+ "2003.html";

final String file = filepath + fileName;

InputStream input = new FileInputStream(new File(file));

HWPFDocument wordDocument = new HWPFDocument(input);

WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

//设置图片存放的位置

wordToHtmlConverter.setPicturesManager(new PicturesManager() {

public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {

File imgPath = new File(imagepath);

if(!imgPath.exists()){//图片目录不存在则创建

imgPath.mkdirs();

}

File file = new File(imagepath +strRanString+suggestedName);

try {

OutputStream os = new FileOutputStream(file);

os.write(content);

os.close();

} catch (FileNotFoundException e) {

e.printStackTrace();

} catch (IOException e) {

e.printStackTrace();

}

return "http://"+IP+":"+PORT+"//uploadFile/fileImage/"+strRanString+suggestedName;

// return imagepath +strRanString+suggestedName;

}

});

//解析word文档

wordToHtmlConverter.processDocument(wordDocument);

Document htmlDocument = wordToHtmlConverter.getDocument();

File htmlFile = new File(filepath +strRanString+htmlName);

OutputStream outStream = new FileOutputStream(htmlFile);

DOMSource domSource = new DOMSource(htmlDocument);

StreamResult streamResult = new StreamResult(outStream);

TransformerFactory factory = TransformerFactory.newInstance();

Transformer serializer = factory.newTransformer();

serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");

serializer.setOutputProperty(OutputKeys.INDENT, "yes");

serializer.setOutputProperty(OutputKeys.METHOD, "html");

serializer.transform(domSource, streamResult);

outStream.close();

System.out.println("生成html文件路径:"+ "http://"+IP+":"+PORT+"//uploadFile/"+strRanString+htmlName);

}

/**

* 2007版本word转换成html

* @throws IOException

*/

public void Word2007ToHtml(String fileName) throws IOException {

final String strRanString=getRandomNum();

String filepath = STORAGEPATH+strRanString;

String htmlName =fileName.substring(0, fileName.indexOf("."))+ "2007.html";

File f = new File(STORAGEPATH+fileName);

if (!f.exists()) {

System.out.println("Sorry File does not Exists!");

} else {

if (f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX")) {

try {

// 1) 加载word文档生成 XWPFDocument对象

InputStream in = new FileInputStream(f);

XWPFDocument document = new XWPFDocument(in);

// 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)

File imageFolderFile = new File(filepath);

XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(ihttp://mageFolderFile));

options.setExtractor(new FileImageExtractor(imageFolderFile));

options.URIResolver(new IURIResolver() {

public String resolve(String uri) {

//http://192.168.30.222:8010//uploadFile/....

return "http://"+IP+":"+PORT+"//uploadFile/"+strRanString +"/"+ uri;

}

});

options.setIgnoreStylesIfUnused(false);

options.setFragment(true);

// 3) 将 XWPFDocument转换成XHTML

OutputStream out = new FileOutputStream(new File(filepath + htmlName));

IXWPFConverter converter = XHTMLConverter.getInstance();

converter.convert(document,out, options);

//XHTMLConverter.getInstance().convert(document, out, options);

System.out.println("html路径:"+"http://"+IP+":"+PORT+"//uploadFile/"+strRanString+htmlName);

} catch (Exception e) {

e.printStackTrace();

}

} else {

System.out.println("Enter only MS Office 2007+ files");

}

}

}

/**

*功能说明:生成时间戳

*创建人:zsq

*创建时间:2019年12月7日 下午2:37:09

*

*/

public static String getRandomNum(){

Date dt = new Date();

SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");

String str=sdf.format(dt);

return str;

}

}

二:Java实现将Pdf转换为html

1: 引入依赖

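<dependency>
    <groupId>net.sf.cssbox</groupId>
    <artifactId>pdf2dom</artifactId>
    <version>1.7</version>
</dependency>

<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.12</version>
</dependency>

<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox-tools</artifactId>
    <version>2.0.12</version>
</dependency>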

2:代码Demo

/** Converts PDF files to HTML using pdf2dom's {@code PDFDomTree}. */
public class PdfToHtml {

    /**
     * Converts a PDF file to an HTML file encoded as UTF-8.
     *
     * <p>Failures are printed to standard error and not rethrown.
     *
     * @param inPdfPath path of the PDF file to read
     * @param outputHtmlPath path of the HTML file to write
     */
    public void pdfToHtmlTest(String inPdfPath, String outputHtmlPath) {
        // Resources declared in try (...) are closed automatically, which also flushes the
        // buffered writer. The PDF is loaded first so that an unreadable input does not
        // leave an empty HTML file behind.
        try (PDDocument document = PDDocument.load(new File(inPdfPath));
                BufferedWriter out = new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(new File(outputHtmlPath)), "utf-8"))) {
            PDFDomTree pdfDomTree = new PDFDomTree();
            pdfDomTree.writeText(document, out);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws IOException {
        PdfToHtml ph = new PdfToHtml();
        String pdfPath = "C:\\works\\files\\武研中心行政考勤制度.pdf";
        String outputPath = "C:\\works\\files\\武研中心行政考勤制度.html";
        ph.pdfToHtmlTest(pdfPath, outputPath);
    }
}

三:Java实现将TXT转换为html

以上就是Java实现Word/Pdf/TXT转html的示例的详细内容,更多关于Java Word/Pdf/TXT转html的资料请关注我们其它相关文章!


版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:SpringBoot使用 druid 连接池来优化分页语句
下一篇:SpringBoot使用@ResponseBody返回图片的实现
相关文章

 发表评论

暂时没有评论,来抢沙发吧~