Flask接口签名sign原理与实例代码浅析
666
2023-01-01
利用openoffice+jodconverter
本文实例为大家分享了openoffice+jodconverter-core-3.0-beta-4实现ppt转图片的具体代码,供大家参考,具体内容如下
安装openoffice4 (用于把文档(ppt)转成pdf)根据系统的位数安装
使用jodconverter-core3.0-beta-4(要上传maven本地仓库)
安装ImageMagick:yum install ImageMagick(用于pdf转图片)
安装pdftotext 用于提取文字大纲 yum install poppler-utils
perl脚本(用于提取pdf文档的文字大纲)
使用jodconverter调用OpenOffice 将office文档转换为PDF时。如果转换程序异常中止而OpenOffice并没有停止运行的话。
openoffice
1、启动tomcat时,启动openoffice服务(个人感觉有风险问题)
2、手工用命令,启动openoffice服务,再使用连接服务(推荐)
package com.document.servers.impl;
import java.io.File;
import java.net.ConnectException;
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import org.artofsolving.jodconverter.OfficeDocumentConverter;
import org.artofsolving.jodconverter.office.DefaultOfficeManagerConfiguration;
import org.artofsolving.jodconverter.office.ExternalOfficeManagerConfiguration;
import org.artofsolving.jodconverter.office.OfficeConnectionProtocol;
import org.artofsolving.jodconverter.office.OfficeManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
//import com.artofsolving.jodconverter.DefaultDocumentFormatRegistry;
//import com.artofsolving.jodconverter.DocumentConverter;
//import com.artofsolving.jodconverter.DocumentFamily;
//import com.artofsolving.jodconverter.DocumentFormat;
//import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
//import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
//import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;
import com.document.servers.OfficeService;
/**
* linux下:
* cd /opt/openoffice4/program
* ./soffice "-accept=socket,host=localhost,port=8100;urp;StarOffice.ServiceManager" -nologo -headless -nofirststartwizard &
*/
@Service("officeService")
public class OfficeServiceImpl http://implements OfficeService {
private static final Logger logger = LoggerFactory.getLogger(OfficeServiceImpl.class);
private OfficeManager officeManager;
private OfficeDocumentConverter documentConverter;
// @PostConstruct
// public void init() throws Exception {
// // TODO Auto-generated method stub
// officeManager = new DefaultOfficeManagerConfiguration().setOfficeHome("/opt/openoffice4").buildOfficeManager();
//
// documentConverter = new OfficeDocumentConverter(officeManager);
// // officeManager.stop();
//
// logger.warn("openoffice starting....");
// try {
// officeManager.start();
// logger.warn("openoffice started");
// } catch (Excepthttp://ion e) {
// logger.error("office start failed:{}", e);
// }
// }
//
// @PreDestroy
// public void destroy() throws Exception {
// // TODO Auto-generated method stub
// logger.info("shutdown office service....");
// if (officeManager != null) {
// try {
//
// officeManager.stop();
// logger.info("office closed");
// } catch (Exception e) {
// logger.error("office close failed:{}", e);
// }
// }
// }
// public void convert(String inputfilename, String outputfilename) {
// logger.info("convert...." + inputfilename + " to " + outputfilename);
// documentConverter.convert(new File(inputfilename), new File(outputfilename));
// }
public void manualConvert(String inputfilename, String outputfilename) {
logger.info("convert...." + inputfilename + " to " + outputfilename);
// connect to an OpenOffice.org instance running on port 8100
ExternalOfficeManagerConfiguration externalProcessOfficeManager = new
ExternalOfficeManagerConfiguration();
externalProcessOfficeManager.setConnectOnStart(true);
externalProcessOfficeManager.setPortNumber(8100);
officeManager = externalProcessOfficeManager.buildOfficeManager();
officeManager.start();
logger.info("openoffice服务已链接");
documentConverter = new OfficeDocumentConverter(officeManager);
documentConverter.convert(new File(inputfilename), new File(outputfilename));
}
}
转换处理方法
package com.document.servers.impl;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.document.defined.model.ImagePPT;
import com.document.servers.OfficeService;
import com.document.servers.PPTConvertServers;
import com.document.tool.ImageMagickUtils;
import com.document.tool.SystemConfig;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.ppt.util.Command;
@Service("pPTConvertServers")
public class PPTConvertServersImpl implements PPTConvertServers {
private static final Logger logger = LoggerFactory.getLogger(PPTConvertServersImpl.class);
@Autowired
private OfficeService officeService;
/**
* (non-Javadoc)
* 利用openoffice把ppt转图片
*/
public Map
throws Exception {
logger.info("ppt转pdf,{}");
// ppt文件地址
String ppt_target_file = filePath;
// pdf文件地址
String path = filePath.substring(0, filePath.lastIndexOf("."));
String pdf_target_file = path + ".pdf";
// 输出jpg文件地址
String images_target_file = path + "/jpg" + "-%d.jpg";
// if(exists(pdf_target_file)){
// unlink(pdf_target_file);//删除
// }
// copy(new File(ppt_target_file), ppt_target_file, true);
if (!extension.equals(".pdf")) {
officeService.manualConvert(ppt_target_file, pdf_target_file);// 转成pdf文件
}
StringWriter writer = new StringWriter();
// 提取文字大纲
String[] pdf_lines = extractOutLineFromPDF(pdf_target_file);
File filepath = new File(images_target_file);
File parentFile = filepath.getParentFile();
if (!parentFile.exists()) {
logger.info("创建图片目录");
parentFile.mkdirs();
}
Command.run("convert " + pdf_target_file + " " + images_target_file, writer);// 转成图片
String basePath = request.getScheme() + "://" + request.getServerName() + "/";
PDDocument document = PDDocument.load(new File(pdf_target_file));
int pageCount = document.getNumberOfPages();
document.close();
List
String pathUrl = filename.substring(0, filename.lastIndexOf("."));
if (pageCount > 0) {
for (int i = 0; i < pageCount; i++) {
ImagePPT imagePPT = new ImagePPT();
imagePPT.setId(i + 1);
if (pdf_lines.length > 0) {
try {
imagePPT.setTitle(pdf_lines[i]);
} catch (Exception e) {
// TODO Auto-generated catch block
imagePPT.setTitle(title);
logger.info("title,数组越界");
//e.printStackTrace();
}
} else {
imagePPT.setTitle(title);
}
imagePPT.setUrl(basePath + "images/" + pathUrl + "/jpg-" + i + ".jpg");
imagePPT.setPreviewUrl(basePath + "preview/images/" + pathUrl + "/preview/pjpg-" + i + ".jpg");
// String oimgDir = SystemConfig.getBlobDirectory() + pathUrl + "/jpg-" + i + ".jpg";
// String pimgDir = SystemConfig.getBlobDirectory() + pathUrl + "/preview/pjpg-" + i + ".jpg";
// File pfilepath = new File(pimgDir);
// File pf = pfilepath.getParentFile();
// if (!pf.exists()) {
// pf.mkdirs();
// }
//ImageMagickUtils.scale(oimgDir, pimgDir, 240, 180);//预览图
list.add(imagePPT);
}
}
// 拼接json字符串
ObjectMapper objectMapper = new ObjectMapper();
String jsonlist = objectMapper.writeValueAsString(list);
// logger.info(jsonlist);
Map
map.put("json", jsonlist.toString());
map.put("totalPage", pageCount);
return map;
}
/**
* 文件已经上传过 ,进行替换性转换
*/
public Map
String filename) throws Exception {
logger.info("替换,ppt转pdf,{}");
// ppt文件地址
String ppt_target_file = filePath;
// pdf文件地址
String path = filePath.substring(0, filePath.lastIndexOf("."));
String pdf_target_file = path + ".pdf";
// 输出jpg文件地址
String images_target_file = path + "/jpg" + "-%d.jpg";
if (!extension.equals(".pdf")) {
officeService.manualConvert(ppt_target_file, pdf_target_file);// 转成pdf文件
}
StringWriter writer = new StringWriter();
// 提取文字大纲
String[] pdf_lines = extractOutLineFromPDF(pdf_target_file);
File filepath = new File(images_target_file);
File parentFile = filepath.getParentFile();
if (!parentFile.exists()) {
logger.info("替换创建图片目录");
parentFile.mkdirs();
}
Command.run("convert " + pdf_target_file + " " + images_target_file, writer);// 转成图片
String basePath = request.getScheme() + "://" + request.getServerName() + "/";
PDDocument document = PDDocument.load(new File(pdf_target_file));
int pageCount = document.getNumberOfPages();
document.close();
List
String pathUrl = filename.substring(0, filename.lastIndexOf("."));
if (pageCount > 0) {
for (int i = 0; i < pageCount; i++) {
ImagePPT imagePPT = new ImagePPT();
imagePPT.setId(i + 1);
if (pdf_lines.length > 0) {
try {
imagePPT.setTitle(pdf_lines[i]);
} catch (Exception e) {
// TODO Auto-generated catch block
imagePPT.setTitle(title);
logger.info("title,数组越界");
// e.printStackTrace();
}
} else {
imagePPT.setTitle(title);
}
imagePPT.setUrl(basePath + "images/" + pathUrl + "/jpg-" + i + ".jpg");
imagePPT.setPreviewUrl(basePath + "preview/images/" + pathUrl + "/preview/pjpg-" + i + ".jpg");
// String oimgDir = SystemConfig.getBlobDirectory() + pathUrl + "/jpg-" + i + ".jpg";
// String pimgDir = SystemConfig.getBlobDirectory() + pathUrl + "/preview/pjpg-" + i + ".jpg";
// File pfilepath = new File(pimgDir);
// File pf = pfilepath.getParentFile();
// if (!pf.exists()) {
// pf.mkdirs();
// }
// ImageMagickUtils.scale(oimgDir, pimgDir, 240, 180);
list.add(imagePPT);
}
}
// 拼接json字符串
ObjectMapper objectMapper = new ObjectMapper();
String jsonlist = objectMapper.writeValueAsString(list);
// logger.info(jsonlist);
Map
map.put("json", jsonlist.toString());
map.put("totalPage", pageCount);
return map;
}
/**
 * Extracts the text outline of a PDF by running the bundled
 * {@code WEB-INF/sh/pdf_outline.pl} Perl script on it.
 *
 * <p>The script location is derived from this class's own resource path:
 * everything before the first {@code WEB-INF} is taken as the web application root.
 *
 * @param pdf_file path of the PDF to inspect
 * @return the script output split on {@code "///"}
 * @throws UnsupportedEncodingException if UTF-8 decoding of the output is not supported
 */
public static String[] extractOutLineFromPDF(String pdf_file) throws UnsupportedEncodingException {
    String classLocation = PPTConvertServersImpl.class.getResource("").getPath();
    String webRoot = classLocation.split("WEB-INF")[0].replaceFirst("file:", "");
    logger.info(webRoot);

    String perlCommand = "/usr/bin/perl " + webRoot + "WEB-INF/sh/pdf_outline.pl " + pdf_file;
    logger.info(perlCommand);

    // Presumably run2 executes the command and writes its output into the stream - confirm in Command.
    ByteArrayOutputStream captured = new ByteArrayOutputStream();
    Command.run2(perlCommand, captured);

    String outline = captured.toString("utf-8");
    logger.info("title pdf,{}", outline);
    return outline.split("///");
}
/**
 * Tells whether a file or directory exists at the given path.
 *
 * @param filename path to check
 * @return {@code true} if the path exists; {@code false} if it does not or if
 *         the check itself throws (for example for a {@code null} path)
 */
public static boolean exists(String filename) {
    boolean found;
    try {
        found = new File(filename).exists();
    } catch (Exception e) {
        found = false;
    }
    return found;
}
/**
* 删除文件
*
* @param filename
* @return
*/
public static boolean unlink(String filename) {
try {
File file = new File(filename);
if (file.isFile()) {
file.delete();
return true;
}
http://return false;
} catch (Exception e) {
return false;
}
}
/**
 * Copies a file, creating the destination's parent directories if needed.
 *
 * @param file      source file
 * @param newname   destination path
 * @param overwrite whether an existing destination may be replaced
 * @return {@code true} if the copy completed; {@code false} if the destination
 *         exists and {@code overwrite} is {@code false}, if the parent
 *         directory cannot be created, or if any error occurs
 */
public static boolean copy(File file, String newname, boolean overwrite) {
    try {
        File dest = new File(newname);
        if (!overwrite && dest.exists()) {
            return false;
        }
        // Open the source first so a missing source never truncates the destination.
        FileInputStream input = new FileInputStream(file);
        try {
            // A destination without a parent (a bare file name) needs no directory.
            File parent = dest.getParentFile();
            if (parent != null && !parent.isDirectory() && !parent.mkdirs() && !parent.isDirectory()) {
                return false;
            }
            FileOutputStream output = new FileOutputStream(dest);
            try {
                byte[] buffer = new byte[1024 * 5];
                int len;
                while ((len = input.read(buffer)) != -1) {
                    output.write(buffer, 0, len);
                }
                output.flush();
            } finally {
                output.close();
            }
            return true;
        } finally {
            // Closed on every path, including the early return above.
            input.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
        return false;
    }
}
/**
 * Ensures that a directory exists, creating it and any missing parents.
 *
 * @param dir path of the directory
 * @return {@code true} if the directory exists when the method returns;
 *         {@code false} if {@code dir} is {@code null}, if the path is
 *         occupied by a regular file, or if the directory cannot be created
 */
public static boolean mkdir(String dir) {
    if (dir == null) {
        return false;
    }
    try {
        File file = new File(dir);
        // The second isDirectory() covers another thread or process creating
        // the directory between the first check and mkdirs().
        return file.isDirectory() || file.mkdirs() || file.isDirectory();
    } catch (Exception e) {
        e.printStackTrace();
        return false;
    }
}
}
上传ppt文件处理类:
package com.document.handle.controller;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.ModelAndView;
import com.document.servers.PPTConvertServers;
import com.document.tool.FilenameUtils;
import com.document.tool.SystemConfig;
import com.fasterxml.jackson.annotation.PropertyAccessor;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;
@Controller
public class PptToImageController {
private static final Logger logger = LoggerFactory.getLogger(PptToImageController.class);
private static final String TYPE_BLOB = "BLOB";
private static final String CALLBACK = "callback"; // 回调函数的参数名
@Autowired
private PPTConvertServers pPTConvertServers;
@RequestMapping(value = "/convert/upload")
public ModelAndView updateFile(HttpServletRequest request, HttpServletResponse response) {
ModelAndView mav = new ModelAndView();
mav.http://addObject("name", "Hello Word");
mav.setViewName("/ppt/uploadFile");
logger.info("/convert/upload");
return mav;
}
/**
* 显示上传文件的页面表单。
*/
@SuppressWarnings("unchecked")
private ModelAndView showUploadForm(HttpServletRequest request, String type) {
// 所有请求参数
Map
Enumeration
while (paramNames.hasMoreElements()) {
String name = paramNames.nextElement();
String value = request.getParameter(name);
if (null != value) {
params.put(name, value);
}
}
ModelAndView mav = new ModelAndView();
mav.setViewName("/upload/" + type.toLowerCase());
mav.addObject("parameters", params);
return mav;
}
/**
* 保存用户上传的文件。
* @throws UnsupportedEncodingException
*/
private Map
// 文件内容MD5串,避免文件重复上传
String md5 = null;
try {
md5 = DigestUtils.md5Hex(file.getBytes());
logger.info("文件内容MD5串,{}", md5);
} catch (IOException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
String originalFilename = file.getOriginalFilename();
String extension = FilenameUtils.getExtension(originalFilename); // 文件扩展名
String filename = null;
if (md5 != null) {
filename = FilenameUtils.generateFileNameMd5(extension, md5);
} else {
filename = FilenameUtils.generateFileName(extension);
}
String filenameUrl = null; // 文件访问的URL
String absoluteFilename = null; // 文件存储的绝对路径
filenameUrl = SystemConfig.getBlobUrl() + filename;
absoluteFilename = SystemConfig.getBlobDirectory() + filename;
// 检查是否需要创建目录
File filepath = new File(absoluteFilename);
File parentFile = filepath.getParentFile();
if (!parentFile.exists()) {
parentFile.mkdirs();
}
Map
// 所有请求参数
Enumeration
while (paramNames.hasMoreElements()) {
String name = paramNames.nextElement();
String value = request.getParameter(name);
if (null != value) {
params.put(name, value);
}
}
String pdftitle = originalFilename.substring(0, originalFilename.lastIndexOf("."));
params.put("title", pdftitle);
Map
if (filepath.exists()) {
// 文件已上传过,文件进行替换
try {
officeMap = pPTConvertServers.replace_ppt(request, absoluteFilename, extension, pdftitle, filename);
params.put("totalPage", officeMap.get("totalPage"));
params.put("data", officeMap.get("json"));
params.put("status", "success");
} catch (Exception e) {
// TODO Auto-generated catch block
logger.info("把ppt文件转pdf失败,{}", e);
params.put("status", "fail");
params.put("data", "把ppt文件转pdf失败");
params.put("totalPage", 0);
e.printStackTrace();
}
return params;
}
// 保存文件
BufferedOutputStream bos = null;
try {
byte[] fileBytes = file.getBytes();
bos = new BufferedOutputStream(new FileOutputStream(filepath));
bos.write(fileBytes);
} catch (IOException e) {
logger.error("保存'" + originalFilename + "'时发生异常,Cause: ", e);
} finally {
if (null != bos) {
try {
bos.close();
} catch (IOException e) {
}
}
}
// params.put("url", filenameUrl);
// params.put("originalFilename", originalFilename);
// params.put("filesize", file.getSize());
// 把ppt文件转pdf,pdf转图片
try {
officeMap = pPTConvertServers.deal_ppt(request, absoluteFilename, extension, pdftitle, filename);
params.put("totalPage", officeMap.get("totalPage"));
params.put("data", officeMap.get("json"));
params.put("status", "success");
} catch (Exception e) {
// TODO Auto-generated catch block
logger.info("把ppt文件转pdf失败,{}", e);
params.put("status", "fail");
params.put("data", "把ppt文件转pdf失败");
params.put("totalPage", 0);
e.printStackTrace();
}
return params;
}
/**
* 处理文件上传。
* @throws IOException
*
*/
@RequestMapping(value = "/convert/upload", method = RequestMethod.POST,produces = "text/html;charset=UTF-8")
public @ResponseBody String uploadFilePost(HttpServletRequest request,
@RequestParam("file") MultipartFile file) throws IOException {
String callback = request.getParameter(CALLBACK); // 回调函数的函数名
String json = "请上传文件";
Map
ObjectMapper mapper = new ObjectMapper();
mapper.setVisibility(PropertyAccessor.FIELD, Visibility.ANY);
ObjectWriter writer = mapper.writerWithType(Map.class);
if (!file.isEmpty()) {
params = saveUploadedFile(request, file, TYPE_BLOB);
if (params == null) {
params = new HashMap
json = "文件已上传过";
params.put("status", "fail");
params.put("data", json);
json = writer.writeValueAsString(params);
return json.toString();
}
try {
json = writer.writeValueAsString(params);
// json = (String) params.get("data");
} catch (Exception e) {
logger.error("转换Blob上传参数为JSON时发生异常,Cause: ", e);
}
if (StringUtils.isBlank(callback)) {
return json.toString();
} else {
return callback + "(" + json.toString() + ");";
}
}
// 还没上传文件的
params.put("status", "fail");
params.put("data", json);
json = writer.writeValueAsString(params);
return json.toString();
}
}
预览图代理输出-----处理类:
package com.document.handle.controller;
import java.io.File;
import java.io.IOException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import com.document.tool.ImageMagickUtils;
import com.document.tool.SystemConfig;
@Controller
public class ImageAgentController {
    private static final Logger LOG = LoggerFactory.getLogger(ImageAgentController.class);

    /**
     * Serves the preview image of a converted slide.
     *
     * <p>The requested preview name is the original image name with one extra
     * leading character (for example {@code pjpg-0} for {@code jpg-0}). The
     * original image is scaled into a 240x180 preview under the
     * {@code preview} sub-directory, and that preview is streamed back.
     *
     * @param year     first directory level below the blob directory
     * @param month    second directory level
     * @param md5id    directory of the converted document
     * @param preview  path segment before the file name (not used; the target
     *                 directory is always {@code preview})
     * @param filename preview file name without extension
     * @param ext      image extension, also used for the response content type
     * @param request  current request (not used)
     * @param response response the image is written to
     * @throws IOException if the preview cannot be read or written to the response
     */
    @RequestMapping("/preview/images/{year}/{month}/{md5id}/{preview}/{filename}.{ext}")
    public void cropImage(@PathVariable String year, @PathVariable String month, @PathVariable String md5id,
            @PathVariable String preview, @PathVariable String filename, @PathVariable String ext,
            HttpServletRequest request, HttpServletResponse response) throws IOException {
        String rootDir = SystemConfig.getBlobDirectory();
        // Drop the leading character to get the original image name.
        String oname = filename.substring(1, filename.length());
        String dirString = rootDir + year + "/" + month + "/" + md5id + "/" + oname + "." + ext;
        String targetFileString = rootDir + year + "/" + month + "/" + md5id + "/preview/" + filename + "." + ext;
        LOG.info("corpImage..." + dirString + " -> " + targetFileString);

        File pathFile = new File(targetFileString).getParentFile();
        LOG.info("pathString...{}", pathFile);
        if (!pathFile.exists()) {
            LOG.info("---create file---");
            pathFile.mkdirs();
        }

        boolean status = ImageMagickUtils.scale(dirString, targetFileString, 240, 180);
        if (!status) {
            // Report the failure instead of answering with an empty 200 response.
            response.sendError(HttpServletResponse.SC_NOT_FOUND);
            return;
        }

        response.reset();
        response.setContentType("image/" + ext);
        java.io.InputStream in = new java.io.FileInputStream(targetFileString);
        try {
            java.io.OutputStream out = response.getOutputStream();
            byte[] b = new byte[1024];
            int len;
            while ((len = in.read(b)) != -1) {
                // Write only the bytes actually read; writing the whole buffer
                // appends stale data after the final, shorter read.
                out.write(b, 0, len);
            }
        } finally {
            in.close();
        }
    }
}
提取文字大纲的perl脚本:
# Prints a per-page outline of a PDF as a single "///"-separated string.
#
# Usage: pdf_outline.pl <pdf-file>
#
# The PDF text is obtained from pdftotext. The first output line becomes the
# first entry. After that, only lines containing a form feed (\xC) are used:
# the text on such a line becomes the next entry, and each additional form
# feed on the same line yields an entry holding just the running page number.
use strict;
use warnings;
use utf8;
use open ':encoding(utf8)';
binmode(STDOUT, ":utf8");

# Returns the argument with leading and trailing whitespace removed.
sub trim($)
{
    my $string = shift;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}

if(!$ARGV[0]){
    die "usage: $0 <pdf-file>\n";
}

# List-form pipe open: the file name goes to pdftotext as one argument and is
# never interpreted by a shell, so spaces and metacharacters are harmless.
open(my $fh, '-|', 'pdftotext', '-layout', '-enc', 'UTF-8', $ARGV[0], '-')
    or die "cannot run pdftotext: $!";

my $firstline = <$fh>;
# pdftotext may produce no output at all; avoid trimming an undefined value.
print trim($firstline) if defined $firstline;

my $pageNum = 1;
while ( my $line = <$fh> ) {
    if ( $line =~ /\xC/ ) {
        my $count = ($line =~ tr/\xC//);
        # Every form feed beyond the first gets the page number as its entry.
        for(my $i=0;$i<$count-1;$i++){
            print "///".$pageNum;
            $pageNum++;
        }
        if(trim($line)){
            print "///".trim($line);
        }
        $pageNum++;
    }
}
close $fh;
可能遇到的问题:
1、ppt转pdf时,遇到启动失败(不清楚是不是再次启动引起的)
2、转换后的pdf 表格里的中文会出现乱码
3、有时会出现关闭服务器的所有服务(尚不清楚什么原因引起的)
4、处理请求时,经常出现超时504
版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。
发表评论
暂时没有评论,来抢沙发吧~