SpringBoot word文档转pdf
雨夜归人93 人气:10
一.背景
项目中有个需求大体意思是,上传一个word模板,根据word模板合成word文件,再将word文件转为pdf。
二.方案选择
1.Spire.Doc for Java方案
Spire.Doc for Java这个是商用收费的,不过API文档丰富且集成简单,免费版仅支持3页转换。类似的还有ITEXT,这个商用也是受限制的。
2.docx4j方案
开源可商用,仅支持docx格式的word。
3.jodconverter+LibreOffice 方案
开源可商用,调用本地office服务,进行pdf转换,类似的还有jodconverter+openOffice。
4.其他
至于其他的由于不支持跨平台不做考虑。
三.实操
1.docx4j
首先尝试了docx4j,因为docx4j本身支持模板替换的操作,可一次性做替换及文档类型转换,而且仅支持docx类型,对于本次需求问题不大。
1.依赖仅需要一个即可
<!-- The single dependency needed for the docx4j approach --> <dependency> <groupId>org.docx4j</groupId> <artifactId>docx4j-export-fo</artifactId> <version>6.1.0</version> </dependency>
2.主要代码
@Slf4j public class PdfUtil { public static <T> void exportByLocalPath(HttpServletResponse response, String fileName, String path, Map<String,String> params){ try (InputStream in = PdfUtil.class.getClassLoader().getResourceAsStream(path)) { convertDocxToPdf(in, response,fileName,params); } catch (Exception e) { log.error("docx文档转换为PDF失败", e.getMessage()); } } /** * docx文档转换为PDF * @param in * @param response * @return */ public static void convertDocxToPdf(InputStream in, HttpServletResponse response, String fileName, Map<String,String> params) throws Exception { response.setContentType("application/pdf"); String fullFileName = new String(fileName.getBytes(), StandardCharsets.ISO_8859_1); response.setHeader("Content-disposition", "attachment;filename=" + fullFileName + ".pdf"); WordprocessingMLPackage wmlPackage = WordprocessingMLPackage.load(in); if (params!=null&&!params.isEmpty()) { MainDocumentPart documentPart = wmlPackage.getMainDocumentPart(); cleanDocumentPart(documentPart); documentPart.variableReplace(params); } setFontMapper(wmlPackage); Docx4J.toPDF(wmlPackage,response.getOutputStream()); } /** * 清除文档空白占位符 * @param documentPart * @return {@link boolean} */ public static boolean cleanDocumentPart(MainDocumentPart documentPart) throws Exception { if (documentPart == null) { return false; } Document document = documentPart.getContents(); String wmlTemplate = XmlUtils.marshaltoString(document, true, false, Context.jc); document = (Document) XmlUtils.unwrap(DocxVariableClearUtil.doCleanDocumentPart(wmlTemplate, Context.jc)); documentPart.setContents(document); return true; } /** * 设置字体样式 * @param mlPackage */ private static void setFontMapper(WordprocessingMLPackage mlPackage) throws Exception { Mapper fontMapper = new IdentityPlusMapper(); fontMapper.put("隶书", PhysicalFonts.get("LiSu")); fontMapper.put("宋体", PhysicalFonts.get("SimSun")); fontMapper.put("微软雅黑", PhysicalFonts.get("Microsoft Yahei")); fontMapper.put("黑体", PhysicalFonts.get("SimHei")); 
fontMapper.put("楷体", PhysicalFonts.get("KaiTi")); fontMapper.put("新宋体", PhysicalFonts.get("NSimSun")); fontMapper.put("华文行楷", PhysicalFonts.get("STXingkai")); fontMapper.put("华文仿宋", PhysicalFonts.get("STFangsong")); fontMapper.put("宋体扩展", PhysicalFonts.get("simsun-extB")); fontMapper.put("仿宋", PhysicalFonts.get("FangSong")); fontMapper.put("仿宋_GB2312", PhysicalFonts.get("FangSong_GB2312")); fontMapper.put("幼圆", PhysicalFonts.get("YouYuan")); fontMapper.put("华文宋体", PhysicalFonts.get("STSong")); fontMapper.put("华文中宋", PhysicalFonts.get("STZhongsong")); mlPackage.setFontMapper(fontMapper); } }
清除工具类:Word 可能把 ${xxx} 占位符拆分到多个XML标签中,导致占位符替换不生效,该工具类用于在替换前把占位符重新合并(实现参考了网上的相关文章)。
public class DocxVariableClearUtil { /** * 去任意XML标签 */ private static final Pattern XML_PATTERN = Pattern.compile("<[^>]*>"); private DocxVariableClearUtil() { } /** * start符号 */ private static final char PREFIX = '$'; /** * 中包含 */ private static final char LEFT_BRACE = '{'; /** * 结尾 */ private static final char RIGHT_BRACE = '}'; /** * 未开始 */ private static final int NONE_START = -1; /** * 未开始 */ private static final int NONE_START_INDEX = -1; /** * 开始 */ private static final int PREFIX_STATUS = 1; /** * 左括号 */ private static final int LEFT_BRACE_STATUS = 2; /** * 右括号 */ private static final int RIGHT_BRACE_STATUS = 3; /** * doCleanDocumentPart * * @param wmlTemplate * @param jc * @return * @throws JAXBException */ public static Object doCleanDocumentPart(String wmlTemplate, JAXBContext jc) throws JAXBException { // 进入变量块位置 int curStatus = NONE_START; // 开始位置 int keyStartIndex = NONE_START_INDEX; // 当前位置 int curIndex = 0; char[] textCharacters = wmlTemplate.toCharArray(); StringBuilder documentBuilder = new StringBuilder(textCharacters.length); documentBuilder.append(textCharacters); // 新文档 StringBuilder newDocumentBuilder = new StringBuilder(textCharacters.length); // 最后一次写位置 int lastWriteIndex = 0; for (char c : textCharacters) { switch (c) { case PREFIX: // 不管其何状态直接修改指针,这也意味着变量名称里面不能有PREFIX keyStartIndex = curIndex; curStatus = PREFIX_STATUS; break; case LEFT_BRACE: if (curStatus == PREFIX_STATUS) { curStatus = LEFT_BRACE_STATUS; } break; case RIGHT_BRACE: if (curStatus == LEFT_BRACE_STATUS) { // 接上之前的字符 newDocumentBuilder.append(documentBuilder.substring(lastWriteIndex, keyStartIndex)); // 结束位置 int keyEndIndex = curIndex + 1; // 替换 String rawKey = documentBuilder.substring(keyStartIndex, keyEndIndex); // 干掉多余标签 String mappingKey = XML_PATTERN.matcher(rawKey).replaceAll(""); if (!mappingKey.equals(rawKey)) { char[] rawKeyChars = rawKey.toCharArray(); // 保留原格式 StringBuilder rawStringBuilder = new StringBuilder(rawKey.length()); // 去掉变量引用字符 for (char rawChar : 
rawKeyChars) { if (rawChar == PREFIX || rawChar == LEFT_BRACE || rawChar == RIGHT_BRACE) { continue; } rawStringBuilder.append(rawChar); } // 要求变量连在一起 String variable = mappingKey.substring(2, mappingKey.length() - 1); int variableStart = rawStringBuilder.indexOf(variable); if (variableStart > 0) { rawStringBuilder = rawStringBuilder.replace(variableStart, variableStart + variable.length(), mappingKey); } newDocumentBuilder.append(rawStringBuilder.toString()); } else { newDocumentBuilder.append(mappingKey); } lastWriteIndex = keyEndIndex; curStatus = NONE_START; keyStartIndex = NONE_START_INDEX; } default: break; } curIndex++; } // 余部 if (lastWriteIndex < documentBuilder.length()) { newDocumentBuilder.append(documentBuilder.substring(lastWriteIndex)); } return XmlUtils.unmarshalString(newDocumentBuilder.toString(), jc); } }
2.poi-tl+jodconverter+LibreOffice 方案
poi-tl这个是专门用来进行word模板合成的开源库,文档很详细。
LibreOffice 下载最新的稳定版本即可。
1.maven依赖
<!-- Word template rendering --> <!-- Mind the version here: the 1.5 release depends on POI 3.x --> <dependency> <groupId>com.deepoove</groupId> <artifactId>poi-tl</artifactId> <version>1.5.1</version> </dependency> <!-- jodconverter: Word to PDF --> <!-- In theory jodconverter-core need not be declared because jodconverter-local already depends on it, but during testing it could not be found without this explicit dependency --> <dependency> <groupId>org.jodconverter</groupId> <artifactId>jodconverter-core</artifactId> <version>4.2.0</version> </dependency> <dependency> <groupId>org.jodconverter</groupId> <artifactId>jodconverter-local</artifactId> <version>4.2.0</version> </dependency> <dependency> <groupId>org.jodconverter</groupId> <artifactId>jodconverter-spring-boot-starter</artifactId> <version>4.2.0</version> </dependency> <!-- Utility library, optional --> <dependency> <groupId>cn.hutool</groupId> <artifactId>hutool-all</artifactId> <version>5.4.3</version> </dependency>
2.主要代码
JodConverterConfig配置类
/**
 * Spring configuration that exposes a {@link DocumentConverter} bean
 * built on the injected {@link OfficeManager}.
 */
@Configuration
public class JodConverterConfig {

    /** Office manager injected by Spring. */
    @Autowired
    private OfficeManager officeManager;

    /**
     * Builds the converter used for document format conversion.
     *
     * @return a {@code LocalConverter} bound to the injected office manager
     */
    @Bean
    public DocumentConverter documentConverter() {
        final DocumentConverter converter =
                LocalConverter.builder().officeManager(officeManager).build();
        return converter;
    }
}
yml配置文件
# JODConverter local (LibreOffice) conversion settings
jodconverter:
  local:
    enabled: true
    # LibreOffice installation directory; adjust to the local install path
    office-home: "C:\\Program Files\\LibreOffice"
PdfService合成导出代码
@Slf4j @Component public class PdfService { @Autowired private DocumentConverter documentConverter; public void docxToPDF(InputStream inputStream,HttpServletResponse response,String fileName) { response.setContentType("application/pdf"); try { String fullFileName = new String(fileName.getBytes(), StandardCharsets.ISO_8859_1); response.setHeader("Content-disposition","attachment;filename=\\"+fullFileName+".pdf\\"); documentConverter .convert(inputStream) .as(DefaultDocumentFormatRegistry.DOCX) .to(response.getOutputStream()) .as(DefaultDocumentFormatRegistry.PDF) .execute(); } catch (OfficeException |IOException e) { log.error("word转pdf失败:{}",e.getMessage()); } } public void exportByLocalPath(HttpServletResponse response, String fileName, String path, Object params) throws Exception { BufferedOutputStream outputStream = null; BufferedInputStream wordInputStream = null; try (InputStream in = PdfService.class.getClassLoader().getResourceAsStream(path)) { // 生成临时文件 String outPutWordPath = System.getProperty("java.io.tmpdir").replaceAll(File.separator + "$", "") + fileName+".docx"; File tempFile = FileUtil.touch(outPutWordPath); outputStream = FileUtil.getOutputStream(tempFile); // word模板合成写到临时文件 WordUtil.replaceWord(outputStream, in, params); // word 转pdf wordInputStream = FileUtil.getInputStream(tempFile); docxToPDF(wordInputStream, response,fileName); // 移除临时文件 FileUtil.del(tempFile); } catch (Exception e) { log.error("docx文档转换为PDF失败", e.getMessage()); } finally { IoUtil.close(outputStream); IoUtil.close(wordInputStream); } }
四.结论
1.docx4j方案
- 依赖少
- 同时支持word合成及格式转换
- 转化效率较差
- 对于含样式及图片转换不友好,容易排版混乱
2.jodconverter+LibreOffice 方案
- 操作稳定
- 转换效率快
- 集成依赖设置较多
- 依赖本地服务
- LibreOffice打开word可能排版样式错乱
综合考虑项目需求,最终选择了jodconverter+LibreOffice方案。
以上为个人经验,希望能给大家一个参考,也希望大家多多支持。
加载全部内容