// PDFUtil.java
  1. package com.kingdee.eas.custom.compensation.utils;
  2. import com.kingdee.bos.BOSException;
  3. import org.apache.pdfbox.contentstream.operator.Operator;
  4. import org.apache.pdfbox.cos.COSArray;
  5. import org.apache.pdfbox.cos.COSName;
  6. import org.apache.pdfbox.cos.COSString;
  7. import org.apache.pdfbox.pdfparser.PDFStreamParser;
  8. import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
  9. import org.apache.pdfbox.pdmodel.PDDocument;
  10. import org.apache.pdfbox.pdmodel.PDPage;
  11. import org.apache.pdfbox.pdmodel.PDPageTree;
  12. import org.apache.pdfbox.pdmodel.common.PDStream;
  13. import java.io.ByteArrayOutputStream;
  14. import java.io.IOException;
  15. import java.io.OutputStream;
  16. import java.util.ArrayList;
  17. import java.util.Iterator;
  18. import java.util.List;
  19. /**
  20. * @Description pdf工具类
  21. * @Date 2025/10/28 12:45
  22. * @Created by 59279
  23. */
  24. public class PDFUtil {
  25. /**
  26. * 移除PDF文档中的文字水印
  27. *
  28. * @param file PDF文件的字节数组
  29. * @param searchString 需要移除的水印文字数组,可变参数
  30. * @return 移除水印后的PDF文件字节数组
  31. * @throws BOSException 当处理PDF文件出现IO异常时抛出
  32. */
  33. //移除文字水印
  34. public static byte[] removeWatermark(byte[] file, String... searchString) throws BOSException, IOException {
  35. PDDocument document = null;
  36. ByteArrayOutputStream bos = null;
  37. try {
  38. //通过文件名加载文档
  39. document = PDDocument.load(file);
  40. //document = Loader.loadPDF(file);
  41. bos = new ByteArrayOutputStream();
  42. PDPageTree pages = document.getPages();
  43. Iterator<PDPage> iter = pages.iterator();
  44. //遍历所有页面,移除指定的文字水印
  45. while (iter.hasNext()) {
  46. PDPage page = iter.next();
  47. //去除文字水印
  48. for (String string : searchString) {
  49. replaceText(page, string, "");
  50. }
  51. }
  52. //移除最后一页(可能是空白页)
  53. //document.removePage(document.getNumberOfPages() - 1);
  54. //将处理后的文档保存到字节数组输出流
  55. document.save(bos);
  56. return bos.toByteArray();
  57. } finally {
  58. if (document != null) {
  59. document.close();
  60. }
  61. if (bos != null) {
  62. bos.close();
  63. }
  64. }
  65. }
  66. /**
  67. * 替换PDF页面中指定的文本内容
  68. *
  69. * @param page PDF页面对象,用于定位和修改文本内容
  70. * @param searchString 需要被替换的原始文本字符串
  71. * @param replacement 用来替换的新文本字符串
  72. * @throws IOException 当读取或写入PDF流时发生错误
  73. */
  74. //替换pdf文本内容
  75. public static void replaceText(PDPage page, String searchString, String replacement) throws IOException {
  76. PDFStreamParser parser = new PDFStreamParser(page);
  77. parser.parse();
  78. List<?> tokens = parser.getTokens();
  79. //List<?> tokens = parser.parse();
  80. // 遍历解析后的PDF操作符和对象,查找并替换文本内容
  81. for (int j = 0; j < tokens.size(); j++) {
  82. Object next = tokens.get(j);
  83. if (next instanceof Operator) {
  84. Operator op = (Operator) next;
  85. String pstring = "";
  86. int prej = 0;
  87. // 处理单行文本显示操作符"Tj"
  88. if (op.getName().equals("Tj")) {
  89. COSString previous = (COSString) tokens.get(j - 1);
  90. String string = previous.getString();
  91. string = string.replaceFirst(searchString, replacement);
  92. previous.setValue(string.getBytes());
  93. }
  94. // 处理多行或多段文本显示操作符"TJ"
  95. else if (op.getName().equals("TJ")) {
  96. COSArray previous = (COSArray) tokens.get(j - 1);
  97. // 提取数组中的所有字符串内容
  98. for (int k = 0; k < previous.size(); k++) {
  99. Object arrElement = previous.getObject(k);
  100. if (arrElement instanceof COSString) {
  101. COSString cosString = (COSString) arrElement;
  102. String string = cosString.getString();
  103. if (j == prej) {
  104. pstring += string;
  105. } else {
  106. prej = j;
  107. pstring = string;
  108. }
  109. }
  110. }
  111. // 如果匹配搜索字符串,则进行替换
  112. if (searchString.equals(pstring.trim())) {
  113. COSString cosString2 = (COSString) previous.getObject(0);
  114. cosString2.setValue(replacement.getBytes());
  115. // 移除多余的数组元素,只保留替换后的第一个元素
  116. int total = previous.size() - 1;
  117. for (int k = total; k > 0; k--) {
  118. previous.remove(k);
  119. }
  120. }
  121. }
  122. }
  123. }
  124. // 将修改后的内容重新写入PDF页面
  125. List<PDStream> contents = new ArrayList<>();
  126. Iterator<PDStream> streams = page.getContentStreams();
  127. while (streams.hasNext()) {
  128. PDStream updatedStream = streams.next();
  129. OutputStream out = updatedStream.createOutputStream(COSName.FLATE_DECODE);
  130. ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
  131. tokenWriter.writeTokens(tokens);
  132. contents.add(updatedStream);
  133. out.close();
  134. }
  135. page.setContents(contents);
  136. }
  137. }