PDFUtil.java 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. package com.kingdee.eas.custom.compensation.utils;
  2. import com.kingdee.bos.BOSException;
  3. import org.apache.pdfbox.Loader;
  4. import org.apache.pdfbox.contentstream.operator.Operator;
  5. import org.apache.pdfbox.cos.COSArray;
  6. import org.apache.pdfbox.cos.COSName;
  7. import org.apache.pdfbox.cos.COSString;
  8. import org.apache.pdfbox.pdfparser.PDFStreamParser;
  9. import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
  10. import org.apache.pdfbox.pdmodel.PDDocument;
  11. import org.apache.pdfbox.pdmodel.PDPage;
  12. import org.apache.pdfbox.pdmodel.PDPageTree;
  13. import org.apache.pdfbox.pdmodel.common.PDStream;
  14. import java.io.ByteArrayOutputStream;
  15. import java.io.IOException;
  16. import java.io.OutputStream;
  17. import java.util.ArrayList;
  18. import java.util.Iterator;
  19. import java.util.List;
  20. /**
  21. * @Description pdf工具类
  22. * @Date 2025/10/28 12:45
  23. * @Created by 59279
  24. */
  25. public class PDFUtil {
  26. /**
  27. * 移除PDF文档中的文字水印
  28. *
  29. * @param file PDF文件的字节数组
  30. * @param searchString 需要移除的水印文字数组,可变参数
  31. * @return 移除水印后的PDF文件字节数组
  32. * @throws BOSException 当处理PDF文件出现IO异常时抛出
  33. */
  34. //移除文字水印
  35. public static byte[] removeWatermark(byte[] file, String... searchString) throws BOSException, IOException {
  36. PDDocument document = null;
  37. ByteArrayOutputStream bos = null;
  38. try {
  39. //通过文件名加载文档
  40. document = Loader.loadPDF(file);
  41. bos = new ByteArrayOutputStream();
  42. PDPageTree pages = document.getPages();
  43. Iterator<PDPage> iter = pages.iterator();
  44. //遍历所有页面,移除指定的文字水印
  45. while (iter.hasNext()) {
  46. PDPage page = iter.next();
  47. //去除文字水印
  48. for (String string : searchString) {
  49. replaceText(page, string, "");
  50. }
  51. }
  52. //移除最后一页(可能是空白页)
  53. //document.removePage(document.getNumberOfPages() - 1);
  54. //将处理后的文档保存到字节数组输出流
  55. document.save(bos);
  56. return bos.toByteArray();
  57. } finally {
  58. if (document != null) {
  59. document.close();
  60. }
  61. if (bos != null) {
  62. bos.close();
  63. }
  64. }
  65. }
  66. /**
  67. * 替换PDF页面中指定的文本内容
  68. *
  69. * @param page PDF页面对象,用于定位和修改文本内容
  70. * @param searchString 需要被替换的原始文本字符串
  71. * @param replacement 用来替换的新文本字符串
  72. * @throws IOException 当读取或写入PDF流时发生错误
  73. */
  74. //替换pdf文本内容
  75. public static void replaceText(PDPage page, String searchString, String replacement) throws IOException {
  76. PDFStreamParser parser = new PDFStreamParser(page);
  77. List<?> tokens = parser.parse();
  78. // 遍历解析后的PDF操作符和对象,查找并替换文本内容
  79. for (int j = 0; j < tokens.size(); j++) {
  80. Object next = tokens.get(j);
  81. if (next instanceof Operator) {
  82. Operator op = (Operator) next;
  83. String pstring = "";
  84. int prej = 0;
  85. // 处理单行文本显示操作符"Tj"
  86. if (op.getName().equals("Tj")) {
  87. COSString previous = (COSString) tokens.get(j - 1);
  88. String string = previous.getString();
  89. string = string.replaceFirst(searchString, replacement);
  90. previous.setValue(string.getBytes());
  91. }
  92. // 处理多行或多段文本显示操作符"TJ"
  93. else if (op.getName().equals("TJ")) {
  94. COSArray previous = (COSArray) tokens.get(j - 1);
  95. // 提取数组中的所有字符串内容
  96. for (int k = 0; k < previous.size(); k++) {
  97. Object arrElement = previous.getObject(k);
  98. if (arrElement instanceof COSString) {
  99. COSString cosString = (COSString) arrElement;
  100. String string = cosString.getString();
  101. if (j == prej) {
  102. pstring += string;
  103. } else {
  104. prej = j;
  105. pstring = string;
  106. }
  107. }
  108. }
  109. // 如果匹配搜索字符串,则进行替换
  110. if (searchString.equals(pstring.trim())) {
  111. COSString cosString2 = (COSString) previous.getObject(0);
  112. cosString2.setValue(replacement.getBytes());
  113. // 移除多余的数组元素,只保留替换后的第一个元素
  114. int total = previous.size() - 1;
  115. for (int k = total; k > 0; k--) {
  116. previous.remove(k);
  117. }
  118. }
  119. }
  120. }
  121. }
  122. // 将修改后的内容重新写入PDF页面
  123. List<PDStream> contents = new ArrayList<>();
  124. Iterator<PDStream> streams = page.getContentStreams();
  125. while (streams.hasNext()) {
  126. PDStream updatedStream = streams.next();
  127. OutputStream out = updatedStream.createOutputStream(COSName.FLATE_DECODE);
  128. ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
  129. tokenWriter.writeTokens(tokens);
  130. contents.add(updatedStream);
  131. out.close();
  132. }
  133. page.setContents(contents);
  134. }
  135. }