|
|
@@ -0,0 +1,147 @@
|
|
|
+package com.kingdee.eas.custom.compensation.utils;
|
|
|
+
|
|
|
+import com.kingdee.bos.BOSException;
|
|
|
+import org.apache.pdfbox.Loader;
|
|
|
+import org.apache.pdfbox.contentstream.operator.Operator;
|
|
|
+import org.apache.pdfbox.cos.COSArray;
|
|
|
+import org.apache.pdfbox.cos.COSName;
|
|
|
+import org.apache.pdfbox.cos.COSString;
|
|
|
+import org.apache.pdfbox.pdfparser.PDFStreamParser;
|
|
|
+import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
|
|
|
+import org.apache.pdfbox.pdmodel.PDDocument;
|
|
|
+import org.apache.pdfbox.pdmodel.PDPage;
|
|
|
+import org.apache.pdfbox.pdmodel.PDPageTree;
|
|
|
+import org.apache.pdfbox.pdmodel.common.PDStream;
|
|
|
+
|
|
|
+import java.io.ByteArrayOutputStream;
|
|
|
+import java.io.IOException;
|
|
|
+import java.io.OutputStream;
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.Iterator;
|
|
|
+import java.util.List;
|
|
|
+
|
|
|
+/**
|
|
|
+ * @Description pdf工具类
|
|
|
+ * @Date 2025/10/28 12:45
|
|
|
+ * @Created by 59279
|
|
|
+ */
|
|
|
+public class PDFUtil {
|
|
|
+ /**
|
|
|
+ * 移除PDF文档中的文字水印
|
|
|
+ *
|
|
|
+ * @param file PDF文件的字节数组
|
|
|
+ * @param searchString 需要移除的水印文字数组,可变参数
|
|
|
+ * @return 移除水印后的PDF文件字节数组
|
|
|
+ * @throws BOSException 当处理PDF文件出现IO异常时抛出
|
|
|
+ */
|
|
|
+ //移除文字水印
|
|
|
+ public static byte[] removeWatermark(byte[] file, String... searchString) throws BOSException, IOException {
|
|
|
+ PDDocument document = null;
|
|
|
+ ByteArrayOutputStream bos = null;
|
|
|
+ try {
|
|
|
+ //通过文件名加载文档
|
|
|
+ document = Loader.loadPDF(file);
|
|
|
+ bos = new ByteArrayOutputStream();
|
|
|
+ PDPageTree pages = document.getPages();
|
|
|
+ Iterator<PDPage> iter = pages.iterator();
|
|
|
+ //遍历所有页面,移除指定的文字水印
|
|
|
+ while (iter.hasNext()) {
|
|
|
+ PDPage page = iter.next();
|
|
|
+ //去除文字水印
|
|
|
+ for (String string : searchString) {
|
|
|
+ replaceText(page, string, "");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //移除最后一页(可能是空白页)
|
|
|
+ //document.removePage(document.getNumberOfPages() - 1);
|
|
|
+ //将处理后的文档保存到字节数组输出流
|
|
|
+ document.save(bos);
|
|
|
+ return bos.toByteArray();
|
|
|
+ } finally {
|
|
|
+ if (document != null) {
|
|
|
+ document.close();
|
|
|
+ }
|
|
|
+ if (bos != null) {
|
|
|
+ bos.close();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 替换PDF页面中指定的文本内容
|
|
|
+ *
|
|
|
+ * @param page PDF页面对象,用于定位和修改文本内容
|
|
|
+ * @param searchString 需要被替换的原始文本字符串
|
|
|
+ * @param replacement 用来替换的新文本字符串
|
|
|
+ * @throws IOException 当读取或写入PDF流时发生错误
|
|
|
+ */
|
|
|
+ //替换pdf文本内容
|
|
|
+ public static void replaceText(PDPage page, String searchString, String replacement) throws IOException {
|
|
|
+ PDFStreamParser parser = new PDFStreamParser(page);
|
|
|
+ List<?> tokens = parser.parse();
|
|
|
+
|
|
|
+ // 遍历解析后的PDF操作符和对象,查找并替换文本内容
|
|
|
+ for (int j = 0; j < tokens.size(); j++) {
|
|
|
+ Object next = tokens.get(j);
|
|
|
+ if (next instanceof Operator) {
|
|
|
+ Operator op = (Operator) next;
|
|
|
+ String pstring = "";
|
|
|
+ int prej = 0;
|
|
|
+ // 处理单行文本显示操作符"Tj"
|
|
|
+ if (op.getName().equals("Tj")) {
|
|
|
+ COSString previous = (COSString) tokens.get(j - 1);
|
|
|
+ String string = previous.getString();
|
|
|
+ string = string.replaceFirst(searchString, replacement);
|
|
|
+ previous.setValue(string.getBytes());
|
|
|
+ }
|
|
|
+ // 处理多行或多段文本显示操作符"TJ"
|
|
|
+ else if (op.getName().equals("TJ")) {
|
|
|
+ COSArray previous = (COSArray) tokens.get(j - 1);
|
|
|
+
|
|
|
+ // 提取数组中的所有字符串内容
|
|
|
+ for (int k = 0; k < previous.size(); k++) {
|
|
|
+ Object arrElement = previous.getObject(k);
|
|
|
+ if (arrElement instanceof COSString) {
|
|
|
+ COSString cosString = (COSString) arrElement;
|
|
|
+ String string = cosString.getString();
|
|
|
+
|
|
|
+ if (j == prej) {
|
|
|
+ pstring += string;
|
|
|
+ } else {
|
|
|
+ prej = j;
|
|
|
+ pstring = string;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 如果匹配搜索字符串,则进行替换
|
|
|
+ if (searchString.equals(pstring.trim())) {
|
|
|
+ COSString cosString2 = (COSString) previous.getObject(0);
|
|
|
+ cosString2.setValue(replacement.getBytes());
|
|
|
+
|
|
|
+ // 移除多余的数组元素,只保留替换后的第一个元素
|
|
|
+ int total = previous.size() - 1;
|
|
|
+ for (int k = total; k > 0; k--) {
|
|
|
+ previous.remove(k);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 将修改后的内容重新写入PDF页面
|
|
|
+ List<PDStream> contents = new ArrayList<>();
|
|
|
+ Iterator<PDStream> streams = page.getContentStreams();
|
|
|
+ while (streams.hasNext()) {
|
|
|
+ PDStream updatedStream = streams.next();
|
|
|
+ OutputStream out = updatedStream.createOutputStream(COSName.FLATE_DECODE);
|
|
|
+ ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
|
|
|
+ tokenWriter.writeTokens(tokens);
|
|
|
+ contents.add(updatedStream);
|
|
|
+ out.close();
|
|
|
+ }
|
|
|
+ page.setContents(contents);
|
|
|
+ }
|
|
|
+
|
|
|
+}
|