package com.kingdee.eas.custom.compensation.utils;

import com.kingdee.bos.BOSException;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.common.PDStream;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * PDF utility class.
 *
 * <p>Provides helpers that edit the text-showing operators ({@code Tj} / {@code TJ}) of a page's
 * content stream, e.g. to strip a textual watermark.
 *
 * <p>Created 2025/10/28 12:45 by 59279.
 */
public class PDFUtil {

    /**
     * Removes textual watermarks from a PDF document.
     *
     * <p>Each page is parsed once, every given watermark text is removed from its {@code Tj} /
     * {@code TJ} operands, and the page content is rewritten only if something actually changed.
     * {@code null} or empty watermark entries are ignored.
     *
     * @param file         the PDF document as a byte array; must not be {@code null}
     * @param searchString the watermark texts to remove (matched literally, not as regular
     *                     expressions)
     * @return the bytes of the PDF document after watermark removal
     * @throws BOSException             declared for caller compatibility; not thrown by this
     *                                  implementation
     * @throws IOException              if the PDF cannot be loaded, parsed, rewritten or saved
     * @throws IllegalArgumentException if {@code file} is {@code null}
     */
    public static byte[] removeWatermark(byte[] file, String... searchString)
            throws BOSException, IOException {
        if (file == null) {
            throw new IllegalArgumentException("file must not be null");
        }
        try (PDDocument document = PDDocument.load(file);
             ByteArrayOutputStream bos = new ByteArrayOutputStream()) {
            PDPageTree pages = document.getPages();
            for (PDPage page : pages) {
                // Parse once per page and apply all watermark texts to the same token list,
                // instead of re-parsing and re-writing the page for every search string.
                List<Object> tokens = parseTokens(page);
                boolean changed = false;
                if (searchString != null) {
                    for (String watermark : searchString) {
                        if (watermark == null || watermark.isEmpty()) {
                            continue;
                        }
                        changed |= replaceInTokens(tokens, watermark, "");
                    }
                }
                if (changed) {
                    writeTokens(page, tokens);
                }
            }
            document.save(bos);
            return bos.toByteArray();
        }
    }

    /**
     * Replaces text in the content of a single PDF page.
     *
     * <p>For a {@code Tj} operator the first literal occurrence of {@code searchString} inside
     * the string operand is replaced. For a {@code TJ} operator the string elements of the array
     * operand are concatenated; if the trimmed result equals {@code searchString}, the whole
     * array is replaced by a single string holding {@code replacement}. The page content is
     * rewritten only when at least one operand was modified.
     *
     * @param page         the page whose content is searched and modified
     * @param searchString the literal text to look for
     * @param replacement  the text to put in its place
     * @throws IOException              if reading or writing the page content fails
     * @throws IllegalArgumentException if {@code searchString} or {@code replacement} is
     *                                  {@code null}
     */
    public static void replaceText(PDPage page, String searchString, String replacement)
            throws IOException {
        if (searchString == null || replacement == null) {
            throw new IllegalArgumentException("searchString and replacement must not be null");
        }
        List<Object> tokens = parseTokens(page);
        if (replaceInTokens(tokens, searchString, replacement)) {
            writeTokens(page, tokens);
        }
    }

    /** Parses the page's content into PDFBox's flat list of operands and operators. */
    private static List<Object> parseTokens(PDPage page) throws IOException {
        PDFStreamParser parser = new PDFStreamParser(page);
        parser.parse();
        return parser.getTokens();
    }

    /**
     * Applies the replacement to every {@code Tj} / {@code TJ} operand in the token list.
     *
     * @return {@code true} if at least one operand was modified
     */
    private static boolean replaceInTokens(List<Object> tokens, String searchString,
                                           String replacement) {
        boolean changed = false;
        // Start at 1: an operator at index 0 has no preceding operand to inspect.
        for (int j = 1; j < tokens.size(); j++) {
            Object token = tokens.get(j);
            if (!(token instanceof Operator)) {
                continue;
            }
            String operatorName = ((Operator) token).getName();
            Object operand = tokens.get(j - 1);
            // The instanceof checks skip operators whose operand is not of the expected type
            // instead of failing with a ClassCastException.
            if ("Tj".equals(operatorName) && operand instanceof COSString) {
                changed |= replaceInString((COSString) operand, searchString, replacement);
            } else if ("TJ".equals(operatorName) && operand instanceof COSArray) {
                changed |= replaceInArray((COSArray) operand, searchString, replacement);
            }
        }
        return changed;
    }

    /**
     * Replaces the first literal occurrence of the search text inside a {@code Tj} operand.
     *
     * @return {@code true} if the operand's value was changed
     */
    private static boolean replaceInString(COSString operand, String searchString,
                                           String replacement) {
        String text = operand.getString();
        // Literal search: the search text is not interpreted as a regular expression, which
        // keeps this branch consistent with the equals() comparison used for TJ arrays.
        int at = text.indexOf(searchString);
        if (at < 0) {
            return false;
        }
        String updated = text.substring(0, at) + replacement
                + text.substring(at + searchString.length());
        if (updated.equals(text)) {
            return false;
        }
        // Only strings that really changed are re-encoded; untouched strings keep their
        // original bytes.
        operand.setValue(encode(updated));
        return true;
    }

    /**
     * Replaces a {@code TJ} array by a single string when its concatenated, trimmed text equals
     * the search text.
     *
     * @return {@code true} if the array was replaced
     */
    private static boolean replaceInArray(COSArray operand, String searchString,
                                          String replacement) {
        StringBuilder joined = new StringBuilder();
        for (int k = 0; k < operand.size(); k++) {
            Object element = operand.getObject(k);
            // Non-string elements (e.g. positioning numbers) carry no text and are skipped.
            if (element instanceof COSString) {
                joined.append(((COSString) element).getString());
            }
        }
        if (!searchString.equals(joined.toString().trim())) {
            return false;
        }
        // Rebuild the array as one string; this also works when the first element of the
        // original array was not a string.
        operand.clear();
        operand.add(new COSString(replacement));
        return true;
    }

    /**
     * Encodes text through COSString's own String constructor rather than the platform default
     * charset, so the result does not depend on the JVM's locale settings.
     */
    private static byte[] encode(String text) {
        return new COSString(text).getBytes();
    }

    /**
     * Writes the token list back as the page's content.
     *
     * <p>The tokens come from parsing the page as a whole, so they are written exactly once,
     * into the first existing content stream, which then becomes the page's only content
     * stream. Writing them into every stream would repeat the content once per stream.
     */
    private static void writeTokens(PDPage page, List<Object> tokens) throws IOException {
        List<PDStream> contents = new ArrayList<>();
        Iterator<PDStream> streams = page.getContentStreams();
        if (streams.hasNext()) {
            PDStream target = streams.next();
            // try-with-resources closes the stream even if writing the tokens fails.
            try (OutputStream out = target.createOutputStream(COSName.FLATE_DECODE)) {
                ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
                tokenWriter.writeTokens(tokens);
            }
            contents.add(target);
        }
        page.setContents(contents);
    }
}