| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149 |
- package com.kingdee.eas.custom.compensation.utils;
- import com.kingdee.bos.BOSException;
- import org.apache.pdfbox.contentstream.operator.Operator;
- import org.apache.pdfbox.cos.COSArray;
- import org.apache.pdfbox.cos.COSName;
- import org.apache.pdfbox.cos.COSString;
- import org.apache.pdfbox.pdfparser.PDFStreamParser;
- import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
- import org.apache.pdfbox.pdmodel.PDDocument;
- import org.apache.pdfbox.pdmodel.PDPage;
- import org.apache.pdfbox.pdmodel.PDPageTree;
- import org.apache.pdfbox.pdmodel.common.PDStream;
- import java.io.ByteArrayOutputStream;
- import java.io.IOException;
- import java.io.OutputStream;
- import java.util.ArrayList;
- import java.util.Iterator;
- import java.util.List;
- /**
- * @Description pdf工具类
- * @Date 2025/10/28 12:45
- * @Created by 59279
- */
- public class PDFUtil {
- /**
- * 移除PDF文档中的文字水印
- *
- * @param file PDF文件的字节数组
- * @param searchString 需要移除的水印文字数组,可变参数
- * @return 移除水印后的PDF文件字节数组
- * @throws BOSException 当处理PDF文件出现IO异常时抛出
- */
- //移除文字水印
- public static byte[] removeWatermark(byte[] file, String... searchString) throws BOSException, IOException {
- PDDocument document = null;
- ByteArrayOutputStream bos = null;
- try {
- //通过文件名加载文档
- document = PDDocument.load(file);
- //document = Loader.loadPDF(file);
- bos = new ByteArrayOutputStream();
- PDPageTree pages = document.getPages();
- Iterator<PDPage> iter = pages.iterator();
- //遍历所有页面,移除指定的文字水印
- while (iter.hasNext()) {
- PDPage page = iter.next();
- //去除文字水印
- for (String string : searchString) {
- replaceText(page, string, "");
- }
- }
- //移除最后一页(可能是空白页)
- //document.removePage(document.getNumberOfPages() - 1);
- //将处理后的文档保存到字节数组输出流
- document.save(bos);
- return bos.toByteArray();
- } finally {
- if (document != null) {
- document.close();
- }
- if (bos != null) {
- bos.close();
- }
- }
- }
- /**
- * 替换PDF页面中指定的文本内容
- *
- * @param page PDF页面对象,用于定位和修改文本内容
- * @param searchString 需要被替换的原始文本字符串
- * @param replacement 用来替换的新文本字符串
- * @throws IOException 当读取或写入PDF流时发生错误
- */
- //替换pdf文本内容
- public static void replaceText(PDPage page, String searchString, String replacement) throws IOException {
- PDFStreamParser parser = new PDFStreamParser(page);
- parser.parse();
- List<?> tokens = parser.getTokens();
- //List<?> tokens = parser.parse();
- // 遍历解析后的PDF操作符和对象,查找并替换文本内容
- for (int j = 0; j < tokens.size(); j++) {
- Object next = tokens.get(j);
- if (next instanceof Operator) {
- Operator op = (Operator) next;
- String pstring = "";
- int prej = 0;
- // 处理单行文本显示操作符"Tj"
- if (op.getName().equals("Tj")) {
- COSString previous = (COSString) tokens.get(j - 1);
- String string = previous.getString();
- string = string.replaceFirst(searchString, replacement);
- previous.setValue(string.getBytes());
- }
- // 处理多行或多段文本显示操作符"TJ"
- else if (op.getName().equals("TJ")) {
- COSArray previous = (COSArray) tokens.get(j - 1);
- // 提取数组中的所有字符串内容
- for (int k = 0; k < previous.size(); k++) {
- Object arrElement = previous.getObject(k);
- if (arrElement instanceof COSString) {
- COSString cosString = (COSString) arrElement;
- String string = cosString.getString();
- if (j == prej) {
- pstring += string;
- } else {
- prej = j;
- pstring = string;
- }
- }
- }
- // 如果匹配搜索字符串,则进行替换
- if (searchString.equals(pstring.trim())) {
- COSString cosString2 = (COSString) previous.getObject(0);
- cosString2.setValue(replacement.getBytes());
- // 移除多余的数组元素,只保留替换后的第一个元素
- int total = previous.size() - 1;
- for (int k = total; k > 0; k--) {
- previous.remove(k);
- }
- }
- }
- }
- }
- // 将修改后的内容重新写入PDF页面
- List<PDStream> contents = new ArrayList<>();
- Iterator<PDStream> streams = page.getContentStreams();
- while (streams.hasNext()) {
- PDStream updatedStream = streams.next();
- OutputStream out = updatedStream.createOutputStream(COSName.FLATE_DECODE);
- ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
- tokenWriter.writeTokens(tokens);
- contents.add(updatedStream);
- out.close();
- }
- page.setContents(contents);
- }
- }
|