/*
 * Decompiled with CFR 0.152.
 */
package org.apache.poi1.hwpf.extractor;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import org.apache.poi1.hwpf.HWPFDocument;
import org.apache.poi1.hwpf.model.TextPiece;
import org.apache.poi1.hwpf.usermodel.Paragraph;
import org.apache.poi1.hwpf.usermodel.Range;
import org.apache.poi1.poifs.filesystem.POIFSFileSystem;

/**
 * Extracts the text of a Word document held in an {@link HWPFDocument}.
 *
 * <p>Text can be obtained paragraph by paragraph ({@link #getParagraphText()}),
 * as one concatenated string ({@link #getText()}), or directly from the
 * document's text pieces ({@link #getTextFromPieces()}).
 */
public class WordExtractor {
    /** File system the document was built from; only set by the POIFS-based constructor. */
    private POIFSFileSystem fs;
    /** Document whose text is extracted. */
    private HWPFDocument doc;

    /**
     * Creates an extractor from a raw input stream. The stream is not closed
     * by this constructor.
     *
     * @param is stream handed to {@code HWPFDocument.verifyAndBuildPOIFS}
     * @throws IOException if building the file system or document fails
     */
    public WordExtractor(InputStream is) throws IOException {
        this(HWPFDocument.verifyAndBuildPOIFS(is));
    }

    /**
     * Creates an extractor from an already opened POIFS file system.
     *
     * @param fs file system used to build the {@link HWPFDocument}
     * @throws IOException if building the document fails
     */
    public WordExtractor(POIFSFileSystem fs) throws IOException {
        this(new HWPFDocument(fs));
        this.fs = fs;
    }

    /**
     * Creates an extractor for an existing document.
     *
     * @param doc document to extract text from
     * @throws IOException declared for compatibility with the other constructors
     */
    public WordExtractor(HWPFDocument doc) throws IOException {
        this.doc = doc;
    }

    /**
     * Command line entry point: prints the text of the file named by the first
     * argument to standard output. Prints a usage message and exits with
     * status 1 when no argument is given.
     *
     * @param args command line arguments; {@code args[0]} is the file name
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            System.err.println("Use:");
            // Print the real class name so the suggested command is runnable.
            System.err.println("   java " + WordExtractor.class.getName() + " <filename>");
            System.exit(1);
        }
        FileInputStream fin = new FileInputStream(args[0]);
        try {
            WordExtractor extractor = new WordExtractor(fin);
            System.out.println(extractor.getText());
        } finally {
            // Always release the file handle, even if extraction fails.
            fin.close();
        }
    }

    /**
     * Returns the text of each paragraph in the document's range.
     *
     * <p>A paragraph whose text ends with {@code "\r"} gets {@code "\n"}
     * appended. If reading the paragraphs throws any exception, the result is
     * instead a single-element array holding {@link #getTextFromPieces()}.
     *
     * @return one string per paragraph, or a single-element fallback array
     */
    public String[] getParagraphText() {
        String[] ret;
        try {
            Range r = this.doc.getRange();
            ret = new String[r.numParagraphs()];
            for (int i = 0; i < ret.length; ++i) {
                Paragraph p = r.getParagraph(i);
                String paragraphText = p.text();
                if (paragraphText.endsWith("\r")) {
                    paragraphText = paragraphText + "\n";
                }
                ret[i] = paragraphText;
            }
        }
        catch (Exception e) {
            // Deliberate best-effort: fall back to the raw text pieces
            // rather than failing the whole extraction.
            ret = new String[]{this.getTextFromPieces()};
        }
        return ret;
    }

    /**
     * Builds the document text by decoding every text piece of the document's
     * text table and concatenating the results.
     *
     * <p>Pieces reporting {@code usesUnicode()} are decoded as UTF-16LE, all
     * others as Cp1252. Afterwards runs of three and then two consecutive
     * {@code "\r"} characters are expanded to {@code "\r\n"} sequences, and a
     * trailing {@code "\r"} gets {@code "\n"} appended.
     *
     * @return the concatenated, newline-adjusted text
     * @throws InternalError if the JVM does not support one of the encodings
     */
    public String getTextFromPieces() {
        StringBuffer textBuf = new StringBuffer();
        Iterator textPieces = this.doc.getTextTable().getTextPieces().iterator();
        while (textPieces.hasNext()) {
            TextPiece piece = (TextPiece)textPieces.next();
            String encoding = piece.usesUnicode() ? "UTF-16LE" : "Cp1252";
            try {
                textBuf.append(new String(piece.getRawBytes(), encoding));
            }
            catch (UnsupportedEncodingException e) {
                InternalError err = new InternalError("Standard Encoding " + encoding + " not found, JVM broken");
                // Keep the original exception so the failure can be diagnosed.
                err.initCause(e);
                throw err;
            }
        }
        String text = textBuf.toString();
        // Triple runs are handled first so the double-run pass does not split them.
        text = text.replaceAll("\r\r\r", "\r\n\r\n\r\n");
        text = text.replaceAll("\r\r", "\r\n\r\n");
        if (text.endsWith("\r")) {
            text = text + "\n";
        }
        return text;
    }

    /**
     * Returns the whole document text: the entries of
     * {@link #getParagraphText()} concatenated in order.
     *
     * @return the concatenated paragraph text
     */
    public String getText() {
        StringBuffer ret = new StringBuffer();
        String[] text = this.getParagraphText();
        for (int i = 0; i < text.length; ++i) {
            ret.append(text[i]);
        }
        return ret.toString();
    }
}

