/*
 * Decompiled with CFR 0.152.
 */
package org.textmining.text.extraction1;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.poi1.hwpf.model.CHPBinTable;
import org.apache.poi1.hwpf.model.CHPX;
import org.apache.poi1.hwpf.model.ComplexFileTable;
import org.apache.poi1.hwpf.model.TextPiece;
import org.apache.poi1.hwpf.model.TextPieceTable;
import org.apache.poi1.poifs.filesystem.DocumentEntry;
import org.apache.poi1.poifs.filesystem.DocumentInputStream;
import org.apache.poi1.poifs.filesystem.POIFSFileSystem;
import org.apache.poi1.util.LittleEndian;
import org.textmining.text.extraction1.FastSavedException;
import org.textmining.text.extraction1.PasswordProtectedException;
import org.textmining.text.extraction1.Word6Extractor;
import org.textmining.text.extraction1.WordTextBuffer;
import org.textmining.text.extraction1.sprm.SprmIterator;
import org.textmining.text.extraction1.sprm.SprmOperation;

/**
 * Extracts the plain text of a Word document stored in a POIFS container.
 *
 * <p>The extractor reads the {@code WordDocument} stream, rejects fast-saved and
 * password-protected files, hands old-format files (nFib 101-104) to
 * {@link Word6Extractor}, and otherwise stitches the text together from the
 * text piece table while skipping character runs flagged as deleted.
 */
public class WordExtractor {
    /** Name of the main document stream inside the POIFS container. */
    private static final String MAIN_STREAM = "WordDocument";

    // NOTE(review): the offsets and masks below appear to follow the Word 97
    // File Information Block layout — confirm against the file format spec.
    /** Header offset of the version number used to detect old-format files. */
    private static final int NFIB_OFFSET = 2;
    /** Header offset of the 16-bit flag word. */
    private static final int FLAGS_OFFSET = 10;
    /** Header offset of the {@code fcMin} value passed to the POI table parsers. */
    private static final int FC_MIN_OFFSET = 24;
    /** Header offsets of the character-property bin table position and size. */
    private static final int CHP_TABLE_OFFSET = 250;
    private static final int CHP_TABLE_SIZE_OFFSET = 254;
    /** Header offset of the complex-file (text piece) table position. */
    private static final int COMPLEX_TABLE_OFFSET = 418;

    /** Flag bit that marks a fast-saved document. */
    private static final int FLAG_FAST_SAVED = 4;
    /** Flag bit that marks a password-protected document. */
    private static final int FLAG_PASSWORD_PROTECTED = 0x100;
    /** Flag bit that selects the {@code 1Table} stream over {@code 0Table}. */
    private static final int FLAG_USE_TABLE_1 = 0x200;

    /** Inclusive nFib range that is delegated to {@link Word6Extractor}. */
    private static final int OLD_FORMAT_NFIB_MIN = 101;
    private static final int OLD_FORMAT_NFIB_MAX = 104;

    /**
     * Extracts the text of the Word document read from {@code in}.
     *
     * @param in stream positioned at the start of the POIFS container
     * @return the extracted text; if the text pieces run out before the
     *         character runs do, the text collected up to that point
     * @throws FastSavedException if the header flags mark the file as fast-saved
     * @throws PasswordProtectedException if the header flags mark the file as
     *         password protected
     * @throws Exception if the container or any of its streams cannot be read
     */
    public String extractText(InputStream in) throws Exception {
        POIFSFileSystem fsys = new POIFSFileSystem(in);
        byte[] header = this.readDocumentStream(fsys, MAIN_STREAM);

        short info = LittleEndian.getShort(header, FLAGS_OFFSET);
        if ((info & FLAG_FAST_SAVED) != 0) {
            throw new FastSavedException("Fast-saved files are unsupported at this time");
        }
        if ((info & FLAG_PASSWORD_PROTECTED) != 0) {
            throw new PasswordProtectedException("This document is password protected");
        }

        short nFib = LittleEndian.getShort(header, NFIB_OFFSET);
        if (nFib >= OLD_FORMAT_NFIB_MIN && nFib <= OLD_FORMAT_NFIB_MAX) {
            // Old-format files are parsed from the main stream alone.
            Word6Extractor oldExtractor = new Word6Extractor();
            return oldExtractor.extractText(header);
        }

        int complexOffset = LittleEndian.getInt(header, COMPLEX_TABLE_OFFSET);
        String tableName = (info & FLAG_USE_TABLE_1) != 0 ? "1Table" : "0Table";
        byte[] tableStream = this.readDocumentStream(fsys, tableName);

        int chpOffset = LittleEndian.getInt(header, CHP_TABLE_OFFSET);
        int chpSize = LittleEndian.getInt(header, CHP_TABLE_SIZE_OFFSET);
        int fcMin = LittleEndian.getInt(header, FC_MIN_OFFSET);
        CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin);
        ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin);
        TextPieceTable tpt = cft.getTextPieceTable();
        List textPieces = tpt.getTextPieces();
        List textRuns = cbt.getTextRuns();

        return this.assembleText(textRuns, textPieces);
    }

    /**
     * Reads the whole named root-level stream of {@code fsys} into a byte array
     * sized from the entry's recorded size.
     *
     * <p>The stream is read in a loop because a single {@code read} call is not
     * required to fill the buffer, and it is closed even when reading fails. If
     * the stream ends early the remaining bytes stay zero.
     *
     * @throws Exception whatever the underlying POIFS calls throw (declared
     *         broadly because their checked exceptions are not visible here)
     */
    private byte[] readDocumentStream(POIFSFileSystem fsys, String name) throws Exception {
        DocumentEntry entry = (DocumentEntry)fsys.getRoot().getEntry(name);
        byte[] data = new byte[entry.getSize()];
        DocumentInputStream din = fsys.createDocumentInputStream(name);
        try {
            int filled = 0;
            while (filled < data.length) {
                int count = din.read(data, filled, data.length - filled);
                if (count <= 0) {
                    // End of stream (or no progress): stop rather than spin.
                    break;
                }
                filled += count;
            }
        } finally {
            din.close();
        }
        return data;
    }

    /**
     * Walks the character runs in order and appends the text each non-deleted
     * run covers, advancing through the text pieces as the runs move past them.
     *
     * <p>Run and piece positions are compared directly; substring offsets are
     * taken relative to the start of the current piece. Whenever the pieces are
     * exhausted while a run still needs text, the text gathered so far is
     * returned.
     *
     * @param textRuns list of {@link CHPX} character runs
     * @param textPieces list of {@link TextPiece} entries
     * @return the assembled text
     */
    private String assembleText(List textRuns, List textPieces) {
        WordTextBuffer finalTextBuf = new WordTextBuffer();
        Iterator runIt = textRuns.iterator();
        Iterator textIt = textPieces.iterator();
        if (!textIt.hasNext()) {
            // No text pieces at all: nothing to extract.
            return finalTextBuf.toString();
        }
        TextPiece currentPiece = (TextPiece)textIt.next();
        int currentTextStart = currentPiece.getStart();
        int currentTextEnd = currentPiece.getEnd();

        while (runIt.hasNext()) {
            CHPX chpx = (CHPX)runIt.next();
            if (this.isDeleted(chpx.getGrpprl())) {
                continue;
            }
            int runStart = chpx.getStart();
            int runEnd = chpx.getEnd();

            // Skip pieces that end at or before the start of this run.
            while (runStart >= currentTextEnd) {
                if (!textIt.hasNext()) {
                    // The run lies beyond the last piece; no more text exists.
                    return finalTextBuf.toString();
                }
                currentPiece = (TextPiece)textIt.next();
                currentTextStart = currentPiece.getStart();
                currentTextEnd = currentPiece.getEnd();
            }

            if (runEnd < currentTextEnd) {
                // The run ends inside the current piece.
                if (runStart - currentTextStart >= 0 && runEnd - currentTextStart >= 0) {
                    finalTextBuf.append(
                            currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart));
                }
            } else if (runEnd > currentTextEnd) {
                // The run spans several pieces: take the tail of each piece it
                // crosses, then the head of the piece in which it ends.
                while (runEnd > currentTextEnd) {
                    if (runStart - currentTextStart >= 0 && currentTextEnd - currentTextStart >= 0) {
                        finalTextBuf.append(
                                currentPiece.substring(runStart - currentTextStart, currentTextEnd - currentTextStart));
                    }
                    if (!textIt.hasNext()) {
                        return finalTextBuf.toString();
                    }
                    currentPiece = (TextPiece)textIt.next();
                    // The rest of the run continues from the start of the new piece.
                    runStart = currentTextStart = currentPiece.getStart();
                    currentTextEnd = currentPiece.getEnd();
                }
                if (runEnd - currentTextStart >= 0) {
                    finalTextBuf.append(currentPiece.substring(0, runEnd - currentTextStart));
                }
            } else if (runStart - currentTextStart >= 0 && runEnd - currentTextStart >= 0) {
                // The run ends exactly where the current piece ends: take the
                // text first, then move on to the next piece if there is one.
                String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
                if (textIt.hasNext()) {
                    currentPiece = (TextPiece)textIt.next();
                    currentTextStart = currentPiece.getStart();
                    currentTextEnd = currentPiece.getEnd();
                }
                finalTextBuf.append(str);
            }
        }
        return finalTextBuf.toString();
    }

    /**
     * Reports whether a run's property modifiers mark its text as deleted.
     *
     * @param grpprl raw property-modifier bytes of a character run
     * @return {@code true} if any modifier has operation 0 with a non-zero
     *         operand (presumably the "marked as deleted" revision property —
     *         TODO confirm against the sprm definitions)
     */
    private boolean isDeleted(byte[] grpprl) {
        SprmIterator iterator = new SprmIterator(grpprl);
        while (iterator.hasNext()) {
            SprmOperation op = iterator.next();
            if (op.getOperation() == 0 && op.getOperand() != 0) {
                return true;
            }
        }
        return false;
    }
}

