/*
 * Decompiled with CFR 0.152.
 */
package net.nooj4nlp.engine;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import javax.swing.JOptionPane;
import javax.swing.text.BadLocationException;
import net.nooj4nlp.engine.Charlist;
import net.nooj4nlp.engine.Dic;
import net.nooj4nlp.engine.Engine;
import net.nooj4nlp.engine.Language;
import net.nooj4nlp.engine.Ntext;
import net.nooj4nlp.engine.RefObject;
import net.nooj4nlp.engine.TextIO;
import net.nooj4nlp.engine.TransitionObject;
import net.nooj4nlp.engine.TransitionPair;
import net.nooj4nlp.engine.Zip;
import net.nooj4nlp.engine.helper.ParameterCheck;
import net.nooj4nlp.gui.main.Launcher;
import org.apache.commons.io.FilenameUtils;

public class Corpus
implements Serializable {
    private static final long serialVersionUID = 3406281615638257605L;
    private static final String CORPUS_BIN = "corpus.bin";
    public ArrayList<String> listOfFileTexts = new ArrayList();
    public String languageName;
    public transient Language lan;
    public int encodingType;
    public String encodingCode;
    public String encodingName;
    public String delimPattern;
    public String[] xmlNodes;
    public int nbOfTextUnits;
    public int nbOfChars;
    public int nbOfDiffChars;
    public int nbOfLetters;
    public int nbOfDiffLetters;
    public int nbOfDelimiters;
    public int nbOfDiffDelimiters;
    public int nbOfBlanks;
    public int nbOfDiffBlanks;
    public int nbOfDigits;
    public int nbOfDiffDigits;
    public int nbOfTokens;
    public int nbOfDiffTokens;
    public int nbOfWords;
    public int nbOfDiffWords;
    public transient Charlist charlist = null;
    public transient double multiplier = 1.0;
    public transient HashMap<String, Integer> hTokens = null;
    public transient HashMap<String, ArrayList<Object>> hAmbiguities = null;
    public transient HashMap<String, ArrayList<Object>> hUnambiguities = null;
    public transient HashMap<String, Integer> hDigrams = null;
    public transient HashMap<String, Integer> hLexemes = null;
    public transient HashMap<String, Integer> hUnknowns = null;
    public transient HashMap<String, Integer> hPhrases = null;
    public transient ArrayList<String> listOfResources = null;
    public ArrayList<Object> annotations = null;

    public Corpus(String delimPattern, String[] xmlNodes, int encodingType, String encodingCode, String encodingName, String languageName) {
        this.delimPattern = delimPattern;
        this.xmlNodes = xmlNodes;
        this.encodingType = encodingType;
        this.encodingCode = encodingCode;
        if (encodingName == null) {
            encodingName = "";
        }
        this.encodingName = encodingName;
        this.languageName = languageName;
        this.lan = new Language(languageName);
        this.annotations = new ArrayList();
        this.hLexemes = new HashMap();
        this.hPhrases = new HashMap();
        this.nbOfDiffWords = -1;
        this.nbOfWords = -1;
        this.nbOfDiffTokens = -1;
        this.nbOfTokens = -1;
        this.nbOfDiffDigits = -1;
        this.nbOfDigits = -1;
        this.nbOfDiffBlanks = -1;
        this.nbOfBlanks = -1;
        this.nbOfDiffDelimiters = -1;
        this.nbOfDelimiters = -1;
        this.nbOfDiffLetters = -1;
        this.nbOfLetters = -1;
        this.nbOfDiffChars = -1;
        this.nbOfChars = -1;
        this.nbOfTextUnits = -1;
    }

    public final boolean addTextFile(String corpusDirName, String textFullPath, Engine engine) throws IOException, BadLocationException, ClassNotFoundException {
        ParameterCheck.mandatoryString("corpusDirName", corpusDirName);
        ParameterCheck.mandatoryString("textFullPath", textFullPath);
        Ntext ntext = null;
        String fileExtension = FilenameUtils.getExtension(textFullPath);
        if (fileExtension.equals("jnot") && this.encodingName.equals("Default")) {
            String errMessage = null;
            RefObject<Object> tempErrMessage = new RefObject<Object>(errMessage);
            ntext = Ntext.load(textFullPath, this.languageName, tempErrMessage);
            errMessage = (String)tempErrMessage.argvalue;
            if (ntext == null) {
                System.out.println("Format is incorrect for file: " + textFullPath + "\n" + errMessage + "NooJ: text file format does not match corpus file format");
            }
        } else {
            ntext = new Ntext(this);
            try {
                ntext.buffer = TextIO.loadText(textFullPath, this.encodingType, this.encodingCode, this.encodingName, this.lan.chartable);
            }
            catch (RuntimeException ex) {
                System.out.println("Format is incorrect for file: " + textFullPath + "\n" + ex.getMessage() + "NooJ: text file format does not match corpus file format");
                return false;
            }
        }
        if (ntext.buffer == null) {
            System.out.println("Cannot load text from file: " + textFullPath);
            return false;
        }
        ntext.DelimPattern = this.delimPattern;
        ntext.XmlNodes = this.xmlNodes;
        if (engine != null) {
            if (this.xmlNodes != null) {
                ntext.delimitXmlTextUnitsAndImportXmlTags(this, engine, this.xmlNodes, this.annotations, this.hLexemes, this.hPhrases);
            } else {
                ntext.delimitTextUnits(engine);
            }
        }
        String fileNameWithExtension = FilenameUtils.getName(textFullPath);
        String fileNameWithoutExtension = String.valueOf(FilenameUtils.removeExtension(fileNameWithExtension)) + "." + "jnot";
        File corpusDir = new File(corpusDirName);
        if (!corpusDir.exists()) {
            new File(corpusDirName).mkdir();
        }
        File textFile = new File(corpusDirName, fileNameWithoutExtension);
        String textFileName = textFile.getPath();
        ntext.saveForCorpus(textFileName);
        if (this.listOfFileTexts == null) {
            this.listOfFileTexts = new ArrayList();
        }
        if (this.listOfFileTexts.indexOf(fileNameWithoutExtension) == -1) {
            this.listOfFileTexts.add(fileNameWithoutExtension);
        }
        return true;
    }

    public final void saveIn(String resDirPath) throws IOException {
        ParameterCheck.mandatoryString("resDirPath", resDirPath);
        int nbOfHacks = 0;
        if (this.annotations != null) {
            if (this.hTokens != null) {
                this.annotations.add(0, "$tokens$");
                this.annotations.add(1, this.hTokens);
                ++nbOfHacks;
            }
            if (this.listOfResources != null) {
                this.annotations.add(0, "$resources$");
                this.annotations.add(1, this.listOfResources);
                ++nbOfHacks;
            }
            this.annotations.add(0, "$multiplier$");
            this.annotations.add(1, null);
            ++nbOfHacks;
        }
        File resDir = new File(resDirPath);
        resDir.mkdir();
        File corpusDir = new File(resDirPath, CORPUS_BIN);
        String corpusDirFullName = corpusDir.getName();
        try {
            FileOutputStream fileOutputStream = new FileOutputStream(corpusDir);
            ObjectOutputStream objectOutputStream = new ObjectOutputStream(fileOutputStream);
            objectOutputStream.writeObject(this);
            objectOutputStream.flush();
            objectOutputStream.close();
            fileOutputStream.close();
        }
        catch (RuntimeException e) {
            System.out.println("NooJ: Cannot save corpus in file " + corpusDirFullName + ":\n" + e.getMessage());
            return;
        }
        if (nbOfHacks > 0) {
            int i = 0;
            while (i < nbOfHacks) {
                this.annotations.remove(0);
                this.annotations.remove(0);
                ++i;
            }
        }
    }

    public static Corpus load(String corpusName, String languageName) throws IOException, ClassNotFoundException {
        Corpus corpus;
        ParameterCheck.mandatoryString("corpusName", corpusName);
        ParameterCheck.mandatoryString("languageName", languageName);
        String crpDirName = String.valueOf(corpusName) + "_dir";
        File corpusDir = new File(crpDirName);
        File corpusFile = new File(corpusName);
        if (corpusDir.exists()) {
            Date corpusDate;
            Date corpusDirDate;
            if (corpusDir.isDirectory() && (corpusDirDate = new Date(corpusDir.lastModified())).before(corpusDate = new Date(corpusFile.lastModified()))) {
                corpusDir.delete();
            }
        } else {
            try {
                Zip.uncompressDir(crpDirName, corpusName);
            }
            catch (RuntimeException ex) {
                System.out.println("NooJ cannot load corrupted corpus file: " + ex.getMessage());
                if (new File(crpDirName).isDirectory()) {
                    try {
                        corpusDir.delete();
                    }
                    catch (Exception corpusDate) {
                        // empty catch block
                    }
                }
                return null;
            }
        }
        if ((corpus = Corpus.loadIn(crpDirName, languageName)) == null) {
            System.out.println("NooJ cannot load corrupted corpus file in folder: " + crpDirName);
            return null;
        }
        corpus.listOfFileTexts = new ArrayList();
        File[] fileArray = corpusDir.listFiles();
        int n = fileArray.length;
        int n2 = 0;
        while (n2 < n) {
            File fileFromCorpusDir = fileArray[n2];
            String fileFromCorpusDirName = fileFromCorpusDir.getName();
            String ext = FilenameUtils.getExtension(fileFromCorpusDirName);
            if (ext.equals("jnot") && fileFromCorpusDir.isFile()) {
                corpus.listOfFileTexts.add(fileFromCorpusDirName);
            }
            ++n2;
        }
        return corpus;
    }

    private static void computehLexemes(Corpus crp) {
        ParameterCheck.mandatory("crp", crp);
        crp.hLexemes = new HashMap();
        crp.hPhrases = new HashMap();
        crp.hUnknowns = new HashMap();
        if (crp.annotations.size() > 0) {
            int i = 0;
            while (i < crp.annotations.size()) {
                String lex = (String)crp.annotations.get(i);
                if (lex != null) {
                    String entry = null;
                    RefObject<Object> entryBuilder = new RefObject<Object>(entry);
                    String lemma = null;
                    RefObject<Object> lemmaBuilder = new RefObject<Object>(lemma);
                    String info = null;
                    RefObject<Object> infoBuilder = new RefObject<Object>(info);
                    boolean tempVar = !Dic.parseDELAF(lex, entryBuilder, lemmaBuilder, infoBuilder);
                    entry = (String)entryBuilder.argvalue;
                    lemma = (String)lemmaBuilder.argvalue;
                    info = (String)infoBuilder.argvalue;
                    if (tempVar) {
                        Dic.writeLog("Error: invalid annotation: \"" + lex + "\"");
                    }
                    if (info.equals("UNKNOWN")) {
                        if (!crp.hUnknowns.containsKey(lex)) {
                            crp.hUnknowns.put(lex, i);
                        }
                    } else if (lemma.equals("SYNTAX")) {
                        if (!crp.hPhrases.containsKey(lex)) {
                            crp.hPhrases.put(lex, i);
                        }
                    } else if (!crp.hLexemes.containsKey(lex)) {
                        crp.hLexemes.put(lex, i);
                    }
                }
                ++i;
            }
        }
    }

    private static Corpus loadIn(String resDirPath, String languagename) throws IOException, ClassNotFoundException {
        ParameterCheck.mandatoryString("resDirPath", resDirPath);
        FileInputStream fileInputStream = null;
        Corpus crp = null;
        File corpusDir = new File(resDirPath, CORPUS_BIN);
        String corpusDirFullName = corpusDir.getName();
        try {
            fileInputStream = new FileInputStream(corpusDir);
            ObjectInputStream objectInputStream = new ObjectInputStream(fileInputStream);
            crp = (Corpus)objectInputStream.readObject();
            fileInputStream.close();
            crp.lan = new Language(crp.languageName);
        }
        catch (RuntimeException ex) {
            System.out.println("NooJ: Cannot load corpus file " + corpusDirFullName + "\n" + ex.getMessage());
            if (fileInputStream != null) {
                fileInputStream.close();
            }
            return null;
        }
        if (crp.lan != null) {
            if (crp.annotations != null && crp.annotations.size() > 0) {
                String keyword;
                crp.multiplier = 1.0;
                do {
                    if ((keyword = (String)crp.annotations.get(0)) == null) continue;
                    if (keyword.equals("$colors$")) {
                        crp.annotations.remove(0);
                        crp.annotations.remove(0);
                        continue;
                    }
                    if (keyword.equals("$multiplier$")) {
                        crp.multiplier = 100.0;
                        crp.annotations.remove(0);
                        crp.annotations.remove(0);
                        continue;
                    }
                    if (keyword.equals("$resources$")) {
                        crp.listOfResources = (ArrayList)crp.annotations.get(1);
                        crp.annotations.remove(0);
                        crp.annotations.remove(0);
                        continue;
                    }
                    if (!keyword.equals("$tokens$")) continue;
                    crp.hTokens = (HashMap)crp.annotations.get(1);
                    crp.annotations.remove(0);
                    crp.annotations.remove(0);
                } while (crp.annotations.size() > 0 && keyword != null && (keyword.equals("$tokens$") || keyword.equals("$colors$") || keyword.equals("$multiplier$") || keyword.equals("$resources$")));
            }
            if (crp.annotations != null) {
                Corpus.computehLexemes(crp);
            }
            return crp;
        }
        return null;
    }

    public boolean getRidOfUnusedAnnotationsForCorpus(String corpusFullName) {
        boolean[] exist = new boolean[this.annotations.size()];
        int i = 0;
        while (i < exist.length) {
            exist[i] = false;
            ++i;
        }
        for (String fName : this.listOfFileTexts) {
            String fullName = String.valueOf(corpusFullName) + "_dir" + System.getProperty("file.separator") + fName;
            try {
                Ntext myText = Ntext.loadForCorpus(fullName, this.lan, this.multiplier);
                if (myText == null) continue;
                int tuNb = 1;
                while (tuNb <= myText.mft.tuAddresses.length - 1) {
                    ArrayList<TransitionObject> transitions = myText.mft.aTransitions.get(tuNb);
                    int it = 0;
                    while (it < transitions.size()) {
                        ArrayList<TransitionPair> outgoings = transitions.get(it).getOutgoings();
                        int io = 0;
                        while (io < outgoings.size()) {
                            int tokenId = outgoings.get(io).getTokenId();
                            if (tokenId < exist.length) {
                                exist[tokenId] = true;
                            }
                            ++io;
                        }
                        ++it;
                    }
                    ++tuNb;
                }
            }
            catch (IOException e) {
                JOptionPane.showMessageDialog(Launcher.getDesktopPane(), e.getMessage(), "Error while loading text for corpus!", 0);
                return false;
            }
        }
        int ia = 0;
        while (ia < this.annotations.size()) {
            Object info;
            RefObject<Object> infoRef;
            Object entry;
            RefObject<Object> entryRef;
            String label;
            if (!exist[ia] && (label = (String)this.annotations.get(ia)) != null && Dic.parseDELAS(label, entryRef = new RefObject<Object>((entry = null)), infoRef = new RefObject<Object>((info = null)))) {
                this.annotations.set(ia, null);
            }
            ++ia;
        }
        return true;
    }
}

