/*
 * Decompiled with CFR 0.152.
 */
package com.wcohen.ss.abbvGapsHmm;

import com.wcohen.ss.abbvGapsHmm.AbbreviationAlignmentContainer;
import com.wcohen.ss.abbvGapsHmm.AbbvGapsHMM;
import com.wcohen.ss.abbvGapsHmm.Acronym;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Extracts candidate short-form / long-form pairs from free text and delegates
 * training and alignment prediction to an {@link AbbvGapsHMM}.
 *
 * <p>Note that the training-data locations are held in {@code static} fields, so
 * calling {@link #setTrainingDataDir(String)} on any instance changes them for
 * every instance.
 */
public class AlignmentPredictionModel {
    /** Separator between acronym entries on one line of the labels file. */
    public static final String SEPARATOR = "#_#";
    public static String _trainingDataDir;
    public static String _trueLabelsFile;
    public static String _trainingCorpusFile;

    /** A single "noun" token: 1-20 letters, digits or hyphens. */
    private static final String NOUN_EXP = "([a-zA-Z0-9\\-]{1,20})";
    /** A reluctant run of 1-20 arbitrary characters (used by the trailing-noun patterns). */
    private static final String LOOSE_NOUN_EXP = "(.{1,20}?)";
    /** A parenthesised expression that contains no opening parenthesis. */
    private static final String SHORT_FORM_EXP = "\\(([^\\(]*?)\\)";

    // The patterns are immutable and thread-safe, so they are compiled once
    // instead of on every extraction call.
    private static final Pattern HEAD_NOUN_3_PARTS = Pattern.compile(
            NOUN_EXP + " " + NOUN_EXP + " " + SHORT_FORM_EXP + ",? " + NOUN_EXP + " " + SHORT_FORM_EXP
            + ",? and " + NOUN_EXP + " " + SHORT_FORM_EXP);
    private static final Pattern HEAD_NOUN_2_PARTS = Pattern.compile(
            NOUN_EXP + " " + NOUN_EXP + " " + SHORT_FORM_EXP + ",? and " + NOUN_EXP + " " + SHORT_FORM_EXP);
    private static final Pattern TRAILING_NOUN_3_PARTS = Pattern.compile(
            LOOSE_NOUN_EXP + " " + SHORT_FORM_EXP + ",? " + LOOSE_NOUN_EXP + " " + SHORT_FORM_EXP
            + ",? and " + LOOSE_NOUN_EXP + " " + SHORT_FORM_EXP + " " + NOUN_EXP);
    private static final Pattern TRAILING_NOUN_2_PARTS = Pattern.compile(
            LOOSE_NOUN_EXP + " " + SHORT_FORM_EXP + ",? and " + LOOSE_NOUN_EXP + " " + SHORT_FORM_EXP
            + " " + NOUN_EXP);

    private static final Pattern STARTS_WITH_ALNUM = Pattern.compile("^[a-zA-Z0-9].*");
    private static final Pattern CONTAINS_LETTER = Pattern.compile(".*[a-zA-Z].*");

    private AbbvGapsHMM _abbvHmm = new AbbvGapsHMM();

    /**
     * Creates a model whose training data directory defaults to {@code "train/"}.
     *
     * @throws IOException declared for backward compatibility with existing callers
     */
    public AlignmentPredictionModel() throws IOException {
        this.setTrainingDataDir("train/");
    }

    /**
     * Sets the (static) training directory and derives the labels and corpus file
     * paths from it by plain string concatenation.
     *
     * @param trainDir directory prefix; it is concatenated as-is, so it should end
     *                 with a path separator
     */
    public void setTrainingDataDir(String trainDir) {
        _trainingDataDir = trainDir;
        _trueLabelsFile = _trainingDataDir + "abbvAlign_pairs.txt";
        _trainingCorpusFile = _trainingDataDir + "abbvAlign_corpus.txt";
    }

    /**
     * Forwards the TF-IDF data file to the underlying HMM.
     *
     * @param dataFile path handed to {@code AbbvGapsHMM.setTfIdfData}
     * @throws IOException if the underlying HMM reports one
     */
    public void setTfIdfData(String dataFile) throws IOException {
        this._abbvHmm.setTfIdfData(dataFile);
    }

    /**
     * Forwards the model-parameter file name to the underlying HMM.
     *
     * @param paramFilename file name handed to {@code AbbvGapsHMM.setParamFile}
     */
    public void setModelParamsFile(String paramFilename) {
        this._abbvHmm.setParamFile(paramFilename);
    }

    /** Uses the default parameter file name {@code "hmmModelParams.txt"}. */
    public void setModelParamsFile() {
        this.setModelParamsFile("hmmModelParams.txt");
    }

    /**
     * Loads the labels file: one document per line, acronym entries separated by
     * {@link #SEPARATOR}, each entry being two tab-separated fields. Both fields are
     * trimmed and stored as key and value of that document's map. Malformed entries
     * are reported on standard output and skipped.
     *
     * <p>On any exception the stack trace is printed and the JVM exits with status 1.
     *
     * @param labelsFile path of the labels file; may be {@code null}
     * @return one map per line of the file, or {@code null} if {@code labelsFile} is {@code null}
     */
    public static ArrayList<Map<String, String>> loadLabels(String labelsFile) {
        if (labelsFile == null) {
            return null;
        }
        ArrayList<Map<String, String>> labels = null;
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(labelsFile));
            labels = new ArrayList<Map<String, String>>();
            String docLine;
            while ((docLine = reader.readLine()) != null) {
                Map<String, String> docAcronymMap = new HashMap<String, String>();
                for (String singleAcronym : docLine.split(SEPARATOR)) {
                    if (singleAcronym.isEmpty()) {
                        continue;
                    }
                    String[] parts = singleAcronym.split("\t");
                    if (parts.length != 2) {
                        System.out.println("BAD FORMAT in " + labelsFile + ": " + singleAcronym);
                        continue;
                    }
                    docAcronymMap.put(parts[0].trim(), parts[1].trim());
                }
                labels.add(docAcronymMap);
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
        finally {
            // Release the file handle on every path, not only on success.
            closeQuietly(reader);
        }
        return labels;
    }

    /**
     * Reads the corpus file into a list, one element per line.
     *
     * <p>On any exception the stack trace is printed and the JVM exits with status 1.
     *
     * @param corpusFile path of the corpus file
     * @return the lines of the file in order
     */
    public static List<String> loadTrainingCorpus(String corpusFile) {
        ArrayList<String> trainingCorpus = null;
        BufferedReader reader = null;
        try {
            trainingCorpus = new ArrayList<String>();
            reader = new BufferedReader(new FileReader(corpusFile));
            String line;
            while ((line = reader.readLine()) != null) {
                trainingCorpus.add(line);
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
        finally {
            // Release the file handle on every path, not only on success.
            closeQuietly(reader);
        }
        return trainingCorpus;
    }

    /** Closes the reader if it was opened; a failure while closing is ignored. */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        }
        catch (IOException ignored) {
            // Nothing useful can be done about a failed close of a read-only stream.
        }
    }

    /**
     * Trains the HMM on every document of the configured corpus file, paired by
     * index with the entries of the configured labels file. The labels file must
     * therefore contain at least as many lines as the corpus.
     *
     * @return the result reported by {@code AbbvGapsHMM.train}
     */
    public boolean trainOnAll() {
        ArrayList<Map<String, String>> trueLabels = AlignmentPredictionModel.loadLabels(_trueLabelsFile);
        List<String> corpus = AlignmentPredictionModel.loadTrainingCorpus(_trainingCorpusFile);
        List<List<Acronym>> trainingExtractedCandidates = new ArrayList<List<Acronym>>();
        List<Map<String, String>> trueLabelsForTraining = new ArrayList<Map<String, String>>();
        for (int docID = 0; docID < corpus.size(); ++docID) {
            trainingExtractedCandidates.add(this.extractCandidatePairs(corpus.get(docID)));
            trueLabelsForTraining.add(trueLabels.get(docID));
        }
        return this._abbvHmm.train(trainingExtractedCandidates, trueLabelsForTraining, true);
    }

    /**
     * Trains the HMM on the candidates extracted from the configured corpus file,
     * passing {@code null} as the labels argument.
     *
     * @return the result reported by {@code AbbvGapsHMM.train}
     */
    public boolean trainOnCandidates() {
        List<String> corpus = AlignmentPredictionModel.loadTrainingCorpus(_trainingCorpusFile);
        List<List<Acronym>> trainingExtractedCandidates = new ArrayList<List<Acronym>>();
        for (int docID = 0; docID < corpus.size(); ++docID) {
            trainingExtractedCandidates.add(this.extractCandidatePairs(corpus.get(docID)));
        }
        return this._abbvHmm.train(trainingExtractedCandidates, null, true);
    }

    /**
     * Trains the HMM on a subset (or all) of the given corpus.
     *
     * @param corpus      documents, indexed by document id
     * @param trainingSet document ids to train on, or {@code null} to use every document
     * @param trueLabels  labels, indexed by the same document ids as {@code corpus}
     * @return the result reported by {@code AbbvGapsHMM.train}
     */
    public boolean train(List<String> corpus, List<Integer> trainingSet, List<Map<String, String>> trueLabels) {
        List<List<Acronym>> trainingExtractedCandidates = new ArrayList<List<Acronym>>();
        List<Map<String, String>> trueLabelsForTraining = new ArrayList<Map<String, String>>();
        if (trainingSet != null) {
            for (int docID : trainingSet) {
                trainingExtractedCandidates.add(this.extractCandidatePairs(corpus.get(docID)));
                trueLabelsForTraining.add(trueLabels.get(docID));
            }
        } else {
            for (int docID = 0; docID < corpus.size(); ++docID) {
                trainingExtractedCandidates.add(this.extractCandidatePairs(corpus.get(docID)));
                trueLabelsForTraining.add(trueLabels.get(docID));
            }
        }
        return this._abbvHmm.train(trainingExtractedCandidates, trueLabelsForTraining, true);
    }

    /**
     * Wraps the two strings in an {@link Acronym} and returns its alignment.
     *
     * @param sf first constructor argument of {@code Acronym} (the short form)
     * @param lf second constructor argument of {@code Acronym} (the long form)
     * @return the alignment returned by {@link #predictAlignment(Acronym)}
     */
    public AbbreviationAlignmentContainer<AbbvGapsHMM.Emissions, AbbvGapsHMM.States> predict(String sf, String lf) {
        return this.predictAlignment(new Acronym(sf, lf));
    }

    /**
     * Runs {@code AbbvGapsHMM.viterbi} on the candidate pair.
     *
     * @param candidatePair pair to align
     * @return whatever the HMM returns for this pair
     */
    public AbbreviationAlignmentContainer<AbbvGapsHMM.Emissions, AbbvGapsHMM.States> predictAlignment(Acronym candidatePair) {
        return this._abbvHmm.viterbi(candidatePair);
    }

    /**
     * Aligns the candidate, takes the acronym carried by that alignment, re-aligns
     * that acronym to obtain its probability, and stores the probability together
     * with the first alignment on the returned acronym.
     *
     * @param candidatePair pair to evaluate
     * @return the acronym taken from the alignment; {@code null} if there is no
     *         alignment or the alignment carries no acronym
     */
    public Acronym predict(Acronym candidatePair) {
        AbbreviationAlignmentContainer<AbbvGapsHMM.Emissions, AbbvGapsHMM.States> alignment = this.predictAlignment(candidatePair);
        if (alignment == null) {
            return null;
        }
        Acronym currAcronym = null;
        try {
            currAcronym = alignment.getAcronym();
            if (currAcronym != null) {
                AbbreviationAlignmentContainer<AbbvGapsHMM.Emissions, AbbvGapsHMM.States> acronymAlignment = this.predictAlignment(currAcronym);
                // The second alignment may be missing just like the first one; in that
                // case the acronym is returned without probability/alignment instead of
                // relying on a swallowed NullPointerException.
                if (acronymAlignment != null) {
                    currAcronym._probability = acronymAlignment.getProbability();
                    currAcronym._alignment = alignment;
                }
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        return currAcronym;
    }

    /**
     * Indexes the acronyms by short form. When several acronyms share a short form,
     * the first one seen is kept unless a later one has a strictly greater
     * probability and both probabilities are non-null.
     *
     * @param pairs acronyms to index
     * @return map from short form to the retained acronym
     */
    public Map<String, Acronym> acronymsArrayToMap(Collection<Acronym> pairs) {
        HashMap<String, Acronym> out = new HashMap<String, Acronym>();
        for (Acronym acronymPair : pairs) {
            Acronym prevAcronym = out.get(acronymPair._shortForm);
            if (prevAcronym == null) {
                out.put(acronymPair._shortForm, acronymPair);
                continue;
            }
            boolean comparable = acronymPair._probability != null && prevAcronym._probability != null;
            if (comparable && acronymPair._probability.compareTo(prevAcronym._probability) > 0) {
                out.put(acronymPair._shortForm, acronymPair);
            }
        }
        return out;
    }

    /**
     * Extracts every candidate pair from the text and keeps the non-null
     * predictions for them.
     *
     * @param text text to scan
     * @return predicted acronyms, in extraction order
     */
    public Collection<Acronym> predict(String text) {
        List<Acronym> candidates = this.extractCandidatePairs(text);
        ArrayList<Acronym> predictions = new ArrayList<Acronym>();
        for (Acronym candidateAcronym : candidates) {
            Acronym currPrediction = this.predict(candidateAcronym);
            if (currPrediction != null) {
                predictions.add(currPrediction);
            }
        }
        return predictions;
    }

    /**
     * Trains on the corpus candidates only when the HMM cannot load its parameters.
     *
     * @return {@code true} if the parameters were loaded, otherwise the training result
     */
    public boolean trainIfNeeded() {
        if (!this._abbvHmm.loadModelParams()) {
            return this.trainOnCandidates();
        }
        return true;
    }

    /**
     * Collects the candidates found by the parenthesis scan followed by those found
     * by the coordination patterns.
     *
     * @param text text to scan
     * @return all candidate pairs
     */
    public List<Acronym> extractCandidatePairs(String text) {
        ArrayList<Acronym> extractedPairs = new ArrayList<Acronym>();
        extractedPairs.addAll(this.extractSingleAcronyms(text));
        extractedPairs.addAll(this.extractPatternAcronyms(text));
        return extractedPairs;
    }

    /**
     * Runs the four coordination patterns (head noun / trailing noun, two / three parts).
     *
     * @param text text to scan
     * @return candidates from all four patterns, in that order
     */
    protected List<Acronym> extractPatternAcronyms(String text) {
        ArrayList<Acronym> extractedPairs = new ArrayList<Acronym>();
        extractedPairs.addAll(this.extractHeadNounPattern_2Parts(text));
        extractedPairs.addAll(this.extractHeadNounPattern_3Parts(text));
        extractedPairs.addAll(this.extractTrailingNounPattern_2Parts(text));
        extractedPairs.addAll(this.extractTrailingNounPattern_3Parts(text));
        return extractedPairs;
    }

    /**
     * Parses the two strings into a candidate and appends it to {@code allPairs}
     * when parsing succeeds and the short form is not empty.
     *
     * @param allPairs           list to append to
     * @param longFormCandidate  text found outside the parentheses
     * @param shortFormCandidate text found inside the parentheses
     */
    protected void addCandidatePair(List<Acronym> allPairs, String longFormCandidate, String shortFormCandidate) {
        Acronym pair = this.parseCandidate(longFormCandidate, shortFormCandidate);
        if (pair != null && !pair._shortForm.isEmpty()) {
            allPairs.add(pair);
        }
    }

    /**
     * Matches {@code "HEAD a (A), b (B), and c (C)"} and pairs the shared head noun
     * with each part. Every non-overlapping match in the text is processed.
     *
     * @param text text to scan
     * @return extracted candidates
     */
    protected List<Acronym> extractHeadNounPattern_3Parts(String text) {
        List<Acronym> extractedPairs = new ArrayList<Acronym>();
        Matcher matcher = HEAD_NOUN_3_PARTS.matcher(text);
        // find() resumes after the previous match, so all occurrences are visited.
        while (matcher.find()) {
            String mainNoun = matcher.group(1);
            this.addCandidatePair(extractedPairs, mainNoun + " " + matcher.group(2), matcher.group(3));
            this.addCandidatePair(extractedPairs, mainNoun + " " + matcher.group(4), matcher.group(5));
            this.addCandidatePair(extractedPairs, mainNoun + " " + matcher.group(6), matcher.group(7));
        }
        return extractedPairs;
    }

    /**
     * Matches {@code "HEAD a (A) and b (B)"} and pairs the shared head noun with
     * each part. Every non-overlapping match in the text is processed.
     *
     * @param text text to scan
     * @return extracted candidates
     */
    protected List<Acronym> extractHeadNounPattern_2Parts(String text) {
        List<Acronym> extractedPairs = new ArrayList<Acronym>();
        Matcher matcher = HEAD_NOUN_2_PARTS.matcher(text);
        // find() resumes after the previous match, so all occurrences are visited.
        while (matcher.find()) {
            String mainNoun = matcher.group(1);
            this.addCandidatePair(extractedPairs, mainNoun + " " + matcher.group(2), matcher.group(3));
            this.addCandidatePair(extractedPairs, mainNoun + " " + matcher.group(4), matcher.group(5));
        }
        return extractedPairs;
    }

    /**
     * Matches {@code "a (A), b (B), and c (C) TAIL"} and pairs each part with the
     * shared trailing noun. Every non-overlapping match in the text is processed.
     *
     * @param text text to scan
     * @return extracted candidates
     */
    protected List<Acronym> extractTrailingNounPattern_3Parts(String text) {
        List<Acronym> extractedPairs = new ArrayList<Acronym>();
        Matcher matcher = TRAILING_NOUN_3_PARTS.matcher(text);
        // find() resumes after the previous match, so all occurrences are visited.
        while (matcher.find()) {
            String mainNoun = matcher.group(7);
            this.addCandidatePair(extractedPairs, matcher.group(1) + " " + mainNoun, matcher.group(2));
            this.addCandidatePair(extractedPairs, matcher.group(3) + " " + mainNoun, matcher.group(4));
            this.addCandidatePair(extractedPairs, matcher.group(5) + " " + mainNoun, matcher.group(6));
        }
        return extractedPairs;
    }

    /**
     * Matches {@code "a (A) and b (B) TAIL"} and pairs each part with the shared
     * trailing noun. Every non-overlapping match in the text is processed.
     *
     * @param text text to scan
     * @return extracted candidates
     */
    protected List<Acronym> extractTrailingNounPattern_2Parts(String text) {
        List<Acronym> extractedPairs = new ArrayList<Acronym>();
        Matcher matcher = TRAILING_NOUN_2_PARTS.matcher(text);
        // find() resumes after the previous match, so all occurrences are visited.
        while (matcher.find()) {
            String mainNoun = matcher.group(5);
            this.addCandidatePair(extractedPairs, matcher.group(1) + " " + mainNoun, matcher.group(2));
            this.addCandidatePair(extractedPairs, matcher.group(3) + " " + mainNoun, matcher.group(4));
        }
        return extractedPairs;
    }

    /**
     * Scans for every opening parenthesis, finds its balancing closing parenthesis
     * (nested pairs are skipped), and offers the text before the opening parenthesis
     * together with the parenthesised content as a candidate. Unbalanced opening
     * parentheses are ignored.
     *
     * @param text text to scan
     * @return extracted candidates
     */
    protected List<Acronym> extractSingleAcronyms(String text) {
        List<Acronym> extractedPairs = new ArrayList<Acronym>();
        for (int iOpen = text.indexOf("("); iOpen != -1; iOpen = text.indexOf("(", iOpen + 1)) {
            int iClose = -1;
            int depth = 0;
            for (int p = iOpen + 1; p < text.length(); ++p) {
                char c = text.charAt(p);
                if (c == '(') {
                    ++depth;
                } else if (c == ')') {
                    if (depth == 0) {
                        iClose = p;
                        break;
                    }
                    --depth;
                }
            }
            if (iClose != -1) {
                String inPar = text.substring(iOpen + 1, iClose);
                String outOfPar = text.substring(0, iOpen).trim();
                this.addCandidatePair(extractedPairs, outOfPar, inPar);
            }
        }
        return extractedPairs;
    }

    /**
     * Turns raw text outside/inside a pair of parentheses into a candidate acronym.
     *
     * <p>Steps, in order: the parenthesised text is cut at its first {@code ';'};
     * the outside text keeps only what follows its first {@code ';'}; if the
     * parenthesised text has more than three space-separated parts, the roles are
     * swapped (it becomes the long form and the last outside word the short form);
     * the short form is validated; finally the long form is limited to its last
     * {@code min(len + 5, len * 2)} space-separated words, where {@code len} is the
     * short form's character length.
     *
     * @param outOfParenthesis text preceding the parentheses
     * @param inParenthesis    text inside the parentheses
     * @return the candidate, or {@code null} if it is rejected
     */
    protected Acronym parseCandidate(String outOfParenthesis, String inParenthesis) {
        int inSemi = inParenthesis.indexOf(";");
        if (inSemi != -1) {
            inParenthesis = inParenthesis.substring(0, inSemi);
        }
        int outSemi = outOfParenthesis.indexOf(";");
        if (outSemi != -1) {
            outOfParenthesis = outOfParenthesis.substring(outSemi + 1);
        }
        String shortForm = inParenthesis.trim();
        String longForm = outOfParenthesis.trim();
        if (!this.isShortForm(shortForm)) {
            // The parenthesised text is too long to be a short form: swap the roles.
            longForm = inParenthesis.trim();
            String[] outsideWords = outOfParenthesis.trim().split(" ");
            shortForm = outsideWords[outsideWords.length - 1];
        }
        if (!this.isValidShortForm(shortForm)) {
            return null;
        }
        if (!this.isValidExpression(shortForm) || !this.isValidExpression(longForm)) {
            return null;
        }
        String[] words = longForm.split(" ");
        int sfSize = shortForm.length();
        int maxLongFormLength = Math.min(sfSize + 5, sfSize * 2);
        int finalLfSize = Math.min(maxLongFormLength, words.length);
        // Keep only the last finalLfSize words, separated by single spaces.
        StringBuilder tail = new StringBuilder();
        for (int w = words.length - finalLfSize; w < words.length; ++w) {
            tail.append(words[w]).append(' ');
        }
        String finalLongForm = tail.toString().trim();
        if (shortForm.equalsIgnoreCase(finalLongForm)) {
            return null;
        }
        return new Acronym(shortForm, finalLongForm);
    }

    /**
     * Returns a suffix of {@code longForm}. Walking backwards from the end, a
     * position counts as a boundary when it is index 0 or the preceding character is
     * not a letter or digit; the walk stops once {@code size} boundaries were
     * counted and the suffix starting there is returned. If fewer boundaries exist,
     * the whole string is returned.
     *
     * @param longForm string to cut
     * @param size     number of boundaries to count from the end
     * @return the resulting suffix
     */
    protected String chunkLongForm(String longForm, int size) {
        int i;
        int foundWords = 0;
        for (i = longForm.length() - 1; i >= 0 && foundWords < size; --i) {
            if (i != 0 && Character.isLetterOrDigit(longForm.charAt(i - 1))) continue;
            ++foundWords;
        }
        return longForm.substring(i + 1, longForm.length());
    }

    /**
     * @param exp expression to check
     * @return {@code true} if {@code exp} is neither {@code null} nor empty
     */
    protected boolean isValidExpression(String exp) {
        return exp != null && !exp.isEmpty();
    }

    /**
     * @param candidate text to check
     * @return {@code true} if splitting on single spaces yields at most three parts
     */
    protected boolean isShortForm(String candidate) {
        return candidate.split(" ").length <= 3;
    }

    /**
     * Accepts a short form of 1 to 15 characters that starts with an ASCII letter
     * or digit and contains at least one ASCII letter.
     *
     * @param candidate short form to check
     * @return {@code true} if the candidate satisfies all conditions
     */
    protected boolean isValidShortForm(String candidate) {
        int length = candidate.length();
        if (length > 15 || length < 1) {
            return false;
        }
        if (!STARTS_WITH_ALNUM.matcher(candidate).matches()) {
            return false;
        }
        return CONTAINS_LETTER.matcher(candidate).matches();
    }

    /** @return the emission parameters reported by the underlying HMM */
    public List<Double> getEmmisions() {
        return this._abbvHmm.getEmmisionParams();
    }

    /** @return the transition parameters reported by the underlying HMM */
    public List<Double> getTransitions() {
        return this._abbvHmm.getTransitionParams();
    }

    /**
     * Forwards the starting parameters to the underlying HMM.
     *
     * @param emmisions   starting emission parameters
     * @param transitions starting transition parameters
     */
    public void setStartingParams(List<Double> emmisions, List<Double> transitions) {
        this._abbvHmm.setStartingParams(emmisions, transitions);
    }
}

