/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.patterns.surface;

import edu.stanford.nlp.classify.Classifier;
import edu.stanford.nlp.classify.GeneralDataset;
import edu.stanford.nlp.classify.LogisticClassifier;
import edu.stanford.nlp.classify.LogisticClassifierFactory;
import edu.stanford.nlp.classify.RVFDataset;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.RVFDatum;
import edu.stanford.nlp.patterns.CandidatePhrase;
import edu.stanford.nlp.patterns.DataInstance;
import edu.stanford.nlp.patterns.PatternsAnnotations;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.util.CollectionValuedMap;
import edu.stanford.nlp.util.Execution;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;

/**
 * Trains a logistic classifier over sampled tokens and reports the learned
 * per-feature weights.
 *
 * <p>Tokens are sampled from sentences ({@link #sample}), turned into
 * real-valued feature datums ({@link #getDatum}), and used to train a
 * {@link LogisticClassifier}. The resulting feature weights are optionally
 * thresholded, written to a file, and returned by {@link #getTopFeatures}.
 */
public class LearnImportantFeatures {
    /** Annotation key under which each token's label is stored. */
    @Execution.Option(name="answerClass")
    public Class answerClass = CoreAnnotations.AnswerAnnotation.class;

    /** Label value that marks a token as a positive example. */
    @Execution.Option(name="answerLabel")
    public String answerLabel = "WORD";

    /** Path of a file with one {@code word clusterId} pair per line. */
    @Execution.Option(name="wordClassClusterFile")
    String wordClassClusterFile = null;

    /** When non-null, features whose absolute weight is at or below this value are dropped. */
    @Execution.Option(name="thresholdWeight")
    Double thresholdWeight = null;

    /** Word to cluster id, filled by {@link #setUp()}. */
    Map<String, Integer> clusterIds = new HashMap<String, Integer>();

    /** Cluster id to the words in that cluster, filled by {@link #setUp()}. */
    CollectionValuedMap<Integer, String> clusters = new CollectionValuedMap<Integer, String>();

    /** File names separated by {@code ,} or {@code ;}, each read as a list of negative words. */
    @Execution.Option(name="negativeWordsFiles")
    String negativeWordsFiles = null;

    /** Lines read from {@link #negativeWordsFiles}, filled by {@link #setUp()}. */
    HashSet<String> negativeWords = new HashSet<String>();

    /**
     * Loads the word-cluster file and the negative-word lists into memory.
     *
     * <p>Blank lines in the cluster file are skipped and surrounding whitespace
     * is ignored, so a trailing empty line no longer aborts loading.
     *
     * @throws IllegalArgumentException if a non-blank cluster line does not have
     *         at least two whitespace-separated fields, or if the second field
     *         is not an integer ({@link NumberFormatException})
     */
    public void setUp() {
        assert (this.wordClassClusterFile != null);
        if (this.wordClassClusterFile != null) {
            for (String rawLine : IOUtils.readLines(this.wordClassClusterFile)) {
                String line = rawLine.trim();
                if (line.isEmpty()) {
                    // A blank line would otherwise fail on fields[1] below.
                    continue;
                }
                String[] fields = line.split("\\s+");
                if (fields.length < 2) {
                    throw new IllegalArgumentException("Malformed line in word class cluster file "
                            + this.wordClassClusterFile + " (expected '<word> <clusterId>'): " + rawLine);
                }
                int clusterId = Integer.parseInt(fields[1]);
                this.clusterIds.put(fields[0], clusterId);
                this.clusters.add(clusterId, fields[0]);
            }
        }
        if (this.negativeWordsFiles != null) {
            for (String file : this.negativeWordsFiles.split("[,;]")) {
                this.negativeWords.addAll(IOUtils.linesFromFile(file));
            }
            System.out.println("number of negative words from lists " + this.negativeWords.size());
        }
    }

    /**
     * Draws one float from {@code random} and reports whether it is below {@code p}.
     *
     * @param random source of randomness; advanced by exactly one {@code nextFloat()} call
     * @param p threshold compared against the draw
     * @return {@code true} if the drawn value is strictly less than {@code p}
     */
    public static boolean getRandomBoolean(Random random, double p) {
        return (double)random.nextFloat() < p;
    }

    /**
     * Selects tokens from {@code sents} and appends a datum for each selected
     * token to {@code dataset}.
     *
     * <p>A token is selected when, in this order:
     * <ol>
     *   <li>its label equals {@link #answerLabel}; or</li>
     *   <li>its label is not {@code "O"} or its lower-cased word is in
     *       {@link #negativeWords}, and a draw against {@code perSelectNeg} succeeds; or</li>
     *   <li>a draw against {@code perSelectRand} succeeds.</li>
     * </ol>
     *
     * <p>Every token is expected to carry a non-null value under
     * {@link #answerClass}; a missing value raises {@link NullPointerException}.
     *
     * @param sents sentence id to sentence
     * @param r random source used for both kinds of draw
     * @param rneg not used by this method
     *        (NOTE(review): possibly intended for the negative draw; switching
     *        to it would change which tokens are sampled, so it is left alone)
     * @param perSelectNeg selection probability for rule 2
     * @param perSelectRand selection probability for rule 3
     * @param numrand running count of tokens selected through rule 3
     * @param chosen receives a (sentence id, token index) pair per selected token
     * @param dataset receives one datum per selected token
     * @return {@code numrand} plus the number of tokens selected through rule 3
     */
    private int sample(Map<String, DataInstance> sents, Random r, Random rneg, double perSelectNeg, double perSelectRand, int numrand, List<Pair<String, Integer>> chosen, RVFDataset<String, String> dataset) {
        for (Map.Entry<String, DataInstance> en : sents.entrySet()) {
            CoreLabel[] sent = en.getValue().getTokens().toArray(new CoreLabel[0]);
            for (int i = 0; i < sent.length; ++i) {
                CoreLabel token = sent[i];
                boolean chooseThis;
                // The branch order and short-circuiting decide how many random
                // draws are consumed, so they must stay exactly as they are.
                if (token.get(this.answerClass).equals(this.answerLabel)) {
                    chooseThis = true;
                } else if ((!token.get(this.answerClass).equals("O") || this.negativeWords.contains(token.word().toLowerCase())) && LearnImportantFeatures.getRandomBoolean(r, perSelectNeg)) {
                    chooseThis = true;
                } else if (LearnImportantFeatures.getRandomBoolean(r, perSelectRand)) {
                    ++numrand;
                    chooseThis = true;
                } else {
                    chooseThis = false;
                }
                if (!chooseThis) {
                    continue;
                }
                chosen.add(new Pair<String, Integer>(en.getKey(), i));
                RVFDatum<String, String> datum = this.getDatum(sent, i);
                dataset.add(datum, en.getKey(), Integer.toString(i));
            }
        }
        return numrand;
    }

    /**
     * Trains a logistic classifier on tokens sampled from {@code sentsf} and
     * returns the learned feature weights.
     *
     * <p>The weights are negated when the classifier's internal positive class
     * is not {@link #answerLabel}, so that positive weights favour
     * {@link #answerLabel}. If {@link #thresholdWeight} is set, features whose
     * absolute weight is at or below it are removed. The remaining weights are
     * written, sorted, to {@code externalFeatureWeightsFileLabel} as
     * {@code feature:weight} lines.
     *
     * <p>Earlier versions of this method returned a counter that was never
     * filled; the weights written to the file are now also the return value.
     *
     * @param sentsf batches of sentences; only the map in each pair is read
     * @param perSelectRand probability of sampling an arbitrary token
     * @param perSelectNeg probability of sampling a negative-candidate token
     * @param externalFeatureWeightsFileLabel path of the file the weights are written to
     * @return the (possibly negated and thresholded) feature weights
     * @throws IOException if the weights file cannot be written
     * @throws ClassNotFoundException declared for interface compatibility
     */
    public Counter<String> getTopFeatures(Iterator<Pair<Map<String, DataInstance>, File>> sentsf, double perSelectRand, double perSelectNeg, String externalFeatureWeightsFileLabel) throws IOException, ClassNotFoundException {
        RVFDataset<String, String> dataset = new RVFDataset<String, String>();
        // Fixed seeds keep the sampling reproducible between runs.
        Random r = new Random(10L);
        Random rneg = new Random(10L);
        int numrand = 0;
        List<Pair<String, Integer>> chosen = new ArrayList<Pair<String, Integer>>();
        while (sentsf.hasNext()) {
            Pair<Map<String, DataInstance>, File> sents = sentsf.next();
            numrand = this.sample(sents.first(), r, rneg, perSelectNeg, perSelectRand, numrand, chosen, dataset);
        }
        System.out.println("num random chosen: " + numrand);
        System.out.println("Number of datums per label: " + dataset.numDatumsPerLabel());

        LogisticClassifierFactory<String, String> logfactory = new LogisticClassifierFactory<String, String>();
        LogisticClassifier<String, String> classifier = logfactory.trainClassifier(dataset);
        Counter<String> weights = classifier.weightsAsCounter();
        if (!classifier.getLabelForInternalPositiveClass().equals(this.answerLabel)) {
            weights = Counters.scale(weights, -1.0);
        }

        if (this.thresholdWeight != null) {
            // Collect first, remove afterwards, to avoid mutating while iterating.
            HashSet<String> removeKeys = new HashSet<String>();
            for (Map.Entry<String, Double> en : weights.entrySet()) {
                if (Math.abs(en.getValue()) <= this.thresholdWeight) {
                    removeKeys.add(en.getKey());
                }
            }
            Counters.removeKeys(weights, removeKeys);
            System.out.println("Removing " + removeKeys);
        }

        IOUtils.writeStringToFile(Counters.toSortedString(weights, weights.size(), "%1$s:%2$f", "\n"), externalFeatureWeightsFileLabel, "utf8");
        return weights;
    }

    /**
     * Builds the feature datum for the token at index {@code i} of {@code sent}.
     *
     * <p>The datum's label is {@link #answerLabel} when the token's label (via
     * {@code toString()}) equals it, otherwise {@code "O"}. One
     * {@code Cluster-<id>} feature is set per phrase in the token's
     * {@code MatchedPhrases} annotation, or for the token's own word when that
     * annotation is absent; phrases without a known cluster use id {@code -1}.
     *
     * @param sent tokens of the sentence
     * @param i index of the token to describe
     * @return the datum for token {@code i}
     */
    private RVFDatum<String, String> getDatum(CoreLabel[] sent, int i) {
        ClassicCounter<String> feat = new ClassicCounter<String>();
        CoreLabel token = sent[i];
        String label = token.get(this.answerClass).toString().equals(this.answerLabel) ? this.answerLabel : "O";

        CollectionValuedMap<String, CandidatePhrase> matchedPhrases = (CollectionValuedMap<String, CandidatePhrase>)token.get(PatternsAnnotations.MatchedPhrases.class);
        if (matchedPhrases == null) {
            // No matched phrases recorded: fall back to the token's own word.
            matchedPhrases = new CollectionValuedMap<String, CandidatePhrase>();
            matchedPhrases.add(label, CandidatePhrase.createOrGet(token.word()));
        }
        for (CandidatePhrase phrase : matchedPhrases.allValues()) {
            Integer clusterId = this.clusterIds.get(phrase.getPhrase());
            if (clusterId == null) {
                clusterId = -1;
            }
            feat.setCount("Cluster-" + clusterId, 1.0);
        }

        // With a window of 0 neither loop below runs, so no PREV-/NEXT-
        // context features are produced at present.
        int window = 0;
        for (int j = Math.max(0, i - window); j < i; ++j) {
            CoreLabel prev = sent[j];
            feat.incrementCount("PREV-WORD-" + prev.word());
            feat.incrementCount("PREV-LEMMA-" + prev.lemma());
            feat.incrementCount("PREV-TAG-" + prev.tag());
        }
        for (int j = i + 1; j < sent.length && j <= i + window; ++j) {
            CoreLabel next = sent[j];
            feat.incrementCount("NEXT-WORD-" + next.word());
            feat.incrementCount("NEXT-LEMMA-" + next.lemma());
            feat.incrementCount("NEXT-TAG-" + next.tag());
        }
        return new RVFDatum<String, String>(feat, label);
    }

    /**
     * Command-line entry point: fills the options from {@code args}, runs
     * {@link #setUp()}, loads the serialized sentences named by the
     * {@code sentsFile} property and parses the {@code perSelectRand} and
     * {@code perSelectNeg} properties.
     *
     * <p>NOTE(review): the loaded sentences and parsed probabilities are not
     * passed on to {@link #getTopFeatures}; as written this method only
     * validates its inputs. Any failure is printed as a stack trace.
     *
     * @param args command-line arguments converted to properties
     */
    public static void main(String[] args) {
        try {
            LearnImportantFeatures lmf = new LearnImportantFeatures();
            Properties props = StringUtils.argsToPropertiesWithResolve(args);
            Execution.fillOptions((Object)lmf, props);
            lmf.setUp();
            String sentsFile = props.getProperty("sentsFile");
            // NOTE(review): this deserializes a Java object from the given
            // path; only point it at files from a trusted source.
            Map<?, ?> sents = (Map<?, ?>)IOUtils.readObjectFromFile(sentsFile);
            System.out.println("Read the sents file: " + sentsFile);
            double perSelectRand = Double.parseDouble(props.getProperty("perSelectRand"));
            double perSelectNeg = Double.parseDouble(props.getProperty("perSelectNeg"));
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}

