/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.postag;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import opennlp.maxent.GIS;
import opennlp.maxent.GISModel;
import opennlp.maxent.PlainTextByLineDataStream;
import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
import opennlp.model.AbstractModel;
import opennlp.model.EventStream;
import opennlp.model.SequenceStream;
import opennlp.model.TwoPassDataIndexer;
import opennlp.perceptron.PerceptronTrainer;
import opennlp.perceptron.SimplePerceptronSequenceTrainer;
import opennlp.perceptron.SuffixSensitivePerceptronModelWriter;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.ngram.NGramModel;
import opennlp.tools.postag.DefaultPOSContextGenerator;
import opennlp.tools.postag.POSDictionary;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSSample;
import opennlp.tools.postag.POSSampleEventStream;
import opennlp.tools.postag.POSSampleSequenceStream;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.postag.TagDictionary;
import opennlp.tools.postag.WordTagSampleStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.StringList;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Command line tool and static helper methods for training part-of-speech
 * tagger models with either the maxent (GIS) trainer or one of the perceptron
 * trainers.
 *
 * <p>The training data is expected to contain one sentence per line, with
 * space separated tokens of the form {@code word_TAG}.
 */
@Deprecated
public class POSTaggerTrainer {
    /**
     * Prints the command line help to {@code System.err} and terminates the
     * JVM with exit status 1.
     */
    @Deprecated
    private static void usage() {
        System.err.println("Usage: POSTaggerTrainer [-encoding encoding] [-dict dict_file] [-sequence] [-model perceptron|maxent] training_data model_file_name [cutoff] [iterations]");
        System.err.println("This trains a new model on the specified training file and writes the trained model to the model file.");
        System.err.println("-encoding Specifies the encoding of the training file");
        System.err.println("-dict Specifies that a dictionary file should be created for use in distinguising between rare and non-rare words");
        System.err.println("-sequence Specifies that a perceptron sequence model should be trained; -model is then ignored.");
        System.err.println("-model [perceptron|maxent] Specifies what type of model should be used.");
        System.exit(1);
    }

    /**
     * Trains a GIS model on the given samples and packages it as a
     * {@link POSModel}.
     *
     * @param languageCode language code stored in the resulting model
     * @param samples the training samples
     * @param tagDictionary tag dictionary passed through to the resulting model
     * @param ngramDictionary dictionary handed to the context generator and
     *        passed through to the resulting model
     * @param cutoff cutoff handed to the data indexer
     * @param iterations number of GIS training iterations
     * @return the trained model
     * @throws IOException if reading the samples or building the model fails
     */
    public static POSModel train(String languageCode, ObjectStream<POSSample> samples, POSDictionary tagDictionary, Dictionary ngramDictionary, int cutoff, int iterations) throws IOException {
        POSSampleEventStream events = new POSSampleEventStream(samples, new DefaultPOSContextGenerator(ngramDictionary));
        GISModel posModel = GIS.trainModel(iterations, new TwoPassDataIndexer(events, cutoff));
        return new POSModel(languageCode, posModel, tagDictionary, ngramDictionary);
    }

    /**
     * Trains a maxent model with 100 iterations and a cutoff of 5 and writes
     * it to the given file.
     *
     * @param evc the training events
     * @param modelFile the file the model is written to
     * @throws IOException if training or writing the model fails
     */
    @Deprecated
    public static void trainMaxentModel(EventStream evc, File modelFile) throws IOException {
        AbstractModel model = POSTaggerTrainer.trainMaxentModel(evc, 100, 5);
        new SuffixSensitiveGISModelWriter(model, modelFile).persist();
    }

    /**
     * Trains a maxent model with the GIS trainer.
     *
     * @param es the training events
     * @param iterations number of training iterations
     * @param cut cutoff handed to the data indexer
     * @return the trained model
     * @throws IOException if reading the events fails
     */
    @Deprecated
    public static AbstractModel trainMaxentModel(EventStream es, int iterations, int cut) throws IOException {
        return GIS.trainModel(iterations, new TwoPassDataIndexer(es, cut));
    }

    /**
     * Trains a perceptron model.
     *
     * @param es the training events
     * @param iterations number of training iterations
     * @param cut cutoff, handed to both the data indexer and the trainer
     * @param useAverage forwarded to the perceptron trainer
     * @return the trained model
     * @throws IOException if reading the events fails
     */
    public static AbstractModel trainPerceptronModel(EventStream es, int iterations, int cut, boolean useAverage) throws IOException {
        // NOTE(review): the third indexer argument (false) presumably disables
        // event sorting - confirm against TwoPassDataIndexer.
        TwoPassDataIndexer indexer = new TwoPassDataIndexer(es, cut, false);
        return new PerceptronTrainer().trainModel(iterations, indexer, cut, useAverage);
    }

    /**
     * Trains a perceptron model with {@code useAverage} set to {@code true}.
     *
     * @param es the training events
     * @param iterations number of training iterations
     * @param cut cutoff, handed to both the data indexer and the trainer
     * @return the trained model
     * @throws IOException if reading the events fails
     */
    public static AbstractModel trainPerceptronModel(EventStream es, int iterations, int cut) throws IOException {
        return POSTaggerTrainer.trainPerceptronModel(es, iterations, cut, true);
    }

    /**
     * Trains a perceptron sequence model.
     *
     * @param ss the training sequences
     * @param iterations number of training iterations
     * @param cut cutoff forwarded to the trainer
     * @param useAverage forwarded to the trainer
     * @return the trained model
     * @throws IOException if reading the sequences fails
     */
    public static AbstractModel trainPerceptronSequenceModel(SequenceStream ss, int iterations, int cut, boolean useAverage) throws IOException {
        return new SimplePerceptronSequenceTrainer().trainModel(iterations, ss, cut, useAverage);
    }

    /**
     * Tags every line read from standard input with the given model and
     * prints the tagger output to standard output until end of input.
     *
     * @param model the model used to create the tagger
     * @throws IOException if reading standard input fails
     */
    @Deprecated
    public static void test(AbstractModel model) throws IOException {
        // The cast selects the (AbstractModel, TagDictionary) constructor.
        POSTaggerME tagger = new POSTaggerME(model, (TagDictionary)null);
        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
        for (String line = in.readLine(); line != null; line = in.readLine()) {
            System.out.println(tagger.tag(line));
        }
    }

    /**
     * Command line entry point; see {@link #usage()} for the accepted
     * arguments. Invalid or missing arguments print the usage and exit with
     * status 1. Any failure during training prints the stack trace and also
     * exits with status 1.
     *
     * @param args the command line arguments
     * @throws IOException declared for compatibility; failures are reported
     *         on {@code System.err} instead of being propagated
     */
    @Deprecated
    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            POSTaggerTrainer.usage();
            return;
        }
        try {
            String encoding = null;
            String dict = null;
            boolean perceptron = false;
            boolean sequence = false;
            int ai = 0;

            // Consume leading options; the bounds check keeps a command line
            // that consists only of options from running off the array.
            while (ai < args.length && args[ai].startsWith("-")) {
                String option = args[ai++];
                if (option.equals("-sequence")) {
                    sequence = true;
                    continue;
                }
                boolean takesValue = option.equals("-encoding") || option.equals("-dict") || option.equals("-model");
                if (!takesValue) {
                    System.err.println("Unknown option " + option);
                    POSTaggerTrainer.usage();
                    return;
                }
                if (ai >= args.length) {
                    POSTaggerTrainer.usage();
                    return;
                }
                String value = args[ai++];
                if (option.equals("-encoding")) {
                    encoding = value;
                } else if (option.equals("-dict")) {
                    dict = value;
                } else if (value.equals("perceptron")) {
                    perceptron = true;
                } else if (!value.equals("maxent")) {
                    POSTaggerTrainer.usage();
                    return;
                }
            }

            // Both the training file and the model file are mandatory.
            if (args.length - ai < 2) {
                POSTaggerTrainer.usage();
                return;
            }
            File inFile = new File(args[ai++]);
            File outFile = new File(args[ai++]);

            // cutoff and iterations are independently optional.
            int cutoff = 5;
            int iterations = 100;
            if (ai < args.length) {
                cutoff = Integer.parseInt(args[ai++]);
            }
            if (ai < args.length) {
                iterations = Integer.parseInt(args[ai++]);
            }

            // With -dict the dictionary is first built from the training data
            // and then loaded again for the context generator.
            DefaultPOSContextGenerator cg = null;
            if (dict != null) {
                POSTaggerTrainer.buildDictionary(dict, inFile, cutoff, encoding);
                FileInputStream dictIn = new FileInputStream(dict);
                try {
                    cg = new DefaultPOSContextGenerator(new Dictionary(dictIn));
                } finally {
                    dictIn.close();
                }
            }

            InputStreamReader trainingReader = POSTaggerTrainer.openTrainingReader(inFile, encoding);
            try {
                WordTagSampleStream samples = new WordTagSampleStream(trainingReader);
                AbstractModel mod;
                if (sequence) {
                    POSSampleSequenceStream ss = cg == null
                            ? new POSSampleSequenceStream(samples)
                            : new POSSampleSequenceStream(samples, cg);
                    mod = new SimplePerceptronSequenceTrainer().trainModel(iterations, ss, cutoff, true);
                } else {
                    POSSampleEventStream es = cg == null
                            ? new POSSampleEventStream(samples)
                            : new POSSampleEventStream(samples, cg);
                    mod = perceptron
                            ? POSTaggerTrainer.trainPerceptronModel(es, iterations, cutoff)
                            : POSTaggerTrainer.trainMaxentModel(es, iterations, cutoff);
                }
                System.out.println("Saving the model as: " + outFile);
                // Sequence models are always perceptron models.
                if (sequence || perceptron) {
                    new SuffixSensitivePerceptronModelWriter(mod, outFile).persist();
                } else {
                    new SuffixSensitiveGISModelWriter(mod, outFile).persist();
                }
            } finally {
                trainingReader.close();
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            // Report the failure to the calling shell instead of exiting with 0.
            System.exit(1);
        }
    }

    /**
     * Opens the training file for reading.
     *
     * @param inFile the training file
     * @param encoding charset name, or {@code null} for the platform default
     * @return a reader over the file; the caller must close it
     * @throws IOException if the file cannot be opened or the encoding is
     *         not supported
     */
    private static InputStreamReader openTrainingReader(File inFile, String encoding) throws IOException {
        FileInputStream in = new FileInputStream(inFile);
        if (encoding == null) {
            return new InputStreamReader(in);
        }
        try {
            return new InputStreamReader((InputStream)in, encoding);
        } catch (IOException e) {
            // Do not leak the file handle when the charset name is rejected.
            in.close();
            throw e;
        }
    }

    /**
     * Collects the words of the training file as unigrams, applies the cutoff
     * and writes the resulting dictionary to {@code dict}.
     *
     * <p>Empty tokens (blank lines, repeated spaces) are skipped. A non-empty
     * token without a {@code _} separator is reported as an error.
     *
     * @param dict path of the dictionary file to write
     * @param inFile the training file
     * @param cutoff lower bound handed to {@code NGramModel.cutoff}
     * @param encoding charset name of the training file, or {@code null} for
     *        the platform default
     * @throws FileNotFoundException if one of the files cannot be opened
     * @throws IOException if reading, parsing or writing fails
     */
    private static void buildDictionary(String dict, File inFile, int cutoff, String encoding) throws FileNotFoundException, IOException {
        System.err.println("Building dictionary");
        NGramModel ngramModel = new NGramModel();
        // Read with the same encoding as the trainer so that the dictionary
        // entries match the words seen during training.
        InputStreamReader reader = POSTaggerTrainer.openTrainingReader(inFile, encoding);
        try {
            PlainTextByLineDataStream data = new PlainTextByLineDataStream(reader);
            while (data.hasNext()) {
                String[] tokens = ((String)data.nextToken()).split(" ");
                String[] words = new String[tokens.length];
                int count = 0;
                for (int ti = 0; ti < tokens.length; ++ti) {
                    String token = tokens[ti];
                    if (token.length() == 0) {
                        continue;
                    }
                    int separator = token.lastIndexOf('_');
                    if (separator < 0) {
                        throw new IOException("Cannot find tag separator '_' in token \"" + token + "\" of " + inFile);
                    }
                    words[count++] = token.substring(0, separator);
                }
                if (count == 0) {
                    continue;
                }
                if (count < words.length) {
                    String[] trimmed = new String[count];
                    System.arraycopy(words, 0, trimmed, 0, count);
                    words = trimmed;
                }
                ngramModel.add(new StringList(words), 1, 1);
            }
        } finally {
            reader.close();
        }
        System.out.println("Saving the dictionary");
        ngramModel.cutoff(cutoff, Integer.MAX_VALUE);
        Dictionary dictionary = ngramModel.toDictionary(true);
        FileOutputStream out = new FileOutputStream(dict);
        try {
            dictionary.serialize(out);
        } finally {
            out.close();
        }
    }
}

