/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.cmdline.tokenizer;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import opennlp.model.TrainUtil;
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.cmdline.params.TrainingToolParams;
import opennlp.tools.cmdline.tokenizer.TrainingParams;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.tokenize.TokenSampleStream;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Command line tool that trains a {@link TokenizerModel} from a file of
 * tokenizer training samples and writes the resulting model to disk.
 *
 * <p>The accepted arguments are described by {@link TrainerToolParams}; the
 * usage text printed by {@link #getHelp()} is generated from that interface.
 */
public final class TokenizerTrainerTool
implements CmdLineTool {
    /**
     * Returns the name under which this tool is invoked.
     *
     * @return the constant {@code "TokenizerTrainer"}
     */
    @Override
    public String getName() {
        return "TokenizerTrainer";
    }

    /**
     * Returns a one-line description of the tool.
     *
     * @return the short description text
     */
    @Override
    public String getShortDescription() {
        return "trainer for the learnable tokenizer";
    }

    /**
     * Builds the usage message from the tool name and the argument
     * description generated for {@link TrainerToolParams}.
     *
     * @return the usage text
     */
    @Override
    public String getHelp() {
        return "Usage: opennlp " + this.getName() + " " + ArgumentParser.createUsage(TrainerToolParams.class);
    }

    /**
     * Opens a sample file as a stream of {@link TokenSample}s.
     *
     * <p>The file is first passed to {@code CmdLineUtil.checkInputFile} and is
     * then read line by line using the given encoding. The caller owns the
     * returned stream and is responsible for closing it.
     *
     * @param sampleDataName label used (with the suffix {@code " Data"}) when
     *                       the input file is checked
     * @param sampleDataFile the file containing the samples
     * @param encoding       the character encoding of the file
     * @return a sample stream backed by the opened file
     */
    static ObjectStream<TokenSample> openSampleData(String sampleDataName, File sampleDataFile, Charset encoding) {
        CmdLineUtil.checkInputFile(sampleDataName + " Data", sampleDataFile);
        FileInputStream sampleDataIn = CmdLineUtil.openInFile(sampleDataFile);
        PlainTextByLineStream lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(), encoding);
        return new TokenSampleStream(lineStream);
    }

    /**
     * Loads the optional abbreviation dictionary.
     *
     * <p>The underlying file stream is always closed before this method
     * returns, whether or not the dictionary could be read.
     *
     * @param f the dictionary file, or {@code null} if no dictionary was given
     * @return the loaded dictionary, or {@code null} when {@code f} is {@code null}
     * @throws IOException if the file cannot be opened or the dictionary
     *                     constructor fails while reading it
     */
    static Dictionary loadDict(File f) throws IOException {
        if (f == null) {
            return null;
        }
        CmdLineUtil.checkInputFile("abb dict", f);
        FileInputStream dictIn = new FileInputStream(f);
        try {
            return new Dictionary(dictIn);
        }
        finally {
            try {
                dictIn.close();
            }
            catch (IOException ignored) {
                // Best effort: the stream was only read from, and a failure to
                // close must not mask an exception thrown while loading.
            }
        }
    }

    /**
     * Runs the training: validates the arguments, resolves the training
     * parameters, trains the model and writes it to the output file.
     *
     * <p>If the arguments do not validate, the usage text is printed to
     * {@code System.err} and the tool terminates with code {@code 1}. An
     * invalid parameters file, a request for sequence training, or an
     * {@link IOException} during dictionary loading or training terminates
     * the tool with code {@code -1}.
     *
     * @param args the command line arguments for this tool
     * @throws TerminateToolException on any of the failures described above
     */
    @Override
    public void run(String[] args) {
        TokenizerModel model;
        if (!ArgumentParser.validateArguments(args, TrainerToolParams.class)) {
            System.err.println(this.getHelp());
            throw new TerminateToolException(1);
        }
        TrainerToolParams params = ArgumentParser.parse(args, TrainerToolParams.class);

        // A null result is treated below as "no parameters file supplied".
        TrainingParameters mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false);
        if (mlParams != null) {
            if (!TrainUtil.isValid(mlParams.getSettings())) {
                System.err.println("Training parameters file is invalid!");
                throw new TerminateToolException(-1);
            }
            if (TrainUtil.isSequenceTraining(mlParams.getSettings())) {
                System.err.println("Sequence training is not supported!");
                throw new TerminateToolException(-1);
            }
        }

        File trainingDataInFile = params.getData();
        File modelOutFile = params.getModel();
        // Check the output location before opening the training data.
        CmdLineUtil.checkOutputFile("tokenizer model", modelOutFile);
        ObjectStream<TokenSample> sampleStream = TokenizerTrainerTool.openSampleData("Training", trainingDataInFile, params.getEncoding());

        // Without a parameters file, fall back to the command line values.
        if (mlParams == null) {
            mlParams = TokenizerTrainerTool.createTrainingParameters(params.getIterations(), params.getCutoff());
        }
        try {
            Dictionary dict = TokenizerTrainerTool.loadDict(params.getAbbDict());
            model = TokenizerME.train(params.getLang(), sampleStream, dict, params.getAlphaNumOpt(), mlParams);
        }
        catch (IOException e) {
            CmdLineUtil.printTrainingIoError(e);
            throw new TerminateToolException(-1);
        }
        finally {
            try {
                sampleStream.close();
            }
            catch (IOException ignored) {
                // Best effort: a failure to close the training data must not
                // hide the training result or an earlier error.
            }
        }
        CmdLineUtil.writeModel("tokenizer", modelOutFile, model);
    }

    /**
     * Creates training parameters that select the {@code MAXENT} algorithm
     * with the given iteration count and cutoff.
     *
     * @param iterations value stored under the {@code "Iterations"} key; must
     *                   not be {@code null}
     * @param cutoff     value stored under the {@code "Cutoff"} key; must not
     *                   be {@code null}
     * @return a new parameters object holding the three settings
     * @throws NullPointerException if {@code iterations} or {@code cutoff} is
     *                              {@code null}
     */
    public static TrainingParameters createTrainingParameters(Integer iterations, Integer cutoff) {
        TrainingParameters mlParams = new TrainingParameters();
        mlParams.put("Algorithm", "MAXENT");
        mlParams.put("Iterations", iterations.toString());
        mlParams.put("Cutoff", cutoff.toString());
        return mlParams;
    }

    /**
     * Argument interface of this tool: the union of the tokenizer-specific
     * {@link TrainingParams} and the general {@link TrainingToolParams}.
     */
    static interface TrainerToolParams
    extends TrainingParams,
    TrainingToolParams {
    }
}

