/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.steps.msbatchtranslation;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.okapi.common.Base64;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.EventType;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.annotation.AltTranslationsAnnotation;
import net.sf.okapi.common.annotation.GenericAnnotation;
import net.sf.okapi.common.annotation.GenericAnnotations;
import net.sf.okapi.common.exceptions.OkapiIOException;
import net.sf.okapi.common.filterwriter.TMXWriter;
import net.sf.okapi.common.pipeline.BasePipelineStep;
import net.sf.okapi.common.pipeline.annotations.StepParameterMapping;
import net.sf.okapi.common.pipeline.annotations.StepParameterType;
import net.sf.okapi.common.query.QueryResult;
import net.sf.okapi.common.resource.ISegments;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.MultiEvent;
import net.sf.okapi.common.resource.PipelineParameters;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.common.resource.Segment;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextUnitUtil;
import net.sf.okapi.connectors.microsoft.MicrosoftMTConnector;
import net.sf.okapi.steps.msbatchtranslation.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@UsingParameters(value=Parameters.class)
public class MSBatchTranslationStep
extends BasePipelineStep {
    // Logger named after the runtime class of this step.
    private final Logger logger = LoggerFactory.getLogger(this.getClass());
    // Placeholder looked for in the category parameter; when present, the category is
    // resolved later from the first "its-domain" annotation value found on a text unit.
    public static final String DOMAINVAR = "${domain}";
    // Presumably the default for maxEvents (same value as the fallback used at batch start).
    private static final int MAXEVENTS = 20;
    // Step parameters; replaced through setParameters().
    private Parameters params = new Parameters();
    // Writer for the TMX output; only created when the "make TMX" option is on, null otherwise.
    private TMXWriter tmxWriter;
    // Events buffered until the next query to the MT connector.
    private LinkedList<Event> events;
    // Number of buffered events that triggers a query; re-read from the parameters at batch start.
    private int maxEvents = 20;
    // Connector used to run the batch queries; created at batch start.
    private MicrosoftMTConnector conn;
    private LocaleId sourceLocale;
    private LocaleId targetLocale;
    // Values substituted into the configuration-file and TMX paths.
    private String rootDir;
    private String inputRootDir;
    // Attributes passed to every TMX entry written.
    private Map<String, String> attributes;
    // Set after a batch of events has been sent back, so the buffer is cleared on the next store.
    private boolean needReset;
    // True when both the "send TMX" and "make TMX" options are on.
    private boolean sendTmx;
    // TMX path after variable substitution.
    private String tmxOutputPath;
    // Number of input items in the batch, as provided through setBatchInputCount().
    private int batchInputCount;
    // Number of START_BATCH_ITEM events seen since the start of the batch.
    private int count;
    // Category parameter as read at batch start (may still contain DOMAINVAR).
    private String tempCategory;
    // Resolved category; null while waiting for the first domain value.
    private String computedCategory;

    /**
     * Finishes and releases the TMX writer (if one is open) and drops the
     * event buffer (if one exists).
     */
    private void closeAndClean() {
        final TMXWriter writer = tmxWriter;
        if (writer != null) {
            // Close the document properly before releasing the writer.
            writer.writeEndDocument();
            writer.close();
            tmxWriter = null;
        }
        final LinkedList<Event> buffered = events;
        if (buffered != null) {
            buffered.clear();
            events = null;
        }
    }

    /**
     * Gets the human-readable description of this step.
     *
     * @return the description text.
     */
    @Override
    public String getDescription() {
        return "Annotates text units with Microsoft Translator matches or/and creates a TM from them."
            + " Expects: filter events. Sends back: filter events.";
    }

    /**
     * Gets the display name of this step.
     *
     * @return the name of the step.
     */
    @Override
    public String getName() {
        return "Microsoft Batch Translation";
    }

    /**
     * Sets the source locale. Mapped to the SOURCE_LOCALE step parameter.
     *
     * @param sourceLocale the source locale to use for queries and TMX output.
     */
    @StepParameterMapping(parameterType=StepParameterType.SOURCE_LOCALE)
    public void setSourceLocale(LocaleId sourceLocale) {
        this.sourceLocale = sourceLocale;
    }

    /**
     * Sets the target locale. Mapped to the TARGET_LOCALE step parameter.
     *
     * @param targetLocale the target locale to use for queries and TMX output.
     */
    @StepParameterMapping(parameterType=StepParameterType.TARGET_LOCALE)
    public void setTargetLocale(LocaleId targetLocale) {
        this.targetLocale = targetLocale;
    }

    /**
     * Sets the root directory. Mapped to the ROOT_DIRECTORY step parameter.
     * The value is substituted into the configuration-file and TMX paths.
     *
     * @param rootDir the root directory.
     */
    @StepParameterMapping(parameterType=StepParameterType.ROOT_DIRECTORY)
    public void setRootDirectory(String rootDir) {
        this.rootDir = rootDir;
    }

    /**
     * Sets the input root directory. Mapped to the INPUT_ROOT_DIRECTORY step parameter.
     * The value is substituted into the configuration-file and TMX paths.
     *
     * @param inputRootDir the input root directory.
     */
    @StepParameterMapping(parameterType=StepParameterType.INPUT_ROOT_DIRECTORY)
    public void setInputRootDirectory(String inputRootDir) {
        this.inputRootDir = inputRootDir;
    }

    /**
     * Sets the number of input items in the batch. Mapped to the BATCH_INPUT_COUNT
     * step parameter. When the TMX is sent forward, the alternate output is only
     * generated once this many batch items have been started.
     *
     * @param batchInputCount the number of items in the batch.
     */
    @StepParameterMapping(parameterType=StepParameterType.BATCH_INPUT_COUNT)
    public void setBatchInputCount(int batchInputCount) {
        this.batchInputCount = batchInputCount;
    }

    /**
     * Gets the parameters currently used by this step.
     *
     * @return the current parameters object.
     */
    @Override
    public IParameters getParameters() {
        return params;
    }

    /**
     * Replaces the parameters used by this step.
     *
     * @param params the new parameters; must be an instance of this step's
     *        {@link Parameters} class, otherwise the cast fails.
     */
    @Override
    public void setParameters(IParameters params) {
        final Parameters stepParams = (Parameters)params;
        this.params = stepParams;
    }

    /**
     * Resolves the category to set on the MT connector.
     * <p>
     * A value without a {@code @@@key@@@} or {@code ***key***} marker is used as-is
     * ({@code null} becomes the empty string). Otherwise the key between the markers
     * is looked up in the properties file given by the configuration-path parameter,
     * as {@code key.LANG} where {@code LANG} is the upper-cased language code of the
     * target locale. If the full key is not found, the key is shortened at its last
     * {@code '.'} repeatedly and looked up again. A found value is decoded with
     * {@link Base64#decodePassword(String)}.
     *
     * @param initialValue the raw category value (can be null).
     * @return the resolved category, or an empty string if the marker cannot be
     *         parsed or no matching engine entry exists.
     * @throws OkapiIOException if no properties file is specified or it cannot be loaded.
     */
    private String computeCategory(String initialValue) {
        if (initialValue == null) {
            initialValue = "";
        }
        boolean atMarker = initialValue.contains("@@@");
        if (!atMarker && !initialValue.contains("***")) {
            // Plain category: nothing to resolve.
            this.logger.info("Using category '{}'.", (Object)initialValue);
            return initialValue;
        }

        // The "@@@" form takes precedence when both markers are present.
        Pattern pattern = atMarker ? Pattern.compile("@@@(.+?)@@@") : Pattern.compile("\\*\\*\\*(.+?)\\*\\*\\*");
        Matcher matcher = pattern.matcher(initialValue);
        if (!matcher.find()) {
            this.logger.error("Not able to parse predefined engine string '{}'. Using empty category.", (Object)initialValue);
            return "";
        }
        String parsedKey = matcher.group(1);

        String propFile = this.params.getConfigPath();
        if (propFile != null) {
            propFile = propFile.trim();
            propFile = Util.fillRootDirectoryVariable(propFile, this.rootDir);
            propFile = Util.fillInputRootDirectoryVariable(propFile, this.inputRootDir);
            propFile = LocaleId.replaceVariables(propFile, this.sourceLocale, this.targetLocale);
        }
        if (Util.isEmpty(propFile)) {
            throw new OkapiIOException("No engine mapping property file specified. Using empty category.");
        }

        Properties prop = new Properties();
        // try-with-resources so the stream is closed even when loading fails.
        try (FileInputStream input = new FileInputStream(propFile)) {
            prop.load(input);
        }
        catch (IOException ex) {
            // Keep the cause so the underlying I/O problem is not lost.
            throw new OkapiIOException("Can't load: " + propFile + ".", ex);
        }

        // Locale.ROOT keeps the key stable whatever the JVM default locale is
        // (e.g. "it" must not become "\u0130T" under a Turkish default locale).
        String langSuffix = "." + this.targetLocale.toJavaLocale().getLanguage().toUpperCase(Locale.ROOT);

        String keyWithLoc = parsedKey + langSuffix;
        String category = prop.getProperty(keyWithLoc);
        if (category != null) {
            this.logger.info("Found engine '{}'. Using category '{}'.", (Object)keyWithLoc, (Object)category);
            return Base64.decodePassword(category);
        }

        this.logger.warn("Cannot find engine '{}'. Try fallback.", (Object)keyWithLoc);
        // Fallback: drop the last dot-separated part of the key until something matches.
        int index = parsedKey.lastIndexOf('.');
        while (index != -1) {
            parsedKey = parsedKey.substring(0, index);
            keyWithLoc = parsedKey + langSuffix;
            category = prop.getProperty(keyWithLoc);
            if (category != null) {
                this.logger.info("Found fallback engine '{}'. Using category '{}'.", (Object)keyWithLoc, (Object)category);
                return Base64.decodePassword(category);
            }
            index = parsedKey.lastIndexOf('.');
        }

        this.logger.warn("No engine found. Using empty category.");
        return "";
    }

    /**
     * Prepares the step for a new batch: resets the counters and the event buffer,
     * creates and configures the MT connector, and opens the TMX writer when the
     * "make TMX" option is on.
     * <p>
     * If the category parameter contains {@link #DOMAINVAR}, the connector starts
     * with an empty category and the real one is resolved later, when the first
     * domain value is found on a text unit. A null category is treated like a
     * category without the placeholder.
     *
     * @param event the START_BATCH event.
     * @return the same event, unchanged.
     */
    @Override
    protected Event handleStartBatch(Event event) {
        this.count = 0;
        this.events = new LinkedList<Event>();

        // Fall back to the default when the configured buffer size is out of range.
        this.maxEvents = this.params.getMaxEvents();
        if (this.maxEvents < 1 || this.maxEvents > 1000) {
            this.maxEvents = MAXEVENTS;
        }

        this.conn = new MicrosoftMTConnector();
        net.sf.okapi.connectors.microsoft.Parameters prm = this.conn.getParameters();
        prm.setAzureKey(this.params.getAzureKey());

        this.tempCategory = this.params.getCategory();
        // Guard against a null category: computeCategory() accepts null, contains() does not.
        if (this.tempCategory != null && this.tempCategory.contains(DOMAINVAR)) {
            // Category depends on the document's domain: resolve it later.
            this.computedCategory = null;
            prm.setCategory("");
        } else {
            this.computedCategory = this.computeCategory(this.tempCategory);
            prm.setCategory(this.computedCategory);
        }

        this.conn.setLanguages(this.sourceLocale, this.targetLocale);
        this.conn.setMaximumHits(this.params.getMaxMatches());
        this.conn.setThreshold(this.params.getThreshold());

        // The TMX can only be sent forward if it is created in the first place.
        this.sendTmx = this.params.getSendTmx() && this.params.getMakeTmx();

        if (this.params.getMakeTmx()) {
            this.tmxOutputPath = Util.fillRootDirectoryVariable(this.params.getTmxPath(), this.rootDir);
            this.tmxOutputPath = Util.fillInputRootDirectoryVariable(this.tmxOutputPath, this.inputRootDir);
            this.tmxOutputPath = LocaleId.replaceVariables(this.tmxOutputPath, this.sourceLocale, this.targetLocale);
            this.tmxWriter = new TMXWriter(this.tmxOutputPath);
            this.tmxWriter.writeStartDocument(this.sourceLocale, this.targetLocale, this.getClass().getCanonicalName(), "1", "sentence", null, "unknown");
            this.attributes = new Hashtable<String, String>();
            if (this.params.getMarkAsMT()) {
                this.attributes.put("creationid", "MT!");
            }
            this.attributes.put("Txt::Origin", "Microsoft-Translator");
        }
        return event;
    }

    /**
     * Dispatches an incoming event.
     * <p>
     * Content events are buffered and sent to the MT connector in groups. When the
     * TMX is not sent forward, the result of the buffering (a NOOP event or a
     * multi-event holding the processed events) is returned; when it is sent
     * forward, events are replaced by NOOP events and, at the end of the last
     * document of the batch, by the alternate output pointing to the TMX file.
     *
     * @param event the event to handle.
     * @return the event to pass to the next step.
     */
    @Override
    public Event handleEvent(Event event) {
        final EventType type = event.getEventType();
        switch (type) {
            case START_BATCH:
                return handleStartBatch(event);

            // Events stored until the buffer is full.
            case TEXT_UNIT:
            case DOCUMENT_PART:
            case START_GROUP:
            case END_GROUP:
            case START_SUBFILTER:
            case END_SUBFILTER: {
                Event buffered = storeAndPossiblyProcess(event, false);
                if (!sendTmx) {
                    return buffered;
                }
                break;
            }

            // Events that force the buffer to be processed.
            case CUSTOM:
            case MULTI_EVENT:
            case START_SUBDOCUMENT:
            case END_SUBDOCUMENT: {
                Event flushed = storeAndPossiblyProcess(event, true);
                if (!sendTmx) {
                    return flushed;
                }
                break;
            }

            case CANCELED:
            case END_BATCH:
                closeAndClean();
                break;

            case START_BATCH_ITEM:
                count++;
                break;

            case END_DOCUMENT: {
                Event flushed = storeAndPossiblyProcess(event, true);
                if (!sendTmx) {
                    return flushed;
                }
                if (count >= batchInputCount) {
                    // Last document of the batch: finish the TMX and send it forward.
                    closeAndClean();
                    return MSBatchTranslationStep.generateAltOutput(tmxOutputPath, "UTF-8", sourceLocale, targetLocale, "okf_tmx");
                }
                break;
            }

            default:
                break;
        }
        return sendTmx ? Event.NOOP_EVENT : event;
    }

    /**
     * Translates the buffered events and wraps them in a single multi-event.
     * The buffer itself is handed to the multi-event, so it is only flagged for
     * reset here and cleared on the next call that stores an event.
     *
     * @return a MULTI_EVENT event holding the buffered events.
     */
    private Event processEvents() {
        getTranslations();
        needReset = true;
        final MultiEvent bundle = new MultiEvent(events);
        return new Event(EventType.MULTI_EVENT, bundle);
    }

    /**
     * Adds an event to the buffer and processes the buffer when requested or
     * when it has reached its maximum size.
     * <p>
     * While the category is still unresolved, the first text unit carrying an
     * "its-domain" annotation with a domain value is used to compute it.
     *
     * @param event the event to store.
     * @param mustProcess true to process the buffer regardless of its size.
     * @return the multi-event with the processed events, or a NOOP event if the
     *         event was only stored.
     */
    private Event storeAndPossiblyProcess(Event event, boolean mustProcess) {
        // The previous group was already sent: start a fresh buffer.
        if (needReset) {
            needReset = false;
            events.clear();
        }
        events.add(event);

        if (computedCategory == null && event.isTextUnit()) {
            ITextUnit unit = event.getTextUnit();
            GenericAnnotations annotations = unit.getAnnotation(GenericAnnotations.class);
            if (annotations != null) {
                GenericAnnotation domainAnn = annotations.getFirstAnnotation("its-domain");
                if (domainAnn != null) {
                    String domainValue = domainAnn.getString("domainValue");
                    if (domainValue != null) {
                        logger.info("First domain value ('{}') detected on text unit id='{}'.", (Object)domainValue, (Object)unit.getId());
                        computedCategory = computeCategory(tempCategory.replace(DOMAINVAR, domainValue));
                        net.sf.okapi.connectors.microsoft.Parameters connParams = conn.getParameters();
                        connParams.setCategory(computedCategory);
                    }
                }
            }
        }

        if (!mustProcess && events.size() < maxEvents) {
            return Event.NOOP_EVENT;
        }
        return processEvents();
    }

    /**
     * Queries the MT connector for the translatable content of the buffered
     * events and applies the results.
     * <p>
     * A first pass collects the source fragments to translate (one per segment for
     * segmented text units, the first content otherwise), skipping text-less
     * content and, if the corresponding option is set, content that already has an
     * alternate translation. A second pass walks the same events and matches each
     * result list back to its segment or text unit, then, depending on the
     * parameters, writes the results to the TMX, annotates the target and/or
     * fills an empty target with the first result.
     */
    private void getTranslations() {
        if (this.events.isEmpty()) {
            return;
        }

        // Pass 1: gather the fragments, remembering where each one came from.
        // segIds holds null for entries coming from an unsegmented text unit.
        List<TextFragment> fragments = new ArrayList<TextFragment>();
        List<String> segIds = new ArrayList<String>();
        List<String> tuIds = new ArrayList<String>();
        for (Event event : this.events) {
            if (!event.isTextUnit()) {
                continue;
            }
            ITextUnit tu = event.getTextUnit();
            if (!tu.isTranslatable()) {
                continue;
            }
            TextContainer trgCont = tu.getTarget(this.targetLocale);
            if (tu.getSource().hasBeenSegmented()) {
                ISegments trgSegs = (trgCont != null) ? trgCont.getSegments() : null;
                for (Segment srcSeg : tu.getSourceSegments()) {
                    if (!srcSeg.text.hasText()) {
                        continue;
                    }
                    if (this.params.getOnlyWhenWithoutCandidate() && this.hasExistingCandidate(srcSeg, trgSegs)) {
                        continue;
                    }
                    fragments.add(srcSeg.text);
                    tuIds.add(tu.getId());
                    segIds.add(tu.getId() + "\f" + srcSeg.getId());
                }
                continue;
            }
            TextFragment srcFrag = tu.getSource().getFirstContent();
            if (!srcFrag.hasText()) {
                continue;
            }
            if (this.params.getOnlyWhenWithoutCandidate() && this.hasExistingCandidate(trgCont)) {
                continue;
            }
            fragments.add(srcFrag);
            tuIds.add(tu.getId());
            segIds.add(null);
        }
        if (fragments.isEmpty()) {
            return;
        }

        List<List<QueryResult>> list = this.conn.batchQuery(fragments);
        if (Util.isEmpty(list)) {
            this.logger.warn("No translation generated.");
            return;
        }
        if (list.size() != fragments.size()) {
            this.logger.warn("Expected {} translation results but received {}.", fragments.size(), list.size());
        }
        // Never index past what was queried or what came back.
        int limit = Math.min(list.size(), fragments.size());

        // Pass 2: walk the events again and hand each result list to its owner.
        int entryIndex = 0;
        for (Event event : this.events) {
            if (entryIndex >= limit) {
                // Everything returned has been applied; remaining text units were skipped in pass 1.
                break;
            }
            if (!event.isTextUnit()) {
                continue;
            }
            ITextUnit tu = event.getTextUnit();
            if (!tu.isTranslatable()) {
                continue;
            }
            TextContainer trgCont = tu.getTarget(this.targetLocale);

            if (tu.getSource().hasBeenSegmented()) {
                for (Segment srcSeg : tu.getSourceSegments()) {
                    if (entryIndex >= limit) {
                        break;
                    }
                    if (!srcSeg.text.hasText()) {
                        continue;
                    }
                    // The current entry may belong to another segment or to an unsegmented
                    // text unit (null id) when this segment was skipped in pass 1.
                    String segKey = tu.getId() + "\f" + srcSeg.getId();
                    if (!segKey.equals(segIds.get(entryIndex))) {
                        continue;
                    }
                    List<QueryResult> resList = list.get(entryIndex);
                    ++entryIndex;
                    boolean firstMatch = true;
                    for (QueryResult res : resList) {
                        // NOTE(review): only this segmented path filters TMX output by score (> 9);
                        // the unsegmented path below writes every result. Confirm which is intended.
                        if (this.tmxWriter != null && res.getCombinedScore() > 9) {
                            this.tmxWriter.writeTU(res.source, res.target, null, this.attributes);
                        }
                        // Only the first result is a candidate for filling the target.
                        boolean fill = false;
                        if (firstMatch && this.params.getFillTarget()) {
                            fill = res.getCombinedScore() >= this.params.getFillTargetThreshold();
                        }
                        if (fill || this.params.getAnnotate()) {
                            if (trgCont == null) {
                                trgCont = tu.createTarget(this.targetLocale, false, 4);
                            }
                            Segment trgSeg = trgCont.getSegments().get(srcSeg.id);
                            if (trgSeg == null) {
                                trgSeg = new Segment(srcSeg.id);
                                trgCont.getSegments().append(trgSeg);
                            }
                            if (this.params.getAnnotate()) {
                                TextUnitUtil.addAltTranslation(trgSeg, res.toAltTranslation(srcSeg.text, this.sourceLocale, this.targetLocale));
                            }
                            // Never overwrite an existing translation.
                            if (fill && trgSeg.text.isEmpty()) {
                                trgSeg.text.setCodedText(res.target.getCodedText(), res.target.getClonedCodes());
                            }
                        }
                        firstMatch = false;
                    }
                }
                continue;
            }

            if (!tu.getSource().getFirstContent().hasText()) {
                continue;
            }
            if (!tuIds.get(entryIndex).equals(tu.getId())) {
                // This text unit was skipped in pass 1.
                continue;
            }
            List<QueryResult> resList = list.get(entryIndex);
            ++entryIndex;
            boolean firstMatch = true;
            for (QueryResult res : resList) {
                if (this.tmxWriter != null) {
                    this.tmxWriter.writeTU(res.source, res.target, null, this.attributes);
                }
                // Only the first result is a candidate for filling the target.
                boolean fill = false;
                if (firstMatch && this.params.getFillTarget()) {
                    fill = res.getCombinedScore() >= this.params.getFillTargetThreshold();
                }
                if (fill || this.params.getAnnotate()) {
                    if (trgCont == null) {
                        trgCont = tu.createTarget(this.targetLocale, false, 4);
                    }
                    if (this.params.getAnnotate()) {
                        TextFragment srcFrag = tu.getSource().getFirstContent();
                        TextUnitUtil.addAltTranslation(trgCont, res.toAltTranslation(srcFrag, this.sourceLocale, this.targetLocale));
                    }
                    if (fill) {
                        // Never overwrite an existing translation.
                        TextFragment trgFrag = trgCont.getFirstContent();
                        if (trgFrag.isEmpty()) {
                            trgFrag.setCodedText(res.target.getCodedText(), res.target.getClonedCodes());
                        }
                    }
                }
                firstMatch = false;
            }
        }
    }

    /**
     * Tells whether the target segment matching a source segment already
     * carries at least one alternate translation.
     *
     * @param srcSeg the source segment whose id is looked up.
     * @param trgSegs the target segments (can be null).
     * @return true if a matching target segment exists and its
     *         alternate-translations annotation has a first entry.
     */
    private boolean hasExistingCandidate(Segment srcSeg, ISegments trgSegs) {
        if (trgSegs != null) {
            Segment match = trgSegs.get(srcSeg.getId());
            if (match != null) {
                AltTranslationsAnnotation alts = match.getAnnotation(AltTranslationsAnnotation.class);
                return alts != null && alts.getFirst() != null;
            }
        }
        return false;
    }

    /**
     * Tells whether a container already carries at least one alternate translation.
     *
     * @param frag the container to check (can be null).
     * @return true if the container has an alternate-translations annotation
     *         with a first entry.
     */
    private boolean hasExistingCandidate(TextContainer frag) {
        if (frag != null) {
            AltTranslationsAnnotation alts = frag.getAnnotation(AltTranslationsAnnotation.class);
            return alts != null && alts.getFirst() != null;
        }
        return false;
    }

    /**
     * Builds a multi-event that presents a file as a new one-item batch input:
     * pipeline parameters, then START_BATCH_ITEM, RAW_DOCUMENT and END_BATCH_ITEM.
     * The raw document's input URI is used for both the input and the output URI.
     *
     * @param outFilePath path of the file to wrap in a raw document.
     * @param defaultEncoding encoding given to the raw document.
     * @param sourceLocale source locale of the raw document.
     * @param targetLocale target locale of the raw document.
     * @param filterConfigId filter configuration id for the raw document.
     * @return a MULTI_EVENT event holding the four events.
     */
    public static Event generateAltOutput(String outFilePath, String defaultEncoding, LocaleId sourceLocale, LocaleId targetLocale, String filterConfigId) {
        final File outFile = new File(outFilePath);
        final RawDocument document = new RawDocument(outFile.toURI(), defaultEncoding, sourceLocale, targetLocale, filterConfigId);

        // Pipeline parameters mirror what the raw document itself reports.
        final PipelineParameters pipelineParams = new PipelineParameters();
        pipelineParams.setInputURI(document.getInputURI());
        pipelineParams.setOutputURI(document.getInputURI());
        pipelineParams.setSourceLocale(document.getSourceLocale());
        pipelineParams.setTargetLocale(document.getTargetLocale());
        pipelineParams.setOutputEncoding(document.getEncoding());
        pipelineParams.setInputRawDocument(document);
        pipelineParams.setFilterConfigurationId(document.getFilterConfigId());
        pipelineParams.setBatchInputCount(1);

        final ArrayList<Event> sequence = new ArrayList<Event>();
        sequence.add(new Event(EventType.PIPELINE_PARAMETERS, pipelineParams));
        sequence.add(new Event(EventType.START_BATCH_ITEM));
        sequence.add(new Event(EventType.RAW_DOCUMENT, document));
        sequence.add(new Event(EventType.END_BATCH_ITEM));
        return new Event(EventType.MULTI_EVENT, new MultiEvent(sequence));
    }
}

