/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.steps.repetitionanalysis;

import java.util.List;
import java.util.UUID;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.annotation.AltTranslation;
import net.sf.okapi.common.annotation.AltTranslationsAnnotation;
import net.sf.okapi.common.pipeline.BasePipelineStep;
import net.sf.okapi.common.pipeline.annotations.StepParameterMapping;
import net.sf.okapi.common.pipeline.annotations.StepParameterType;
import net.sf.okapi.common.query.MatchType;
import net.sf.okapi.common.resource.ISegments;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.Segment;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.steps.repetitionanalysis.Parameters;
import net.sf.okapi.steps.repetitionanalysis.RepetitiveSegmentAnnotation;
import net.sf.okapi.tm.pensieve.common.MetadataType;
import net.sf.okapi.tm.pensieve.common.TmHit;
import net.sf.okapi.tm.pensieve.common.TranslationUnit;
import net.sf.okapi.tm.pensieve.common.TranslationUnitVariant;
import net.sf.okapi.tm.pensieve.seeker.ITmSeeker;
import net.sf.okapi.tm.pensieve.seeker.TmSeekerFactory;
import net.sf.okapi.tm.pensieve.writer.ITmWriter;
import net.sf.okapi.tm.pensieve.writer.TmWriterFactory;

/**
 * Pipeline step that detects repeated source segments within a document.
 *
 * <p>For every document a scratch translation memory is created in a
 * per-instance temporary directory. Each source segment containing text is
 * looked up in that TM: when a hit is found the segment is annotated as a
 * repetition of the earlier segment (and, if a target locale is set, the
 * matching target segment receives an {@link AltTranslation}); otherwise the
 * segment is indexed in the TM so that later segments can match it.
 */
public class RepetitionAnalysisStep extends BasePipelineStep {
    private Parameters params = new Parameters();
    /** True when the fuzzy threshold is 100 or more, i.e. only exact lookups are performed. */
    private boolean searchExact;
    /** Running number of analyzed segments in the current document; used as the segment's TM id. */
    private long counter;
    /** Scratch TM directory, unique per step instance (random UUID under the temp directory). */
    private String tmDir = String.format("%s~okapi-step-repetitionanalysis-%s/", Util.ensureSeparator(Util.getTempDirectory(), true), UUID.randomUUID().toString());
    private ITmWriter tmWriter;
    private ITmSeeker currentTm;
    private LocaleId sourceLocale;
    private LocaleId targetLocale;

    @Override
    public String getName() {
        return "Repetition Analysis";
    }

    @Override
    public String getDescription() {
        return "Analyzes repetitions in input documents. Adds AltTranslationsAnnotation and RepetitiveSegmentAnnotation to a repetitive segment. Expects: raw document. Sends back: raw document.";
    }

    @Override
    public IParameters getParameters() {
        return this.params;
    }

    /**
     * Releases the scratch TM resources when the pipeline is cancelled.
     */
    @Override
    public void cancel() {
        this.close();
    }

    /**
     * Sets the step parameters.
     *
     * @param params the new parameters; must be an instance of {@link Parameters}
     */
    @Override
    public void setParameters(IParameters params) {
        this.params = (Parameters)params;
    }

    @StepParameterMapping(parameterType=StepParameterType.SOURCE_LOCALE)
    public void setSourceLocale(LocaleId sourceLocale) {
        this.sourceLocale = sourceLocale;
    }

    @StepParameterMapping(parameterType=StepParameterType.TARGET_LOCALE)
    public void setTargetLocale(LocaleId targetLocale) {
        this.targetLocale = targetLocale;
    }

    /**
     * Closes the TM writer and seeker (if open) and deletes the scratch TM
     * directory. Each cleanup stage runs even if an earlier one throws, so a
     * failing writer cannot leak the seeker or leave the directory behind.
     */
    private void close() {
        try {
            if (this.tmWriter != null) {
                this.tmWriter.close();
            }
        } finally {
            this.tmWriter = null;
            try {
                if (this.currentTm != null) {
                    this.currentTm.close();
                }
            } finally {
                this.currentTm = null;
                Util.deleteDirectory(this.tmDir, false);
            }
        }
    }

    /**
     * Resets the per-document state and (re)creates the scratch TM.
     */
    @Override
    protected Event handleStartDocument(Event event) {
        this.close();
        Util.createDirectories(this.tmDir);
        this.searchExact = this.params.getFuzzyThreshold() >= 100;
        this.counter = 0L;
        // The writer is created, closed and created again before the seeker is
        // opened; presumably the first create/close materializes the index on
        // disk so that the seeker has something to open.
        this.tmWriter = TmWriterFactory.createFileBasedTmWriter(this.tmDir, true);
        this.tmWriter.close();
        this.tmWriter = TmWriterFactory.createFileBasedTmWriter(this.tmDir, true);
        this.currentTm = TmSeekerFactory.createFileBasedTmSeeker(this.tmDir);
        return super.handleStartDocument(event);
    }

    /**
     * Releases the scratch TM at the end of the document.
     */
    @Override
    protected Event handleEndDocument(Event event) {
        this.close();
        return super.handleEndDocument(event);
    }

    /**
     * Tells whether a segment is eligible for repetition analysis.
     *
     * @param segment the segment to test; may be {@code null}
     * @return {@code true} if the segment and its content are non-null and the content has text
     */
    public static boolean checkSegment(Segment segment) {
        return segment != null && segment.getContent() != null && segment.getContent().hasText();
    }

    /**
     * Analyzes every eligible source segment of a translatable text unit:
     * segments matching an earlier one are annotated as repetitions, the
     * others are added to the scratch TM.
     */
    @Override
    protected Event handleTextUnit(Event event) {
        ITextUnit tu = event.getTextUnit();
        if (!tu.isTranslatable()) {
            return super.handleTextUnit(event);
        }

        ISegments sourceSegments = tu.getSource().getSegments();
        ISegments targetSegments = this.targetLocale != null ? tu.getTargetSegments(this.targetLocale) : null;

        for (Segment seg : sourceSegments) {
            if (!RepetitionAnalysisStep.checkSegment(seg)) {
                continue;
            }
            ++this.counter;
            String tuid = Long.toString(this.counter);
            TextFragment content = seg.getContent();

            List<TmHit> hits = this.search(content);
            if (hits == null || hits.isEmpty()) {
                this.registerFirstOccurrence(seg, content, tuid);
            } else {
                this.annotateRepetition(seg, content, tuid, hits.get(0), targetSegments);
            }
        }
        return super.handleTextUnit(event);
    }

    /** Queries the scratch TM, exactly or fuzzily depending on the configured threshold. */
    private List<TmHit> search(TextFragment content) {
        if (this.searchExact) {
            return this.currentTm.searchExact(content, null);
        }
        return this.currentTm.searchFuzzy(content, this.params.getFuzzyThreshold(), 1, null);
    }

    /** Returns the target locale, falling back to the source locale when none is set. */
    private LocaleId resolveTargetLocale() {
        return this.targetLocale == null ? this.sourceLocale : this.targetLocale;
    }

    /**
     * Marks a source segment as a repetition of the TM hit and, when target
     * segments are available, records the hit as an alternate translation on
     * the target segment that has the same id.
     */
    private void annotateRepetition(Segment seg, TextFragment content, String tuid, TmHit hit, ISegments targetSegments) {
        TranslationUnit hitTu = hit.getTu();
        seg.setAnnotation(new RepetitiveSegmentAnnotation(tuid, hitTu.getMetadataValue(MetadataType.ID), hit.getScore()));
        if (targetSegments == null) {
            return;
        }
        Segment targetSeg = targetSegments.get(seg.getId());
        if (targetSeg == null) {
            // The target has no segment with this id (e.g. it is segmented
            // differently): nothing to attach the alternate translation to.
            return;
        }
        TextFragment hitSource = hitTu.getSource().getContent();
        TextFragment hitTarget = hitTu.getTarget().getContent();
        AltTranslationsAnnotation alternates = targetSeg.getAnnotation(AltTranslationsAnnotation.class);
        if (alternates == null) {
            alternates = new AltTranslationsAnnotation();
        }
        alternates.add(new AltTranslation(this.sourceLocale, this.resolveTargetLocale(), content, hitSource, hitTarget, MatchType.EXACT_DOCUMENT_CONTEXT, Math.round(hit.getScore()), ""));
        targetSeg.setAnnotation(alternates);
    }

    /**
     * Annotates a segment that has no earlier match as referring to itself,
     * indexes it in the scratch TM, and reopens the seeker after the commit
     * before the next lookup.
     */
    private void registerFirstOccurrence(Segment seg, TextFragment content, String tuid) {
        TranslationUnit unit = new TranslationUnit(new TranslationUnitVariant(this.sourceLocale, content), new TranslationUnitVariant(this.resolveTargetLocale(), new TextFragment("")));
        unit.setMetadataValue(MetadataType.ID, tuid);
        seg.setAnnotation(new RepetitiveSegmentAnnotation(tuid, tuid, 1.0f));
        this.tmWriter.indexTranslationUnit(unit);
        this.tmWriter.commit();
        this.currentTm.close();
        this.currentTm = TmSeekerFactory.createFileBasedTmSeeker(this.tmDir);
    }
}

