/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.steps.repetitionanalysis;

import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.annotation.AltTranslation;
import net.sf.okapi.common.annotation.AltTranslationsAnnotation;
import net.sf.okapi.common.pipeline.BasePipelineStep;
import net.sf.okapi.common.pipeline.annotations.StepParameterMapping;
import net.sf.okapi.common.pipeline.annotations.StepParameterType;
import net.sf.okapi.common.query.MatchType;
import net.sf.okapi.common.resource.ISegments;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.Segment;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.steps.repetitionanalysis.Parameters;
import net.sf.okapi.steps.repetitionanalysis.RepetitiveSegmentAnnotation;
import net.sf.okapi.steps.repetitionanalysis.SegmentInfo;
import net.sf.okapi.tm.pensieve.common.MetadataType;
import net.sf.okapi.tm.pensieve.common.TmHit;
import net.sf.okapi.tm.pensieve.common.TranslationUnit;
import net.sf.okapi.tm.pensieve.common.TranslationUnitVariant;
import net.sf.okapi.tm.pensieve.seeker.ITmSeeker;
import net.sf.okapi.tm.pensieve.seeker.PensieveSeeker;
import net.sf.okapi.tm.pensieve.writer.PensieveWriter;
import net.sf.okapi.tm.pensieve.writer.TmWriterFactory;

/**
 * Pipeline step that detects repeated source segments inside a document.
 *
 * <p>For every document a throw-away Pensieve TM is created in a temporary directory. Each
 * qualifying source segment is first looked up in that TM (exact search, plus fuzzy search when
 * the configured threshold is below 100) and then indexed into it, so later segments can match
 * earlier ones. Segments receive a {@link RepetitiveSegmentAnnotation}; when a matching target
 * segment exists, the hits are also recorded on it in an {@link AltTranslationsAnnotation}.
 */
public class RepetitionAnalysisStep extends BasePipelineStep {
    private Parameters params = new Parameters();
    /** True when the fuzzy threshold is 100 or more, i.e. only exact lookups are performed. */
    private boolean searchExact;
    /** 1-based position of the current segment inside the current text unit. */
    private int segCounter;
    /** Number of qualifying segments seen so far in the current document; used as the TU id. */
    private long tuCounter;
    /** Group id for the current text unit; advanced after each text unit that produced a TU. */
    private long groupCounter;
    /** Directory of the per-document temporary TM, or null when none exists. */
    private String tmDir;
    private PensieveWriter tmWriter;
    private ITmSeeker currentTm;
    private LocaleId sourceLocale;
    private LocaleId targetLocale;

    @Override
    public String getName() {
        return "Repetition Analysis";
    }

    @Override
    public String getDescription() {
        return "Analyzes repetitions in input documents. Adds AltTranslationsAnnotation and RepetitiveSegmentAnnotation to a repetitive segment. Expects: filter events. Sends back: filter events.";
    }

    @Override
    public IParameters getParameters() {
        return this.params;
    }

    /** Releases the temporary TM when the pipeline is cancelled. */
    @Override
    public void cancel() {
        this.close();
    }

    /**
     * Replaces the step parameters.
     *
     * @param params the new parameters; must be an instance of {@link Parameters}
     */
    @Override
    public void setParameters(IParameters params) {
        this.params = (Parameters)params;
    }

    @StepParameterMapping(parameterType=StepParameterType.SOURCE_LOCALE)
    public void setSourceLocale(LocaleId sourceLocale) {
        this.sourceLocale = sourceLocale;
    }

    @StepParameterMapping(parameterType=StepParameterType.TARGET_LOCALE)
    public void setTargetLocale(LocaleId targetLocale) {
        this.targetLocale = targetLocale;
    }

    /**
     * Closes the TM writer and seeker and removes the temporary TM directory.
     *
     * <p>Each resource reference is cleared before it is released and the clean-up steps are
     * chained with {@code finally}, so a failure while closing one resource neither skips the
     * remaining ones nor leaves a stale reference behind. Calling this method repeatedly is safe.
     */
    private void close() {
        try {
            if (this.tmWriter != null) {
                PensieveWriter writer = this.tmWriter;
                this.tmWriter = null;
                writer.close();
            }
        } finally {
            try {
                if (this.currentTm != null) {
                    ITmSeeker seeker = this.currentTm;
                    this.currentTm = null;
                    seeker.close();
                }
            } finally {
                if (this.tmDir != null) {
                    String dir = this.tmDir;
                    // Forget the path so a later close() does not try to delete it again.
                    this.tmDir = null;
                    Util.deleteDirectory(dir, false);
                }
            }
        }
    }

    /**
     * Discards any TM left over from a previous document and creates a fresh, uniquely named
     * temporary TM for the document that is starting. Also resets the per-document counters.
     */
    @Override
    protected Event handleStartDocument(Event event) {
        this.close();
        this.tmDir = String.format("%s~okapi-step-repetitionanalysis-%s/", Util.ensureSeparator(Util.getTempDirectory(), true), UUID.randomUUID().toString());
        Util.createDirectories(this.tmDir);
        this.searchExact = this.params.getFuzzyThreshold() >= 100;
        this.tuCounter = 0L;
        this.groupCounter = 1L;
        this.tmWriter = (PensieveWriter)TmWriterFactory.createFileBasedTmWriter(this.tmDir, true);
        // The seeker reads through the writer's index writer, so committed entries are searchable.
        this.currentTm = new PensieveSeeker(this.tmWriter.getIndexWriter());
        return super.handleStartDocument(event);
    }

    /** Releases the temporary TM once the document has been fully processed. */
    @Override
    protected Event handleEndDocument(Event event) {
        this.close();
        return super.handleEndDocument(event);
    }

    /**
     * Tells whether a source/target segment pair should be counted as a translation unit.
     *
     * @param sseg the source segment, may be null
     * @param tseg the corresponding target segment, may be null
     * @return true when {@code sseg} is not null and either the source content or the (non-null)
     *         target content has text
     */
    public static boolean checkSegments(Segment sseg, Segment tseg) {
        if (sseg == null) {
            return false;
        }
        if (sseg.getContent().hasText()) {
            return true;
        }
        return tseg != null && tseg.getContent().hasText();
    }

    /**
     * Analyzes every source segment of a translatable text unit: looks it up in the temporary TM,
     * annotates it (and its target counterpart, if any) with the hits found, then indexes it so
     * that subsequent segments can be matched against it.
     */
    @Override
    protected Event handleTextUnit(Event event) {
        ITextUnit tu = event.getTextUnit();
        if (!tu.isTranslatable()) {
            return super.handleTextUnit(event);
        }

        ISegments ssegments = tu.getSource().getSegments();
        ISegments tsegments = null;
        if (this.targetLocale != null) {
            tsegments = tu.getTargetSegments(this.targetLocale);
        }
        // Without a target locale the source locale stands in for the target side.
        LocaleId effectiveTargetLocale = this.targetLocale == null ? this.sourceLocale : this.targetLocale;

        this.segCounter = 0;
        boolean hasTranslationUnits = false;
        for (Segment seg : ssegments) {
            ++this.segCounter;
            Segment tseg = null;
            if (tsegments != null) {
                tseg = tsegments.get(seg.getId());
            }
            if (!RepetitionAnalysisStep.checkSegments(seg, tseg)) {
                continue;
            }
            ++this.tuCounter;
            hasTranslationUnits = true;
            TextFragment content = seg.getContent();
            if (content.isEmpty()) {
                continue;
            }

            // Work on a fragment rebuilt from getText() rather than on the segment's own content.
            TextFragment tf = new TextFragment(content.getText());
            String tuid = Long.toString(this.tuCounter);
            String groupId = Long.toString(this.groupCounter);
            String segId = Long.toString(this.segCounter);
            SegmentInfo info = new SegmentInfo(tuid, groupId, segId);

            // NOTE(review): exact hits are collected first and fuzzy hits appended; if the fuzzy
            // search also returns the exact matches, they would appear twice - confirm.
            List<TmHit> hits = new ArrayList<TmHit>();
            hits.addAll(this.currentTm.searchExact(tf, null));
            if (!this.searchExact) {
                hits.addAll(this.currentTm.searchFuzzy(tf, this.params.getFuzzyThreshold(), this.params.getMaxHits(), null));
            }

            if (hits.isEmpty()) {
                // First occurrence: annotate with the segment info only, no hits.
                seg.setAnnotation(new RepetitiveSegmentAnnotation(info, (List<TmHit>)null));
            } else {
                seg.setAnnotation(new RepetitiveSegmentAnnotation(info, hits));
                // The target may lack a segment with this id even when the target container
                // exists, so guard on the segment itself to avoid a NullPointerException.
                if (tseg != null) {
                    this.addAltTranslations(tseg, tf, hits, effectiveTargetLocale);
                }
            }

            // Index this segment; the TU id is stored as the target text of the TM entry.
            TranslationUnit ntu = new TranslationUnit(new TranslationUnitVariant(this.sourceLocale, tf), new TranslationUnitVariant(effectiveTargetLocale, new TextFragment(tuid)));
            ntu.setMetadataValue(MetadataType.ID, tuid);
            ntu.setMetadataValue(MetadataType.GROUP_NAME, groupId);
            ntu.setMetadataValue(MetadataType.FILE_NAME, segId);
            this.tmWriter.indexTranslationUnit(ntu);
            // Commit right away so the entry is visible to the lookup of the next segment.
            this.tmWriter.commit();
        }
        if (hasTranslationUnits) {
            ++this.groupCounter;
        }
        return super.handleTextUnit(event);
    }

    /**
     * Records each TM hit as an alternate translation on the given target segment, creating the
     * segment's {@link AltTranslationsAnnotation} if it does not have one yet.
     */
    private void addAltTranslations(Segment tseg, TextFragment tf, List<TmHit> hits, LocaleId effectiveTargetLocale) {
        AltTranslationsAnnotation ata = tseg.getAnnotation(AltTranslationsAnnotation.class);
        if (ata == null) {
            ata = new AltTranslationsAnnotation();
            tseg.setAnnotation(ata);
        }
        for (TmHit hit : hits) {
            TranslationUnit hitTu = hit.getTu();
            TextFragment otf = new TextFragment(tf.getText());
            TextFragment stf = new TextFragment(hitTu.getSource().getContent().getText());
            TextFragment ttf = new TextFragment(hitTu.getTarget().getContent().getText());
            ata.add(new AltTranslation(this.sourceLocale, effectiveTargetLocale, otf, stf, ttf, MatchType.EXACT_DOCUMENT_CONTEXT, (int)Math.floor(hit.getScore()), ""));
        }
    }
}

