/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.filters.mosestext;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.okapi.common.BOMNewlineEncodingDetector;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.EventType;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.IdGenerator;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.encoder.EncoderManager;
import net.sf.okapi.common.exceptions.OkapiIOException;
import net.sf.okapi.common.exceptions.OkapiUnsupportedEncodingException;
import net.sf.okapi.common.filters.FilterConfiguration;
import net.sf.okapi.common.filters.IFilter;
import net.sf.okapi.common.filters.IFilterConfigurationMapper;
import net.sf.okapi.common.filterwriter.GenericFilterWriter;
import net.sf.okapi.common.filterwriter.IFilterWriter;
import net.sf.okapi.common.resource.Code;
import net.sf.okapi.common.resource.Ending;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.common.resource.StartDocument;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextUnit;
import net.sf.okapi.common.skeleton.GenericSkeleton;
import net.sf.okapi.common.skeleton.GenericSkeletonWriter;
import net.sf.okapi.common.skeleton.ISkeletonWriter;

/**
 * Filter that reads a text document line by line and produces one text unit per
 * line, or one per {@code <mrk mtype="seg">...</mrk>} span (which may cover
 * several lines).
 *
 * <p>Inside the extracted text, {@code <g id="..">..</g>}, {@code <x id=".."/>},
 * {@code <bx id=".."/>} and {@code <ex id=".."/>} are converted to inline codes and
 * {@code <lb/>} is converted to a line feed (see {@link #fromPseudoXLIFF(String)}).
 *
 * <p>The filter has no parameters: {@link #getParameters()} always returns
 * {@code null} and {@link #setParameters(IParameters)} does nothing.
 */
@UsingParameters
public class MosesTextFilter
implements IFilter {
    /** MIME type reported by this filter and used for its encoder mapping. */
    public static final String MOSESTEXT_MIME_TYPE = "text/x-mosestext";
    /** Literal that closes a multi-line segment. */
    private static final String ENDSEGMENT = "</mrk>";
    /** Matches the opening tag of a segment, e.g. {@code <mrk mtype="seg">}. */
    private static final Pattern STARTSEGMENT = Pattern.compile("<mrk\\s+mtype\\s*=\\s*?[\"']seg[\"'].*?>");
    /** Matches an opening {@code <g id="..">} (group 1, id in group 3) or a closing {@code </g>} (group 4). */
    private static final Pattern OPENCLOSE = Pattern.compile("(\\<g(\\s+)id=['\"](.*?)['\"]>)|(\\</g\\>)");
    /** Matches {@code <bx/>}, {@code <ex/>} or {@code <x/>} with an id; tag name in group 1, id in group 3. */
    private static final Pattern ISOLATED = Pattern.compile("\\<(bx|ex|x)(\\s+)id=['\"](.*?)['\"](\\s*?)/>");
    /** Matches the {@code <lb/>} line-break element. */
    private static final Pattern LINEBREAK = Pattern.compile("(\\<lb\\s*?/>)");
    /*
     * First character of the two-character sequences written into the coded text in
     * place of inline tags (code points 57601, 57602 and 57603).
     * NOTE(review): presumably these mirror the marker constants of TextFragment - confirm.
     */
    private static final char MARKER_OPENING = '\uE101';
    private static final char MARKER_CLOSING = '\uE102';
    private static final char MARKER_ISOLATED = '\uE103';

    /** Reader over the opened document; {@code null} when no document is open. */
    private BufferedReader reader;
    /** Line-break string detected when the document was opened. */
    private String lineBreak;
    /** Event queued for the next call to {@link #next()}; {@code null} when nothing is pending. */
    private Event event;
    /** Generator of text unit identifiers, re-created on each {@code open}. */
    private IdGenerator tuIdGen;
    /** Lazily created by {@link #getEncoderManager()}. */
    private EncoderManager encoderManager;
    /** Skeleton of the text unit currently being built. */
    private GenericSkeleton skel;

    /** Does nothing: this filter has no cancellation logic. */
    @Override
    public void cancel() {
    }

    /**
     * Closes the underlying reader, if any.
     *
     * @throws OkapiIOException if closing the reader fails
     */
    @Override
    public void close() {
        if (this.reader == null) {
            return;
        }
        try {
            this.reader.close();
        }
        catch (IOException e) {
            throw new OkapiIOException(e);
        }
        finally {
            // Drop the reference even when close() fails, so a broken reader is never reused.
            this.reader = null;
        }
    }

    /** @return a new {@link GenericSkeletonWriter} */
    @Override
    public ISkeletonWriter createSkeletonWriter() {
        return new GenericSkeletonWriter();
    }

    /** @return a new {@link GenericFilterWriter} built from this filter's skeleton writer and encoder manager */
    @Override
    public IFilterWriter createFilterWriter() {
        return new GenericFilterWriter(this.createSkeletonWriter(), this.getEncoderManager());
    }

    /** @return a list holding the single default configuration of this filter */
    @Override
    public List<FilterConfiguration> getConfigurations() {
        List<FilterConfiguration> list = new ArrayList<FilterConfiguration>();
        list.add(new FilterConfiguration(this.getName(), MOSESTEXT_MIME_TYPE, this.getClass().getName(), "Moses Text Default", "Default Moses Text configuration.", null, ".txt;"));
        return list;
    }

    /**
     * Returns the encoder manager, creating it on first use with a mapping from
     * {@link #MOSESTEXT_MIME_TYPE} to the Moses text encoder class.
     *
     * @return the encoder manager of this filter
     */
    @Override
    public EncoderManager getEncoderManager() {
        if (this.encoderManager == null) {
            this.encoderManager = new EncoderManager();
            this.encoderManager.setMapping(MOSESTEXT_MIME_TYPE, "net.sf.okapi.filters.mosestext.MosesTextEncoder");
        }
        return this.encoderManager;
    }

    /** @return the human-readable name of this filter */
    @Override
    public String getDisplayName() {
        return "Moses Text Filter";
    }

    /** @return {@link #MOSESTEXT_MIME_TYPE} */
    @Override
    public String getMimeType() {
        return MOSESTEXT_MIME_TYPE;
    }

    /** @return the identifier of this filter */
    @Override
    public String getName() {
        return "okf_mosestext";
    }

    /** @return always {@code null}: this filter has no parameters */
    @Override
    public IParameters getParameters() {
        return null;
    }

    /** @return {@code true} if an event is queued for {@link #next()} */
    @Override
    public boolean hasNext() {
        return this.event != null;
    }

    /**
     * Returns the queued event and, unless it is the end-of-document event, reads
     * ahead to queue the following one.
     *
     * <p>The read-ahead consumes either a single plain line, or all lines from a
     * segment start tag up to the line ending with {@code </mrk>}; in both cases a
     * text unit event is queued. When the input is exhausted an end-of-document
     * event is queued instead.
     *
     * @return the queued event, or {@code null} if no event is pending
     * @throws OkapiIOException if reading fails, if a segment starts inside another
     *         segment, or if the document ends inside a segment
     */
    @Override
    public Event next() {
        Event eventToSend = this.event;
        if (eventToSend == null) {
            // Nothing pending (not opened yet, or already past the end of the document).
            return null;
        }
        this.event = null;
        if (eventToSend.getEventType() == EventType.END_DOCUMENT) {
            return eventToSend;
        }
        try {
            this.skel = new GenericSkeleton();
            StringBuilder sb = new StringBuilder();
            boolean inSeg = false;
            String line;
            while ((line = this.reader.readLine()) != null) {
                Matcher m = STARTSEGMENT.matcher(line);
                if (m.lookingAt()) {
                    if (inSeg) {
                        throw new OkapiIOException("End of segment expected before a new segment.");
                    }
                    // The start tag goes to the skeleton; only what follows it is content.
                    // lookingAt() matches from index 0, so end() is the length of the tag.
                    this.skel.append(m.group());
                    line = line.substring(m.end());
                    inSeg = true;
                } else if (!inSeg) {
                    // Plain line outside of any segment: it is a text unit on its own.
                    sb.append(line);
                    this.event = this.processBuffer(sb);
                    return eventToSend;
                }
                if (line.endsWith(ENDSEGMENT)) {
                    sb.append(line, 0, line.length() - ENDSEGMENT.length());
                    this.event = this.processBuffer(sb);
                    return eventToSend;
                }
                // Segment continues on the next line: keep the line boundary in the content.
                sb.append(line).append('\n');
            }
            if (inSeg) {
                throw new OkapiIOException("End of segment expected before the end of the document.");
            }
            this.event = new Event(EventType.END_DOCUMENT, new Ending("ed"));
            return eventToSend;
        }
        catch (IOException e) {
            throw new OkapiIOException(e);
        }
    }

    /**
     * Opens the document with skeleton generation requested.
     *
     * @param input the document to read
     */
    @Override
    public void open(RawDocument input) {
        this.open(input, true);
    }

    /**
     * Opens the document and queues the start-document event.
     *
     * <p>Any previously opened reader is closed first. The encoding is taken from
     * BOM detection with UTF-8 as the default, and is written back to
     * {@code input}.
     *
     * @param input the document to read
     * @param generateSkeleton not used by this implementation
     * @throws OkapiUnsupportedEncodingException if the detected encoding is not supported
     */
    @Override
    public void open(RawDocument input, boolean generateSkeleton) {
        this.close();
        BOMNewlineEncodingDetector detector = new BOMNewlineEncodingDetector(input.getStream(), "UTF-8");
        detector.detectAndRemoveBom();
        input.setEncoding(detector.getEncoding());
        String encoding = input.getEncoding();
        try {
            this.reader = new BufferedReader(new InputStreamReader(detector.getInputStream(), encoding));
        }
        catch (UnsupportedEncodingException e) {
            throw new OkapiUnsupportedEncodingException(String.format("The encoding '%s' is not supported.", encoding), e);
        }
        this.lineBreak = detector.getNewlineType().toString();
        boolean hasUTF8BOM = detector.hasUtf8Bom();
        String docName = null;
        if (input.getInputURI() != null) {
            docName = input.getInputURI().getPath();
        }
        this.tuIdGen = new IdGenerator(null);
        StartDocument startDoc = new StartDocument("sd");
        startDoc.setName(docName);
        startDoc.setEncoding(encoding, hasUTF8BOM);
        startDoc.setLocale(input.getSourceLocale());
        startDoc.setLineBreak(this.lineBreak);
        startDoc.setFilterParameters(this.getParameters());
        startDoc.setFilterWriter(this.createFilterWriter());
        startDoc.setType(MOSESTEXT_MIME_TYPE);
        startDoc.setMimeType(MOSESTEXT_MIME_TYPE);
        startDoc.setMultilingual(false);
        this.event = new Event(EventType.START_DOCUMENT, startDoc);
    }

    /** Does nothing: this filter does not use a configuration mapper. */
    @Override
    public void setFilterConfigurationMapper(IFilterConfigurationMapper fcMapper) {
    }

    /** Does nothing: this filter has no parameters. */
    @Override
    public void setParameters(IParameters params) {
    }

    /**
     * Builds the text unit event for the content accumulated in {@code sb}.
     *
     * <p>The skeleton receives the content placeholder, then {@code </mrk>} if it
     * already held something (that is, a segment start tag was recorded), then the
     * document's line break.
     *
     * @param sb the raw content of the text unit
     * @return a text unit event carrying the parsed content and its skeleton
     */
    private Event processBuffer(StringBuilder sb) {
        TextFragment tf = this.fromPseudoXLIFF(sb.toString());
        TextUnit tu = new TextUnit(this.tuIdGen.createId());
        tu.setSourceContent(tf);
        tu.setPreserveWhitespaces(true);
        // Must be evaluated before the placeholder is added to the skeleton.
        boolean closeSegment = !this.skel.isEmpty();
        this.skel.addContentPlaceholder(tu);
        if (closeSegment) {
            this.skel.append(ENDSEGMENT);
        }
        this.skel.add(this.lineBreak);
        tu.setSkeleton(this.skel);
        return new Event(EventType.TEXT_UNIT, tu);
    }

    /**
     * Converts a string with pseudo-XLIFF inline tags into a text fragment.
     *
     * <p>Character entities ({@code &#13;}, {@code &apos;}, {@code &lt;},
     * {@code &gt;}, {@code &quot;}, {@code &amp;}) are decoded first; then
     * {@code <g>}/{@code </g>}, {@code <bx/>}, {@code <ex/>} and {@code <x/>} are
     * replaced by inline codes, and {@code <lb/>} by a line feed.
     *
     * @param text the text to convert; may be {@code null} or empty
     * @return the resulting fragment (empty when {@code text} is null or empty)
     */
    public TextFragment fromPseudoXLIFF(String text) {
        TextFragment tf = new TextFragment();
        if (Util.isEmpty(text)) {
            return tf;
        }
        if (text.indexOf('<') == -1 && text.indexOf('&') == -1) {
            // Neither tags nor entities: the text can be used as it is.
            tf.append(text);
            return tf;
        }

        // NOTE(review): entities are decoded before tags are detected, so an escaped
        // tag in the input ends up being treated like a real tag - confirm this is intended.
        StringBuilder sb = new StringBuilder(unescape(text));
        ArrayList<Code> codes = new ArrayList<Code>();
        replaceGroupTags(sb, codes);
        replaceIsolatedTags(sb, codes);
        replaceLineBreaks(sb);
        tf.setCodedText(sb.toString(), codes);
        return tf;
    }

    /** Decodes the supported character entities; {@code &amp;} is decoded last so its output is not decoded again. */
    private static String unescape(String text) {
        text = text.replaceAll("(&#13;)|(&#x0*?[dD];)", "\r");
        text = text.replace("&apos;", "'");
        text = text.replace("&lt;", "<");
        text = text.replace("&gt;", ">");
        text = text.replace("&quot;", "\"");
        return text.replace("&amp;", "&");
    }

    /** Replaces each {@code <g id="..">} and {@code </g>} in {@code sb} by a marker pair and records its code. */
    private static void replaceGroupTags(StringBuilder sb, List<Code> codes) {
        Matcher m = OPENCLOSE.matcher(sb.toString());
        while (m.find()) {
            char kind;
            if (m.group(1) != null) {
                Code code = new Code(TextFragment.TagType.OPENING, "g", m.group(1));
                code.setId(Util.strToInt(m.group(3), -1));
                codes.add(code);
                kind = MARKER_OPENING;
            } else {
                // No id is set here for the closing code.
                codes.add(new Code(TextFragment.TagType.CLOSING, "g", m.group(4)));
                kind = MARKER_CLOSING;
            }
            sb.replace(m.start(), m.end(), marker(kind, codes.size() - 1));
            // The buffer changed: search again from the start of the updated text.
            m.reset(sb.toString());
        }
    }

    /** Replaces each {@code <bx/>}, {@code <ex/>} and {@code <x/>} in {@code sb} by a marker pair and records its code. */
    private static void replaceIsolatedTags(StringBuilder sb, List<Code> codes) {
        Matcher m = ISOLATED.matcher(sb.toString());
        while (m.find()) {
            int id = Util.strToInt(m.group(3), -1);
            String name = m.group(1);
            Code code;
            if (name.equals("bx")) {
                code = new Code(TextFragment.TagType.OPENING, "Xpt" + id, m.group());
            } else if (name.equals("ex")) {
                code = new Code(TextFragment.TagType.CLOSING, "Xpt" + id, m.group());
            } else {
                code = new Code(TextFragment.TagType.PLACEHOLDER, "x", m.group());
            }
            code.setId(id);
            codes.add(code);
            // All three tag names use the same marker character, whatever the tag type of the code.
            sb.replace(m.start(), m.end(), marker(MARKER_ISOLATED, codes.size() - 1));
            m.reset(sb.toString());
        }
    }

    /** Replaces each {@code <lb/>} in {@code sb} by a line feed. */
    private static void replaceLineBreaks(StringBuilder sb) {
        Matcher m = LINEBREAK.matcher(sb.toString());
        while (m.find()) {
            sb.replace(m.start(), m.end(), "\n");
            m.reset(sb.toString());
        }
    }

    /** Builds the two-character marker made of {@code kind} followed by the encoded index of the code. */
    private static String marker(char kind, int codeIndex) {
        return new StringBuilder(2).append(kind).append(TextFragment.toChar(codeIndex)).toString();
    }
}

