/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.lib.segmentation;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.okapi.common.ISegmenter;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Range;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextUnitUtil;
import net.sf.okapi.lib.segmentation.CompiledRule;
import net.sf.okapi.lib.segmentation.ICURegex;
import net.sf.okapi.lib.segmentation.SegmentationRuleException;

/**
 * {@link ISegmenter} implementation that computes split positions by running a
 * list of {@link CompiledRule} patterns over the code-stripped text of a
 * {@link TextContainer}, then adjusts those positions for in-line codes and for
 * the configured trimming options.
 *
 * <p>Unless {@link #useJavaRegex()} is enabled, candidate positions are also
 * checked against the {@link ICURegex} helper owned by this instance.
 *
 * <p>Instances keep the result of the last computation in mutable fields; no
 * synchronization is performed in this class.
 */
public class SRXSegmenter implements ISegmenter {
    /** Marker character whose handling is driven by {@link #includeStartCodes}. */
    private static final char START_CODE_MARKER = '\ue101';
    /** Marker character whose handling is driven by {@link #includeEndCodes}. */
    private static final char END_CODE_MARKER = '\ue102';
    /** Marker character whose handling is driven by {@link #includeIsolatedCodes}. */
    private static final char ISOLATED_CODE_MARKER = '\ue103';
    /** Number of characters this class counts per in-line code (marker plus one following character). */
    private static final int CODE_LENGTH = 2;

    private boolean segmentSubFlows;
    private boolean cascade;
    private boolean includeStartCodes;
    private boolean includeEndCodes;
    private boolean includeIsolatedCodes;
    private LocaleId currentLanguageCode;
    private boolean oneSegmentIncludesAll;
    private boolean trimLeadingWS;
    private boolean trimTrailingWS;
    private boolean useJavaRegex;
    private boolean trimCodes;
    private ArrayList<CompiledRule> rules;
    private Pattern maskRule;
    /** Candidate positions found by the rules, mapped to the rule's break flag. */
    private TreeMap<Integer, Boolean> splits;
    /** Break positions retained after the in-line code adjustment. */
    private List<Integer> finalSplits;
    /** Start offsets of the computed segments; parallel to {@link #ends}. */
    private ArrayList<Integer> starts;
    /** End offsets of the computed segments; parallel to {@link #starts}. */
    private ArrayList<Integer> ends;
    private ICURegex icuRegex = new ICURegex();

    /**
     * Creates a segmenter with the default options set by {@link #reset()}.
     */
    public SRXSegmenter() {
        this.reset();
    }

    /**
     * Clears the language, the rules, the mask rule and the candidate splits,
     * restores the default options ({@code segmentSubFlows} and
     * {@code includeEndCodes} are {@code true}, every other flag is
     * {@code false}) and resets the ICU helper.
     *
     * <p>The last computed split positions and ranges are not cleared here.
     */
    @Override
    public void reset() {
        this.currentLanguageCode = null;
        this.rules = new ArrayList<CompiledRule>();
        this.maskRule = null;
        this.splits = null;
        this.segmentSubFlows = true;
        this.cascade = false;
        this.includeStartCodes = false;
        this.includeEndCodes = true;
        this.includeIsolatedCodes = false;
        this.oneSegmentIncludesAll = false;
        this.trimLeadingWS = false;
        this.trimTrailingWS = false;
        this.useJavaRegex = false;
        this.trimCodes = false;
        this.icuRegex.reset();
    }

    /**
     * Sets all segmentation options at once, including the regex engine choice.
     *
     * @param segmentSubFlows value for the sub-flow segmentation flag
     * @param includeStartCodes value for the start-code inclusion flag
     * @param includeEndCodes value for the end-code inclusion flag
     * @param includeIsolatedCodes value for the isolated-code inclusion flag
     * @param oneSegmentIncludesAll whether a single resulting segment is widened to the whole text
     * @param trimLeadingWS whether leading white space is trimmed from segments
     * @param trimTrailingWS whether trailing white space is trimmed from segments
     * @param useJavaRegex {@code true} to skip the ICU processing and verification steps
     */
    public void setOptions(boolean segmentSubFlows, boolean includeStartCodes, boolean includeEndCodes, boolean includeIsolatedCodes, boolean oneSegmentIncludesAll, boolean trimLeadingWS, boolean trimTrailingWS, boolean useJavaRegex) {
        this.segmentSubFlows = segmentSubFlows;
        this.includeStartCodes = includeStartCodes;
        this.includeEndCodes = includeEndCodes;
        this.includeIsolatedCodes = includeIsolatedCodes;
        this.oneSegmentIncludesAll = oneSegmentIncludesAll;
        this.trimLeadingWS = trimLeadingWS;
        this.trimTrailingWS = trimTrailingWS;
        this.useJavaRegex = useJavaRegex;
    }

    /**
     * Sets the segmentation options; the {@code useJavaRegex} flag is left as it is.
     *
     * @param segmentSubFlows value for the sub-flow segmentation flag
     * @param includeStartCodes value for the start-code inclusion flag
     * @param includeEndCodes value for the end-code inclusion flag
     * @param includeIsolatedCodes value for the isolated-code inclusion flag
     * @param oneSegmentIncludesAll whether a single resulting segment is widened to the whole text
     * @param trimLeadingWS whether leading white space is trimmed from segments
     * @param trimTrailingWS whether trailing white space is trimmed from segments
     */
    @Override
    public void setOptions(boolean segmentSubFlows, boolean includeStartCodes, boolean includeEndCodes, boolean includeIsolatedCodes, boolean oneSegmentIncludesAll, boolean trimLeadingWS, boolean trimTrailingWS) {
        this.segmentSubFlows = segmentSubFlows;
        this.includeStartCodes = includeStartCodes;
        this.includeEndCodes = includeEndCodes;
        this.includeIsolatedCodes = includeIsolatedCodes;
        this.oneSegmentIncludesAll = oneSegmentIncludesAll;
        this.trimLeadingWS = trimLeadingWS;
        this.trimTrailingWS = trimTrailingWS;
    }

    /** @return the current value of the one-segment-includes-all option */
    @Override
    public boolean oneSegmentIncludesAll() {
        return this.oneSegmentIncludesAll;
    }

    /** @return the current value of the sub-flow segmentation option */
    @Override
    public boolean segmentSubFlows() {
        return this.segmentSubFlows;
    }

    /** @return the value last stored by {@link #setCascade(boolean)} ({@code false} after a reset) */
    public boolean cascade() {
        return this.cascade;
    }

    /** @return whether leading white space is trimmed from computed segments */
    @Override
    public boolean trimLeadingWhitespaces() {
        return this.trimLeadingWS;
    }

    /** @return whether trailing white space is trimmed from computed segments */
    @Override
    public boolean trimTrailingWhitespaces() {
        return this.trimTrailingWS;
    }

    /** @return {@code true} if the ICU processing and verification steps are skipped */
    public boolean useJavaRegex() {
        return this.useJavaRegex;
    }

    /**
     * @param useJavaRegex {@code true} to skip the ICU processing and verification steps
     */
    public void setUseJavaRegex(boolean useJavaRegex) {
        this.useJavaRegex = useJavaRegex;
    }

    /** @return the current value of the start-code inclusion option */
    @Override
    public boolean includeStartCodes() {
        return this.includeStartCodes;
    }

    /** @return the current value of the end-code inclusion option */
    @Override
    public boolean includeEndCodes() {
        return this.includeEndCodes;
    }

    /** @return the current value of the isolated-code inclusion option */
    @Override
    public boolean includeIsolatedCodes() {
        return this.includeIsolatedCodes;
    }

    /**
     * Wraps the given text in a new {@link TextContainer} and segments it.
     *
     * @param text the text to segment
     * @return the number of segments found
     * @throws SegmentationRuleException if no language has been set
     */
    @Override
    public int computeSegments(String text) {
        return this.computeSegments(new TextContainer(text));
    }

    /**
     * Computes the segment ranges for the coded text of the given container.
     * The results are available afterwards through {@link #getSplitPositions()}
     * and {@link #getRanges()}.
     *
     * @param container the container whose coded text is segmented
     * @return the number of segments found (not the number of split positions)
     * @throws SegmentationRuleException if no language has been set
     */
    @Override
    public int computeSegments(TextContainer container) {
        if (this.currentLanguageCode == null) {
            throw new SegmentationRuleException("No language defined for the segmenter.");
        }
        boolean hasCode = container.contentIsOneSegment()
            ? container.getSegments().getFirstContent().hasCode()
            : container.getUnSegmentedContentCopy().hasCode();

        // A code kind is passed to the white-space helpers as "true" only when
        // codes are trimmed and that kind is not included in the segment.
        boolean isSCWS = this.trimCodes && !this.includeStartCodes;
        boolean isECWS = this.trimCodes && !this.includeEndCodes;
        boolean isICWS = this.trimCodes && !this.includeIsolatedCodes;

        String codedText = container.getCodedText();
        if (!this.useJavaRegex) {
            this.icuRegex.processText(codedText, this.rules);
        }
        List<Integer> codePositions = this.storeCodePositions(codedText);
        List<Integer> origCodePositions = this.storeOriginalCodePositions(codedText);

        // The rules are matched against the text without in-line codes, so the
        // ICU boundaries are first converted to code-free offsets.
        codedText = TextUnitUtil.removeCodes(codedText);
        List<Integer> oldBoundaries = this.icuRegex.getBoundaries();
        ArrayList<Integer> boundaries = new ArrayList<Integer>();
        for (Integer oldBoundary : oldBoundaries) {
            boundaries.add(this.recalcPosBack(oldBoundary, origCodePositions));
        }

        // Collect candidate positions; the first rule to claim a position wins.
        this.splits = new TreeMap<Integer, Boolean>();
        for (CompiledRule rule : this.rules) {
            Matcher m = rule.pattern.matcher(codedText);
            while (m.find()) {
                int n = m.start() + m.group(1).length();
                if (n > codedText.length() || this.splits.containsKey(n)) {
                    continue;
                }
                if (!this.useJavaRegex && !this.icuRegex.verifyPos(n, rule, m, boundaries)) {
                    continue;
                }
                this.splits.put(n, rule.isBreak);
            }
        }

        // Convert the candidate positions back to offsets that count the codes.
        TreeMap<Integer, Boolean> oldSplits = this.splits;
        this.splits = new TreeMap<Integer, Boolean>();
        for (Integer oldPos : oldSplits.keySet()) {
            int newPos = this.recalcPos(oldPos, codePositions);
            this.splits.put(newPos, oldSplits.get(oldPos));
        }
        if (!this.useJavaRegex) {
            codedText = container.getCodedText();
        }

        // Each mask match drops the positions it covers and becomes a range of
        // its own, delimited by a break at its start (unless 0) and at its end.
        if (this.maskRule != null) {
            Matcher m = this.maskRule.matcher(codedText);
            while (m.find()) {
                for (int n = m.start(); n < m.end(); ++n) {
                    this.splits.remove(n);
                }
                if (m.start() > 0) {
                    this.splits.put(m.start(), true);
                }
                this.splits.put(m.end(), true);
            }
        }

        // Keep only the break positions; when the content has codes, positions
        // at or past the end are dropped and the rest are adjusted for codes.
        this.finalSplits = new ArrayList<Integer>();
        for (Integer key : this.splits.keySet()) {
            if (!this.splits.get(key).booleanValue()) {
                continue;
            }
            int pos = key;
            if (hasCode) {
                if (pos >= codedText.length()) {
                    continue;
                }
                pos = this.adjustSplitForCodes(codedText, pos);
            }
            this.finalSplits.add(pos);
        }

        // Turn the split positions into [start, end) ranges, applying trimming.
        this.starts = new ArrayList<Integer>();
        this.ends = new ArrayList<Integer>();
        int textStart = 0;
        for (int pos : this.finalSplits) {
            int trimmedTextStart = TextFragment.indexOfFirstNonWhitespace(codedText, textStart, pos - 1, isSCWS, isECWS, isICWS, this.trimLeadingWS);
            if (trimmedTextStart == -1) {
                // Nothing but trimmable content: textStart is left untouched so
                // the span is carried over to the next split.
                continue;
            }
            if (this.trimLeadingWS || this.trimCodes) {
                textStart = trimmedTextStart;
            }
            int textEnd = (this.trimTrailingWS || this.trimCodes)
                ? TextFragment.indexOfLastNonWhitespace(codedText, pos - 1, 0, isSCWS, isECWS, isICWS, this.trimTrailingWS)
                : pos - 1;
            if (textEnd >= textStart) {
                if (textEnd < pos) {
                    ++textEnd;
                }
                this.starts.add(textStart);
                this.ends.add(textEnd);
            }
            textStart = pos;
        }

        // Remaining text after the last split.
        int lastPos = codedText.length();
        if (textStart < lastPos) {
            int trimmedTextStart = TextFragment.indexOfFirstNonWhitespace(codedText, textStart, lastPos - 1, isSCWS, isECWS, isICWS, this.trimLeadingWS);
            if ((this.trimLeadingWS || this.trimCodes) && trimmedTextStart != -1) {
                textStart = trimmedTextStart;
            }
            if (trimmedTextStart != -1 && trimmedTextStart < lastPos) {
                int textEnd = (this.trimTrailingWS || this.trimCodes)
                    ? TextFragment.indexOfLastNonWhitespace(codedText, lastPos - 1, textStart, isSCWS, isECWS, isICWS, this.trimTrailingWS)
                    : lastPos - 1;
                if (textEnd >= textStart) {
                    if (textEnd < lastPos) {
                        ++textEnd;
                    }
                    this.starts.add(textStart);
                    this.ends.add(textEnd);
                }
            }
        }

        // A lone segment is widened to the whole text when requested.
        if (this.starts.size() == 1 && this.oneSegmentIncludesAll) {
            this.starts.set(0, 0);
            this.ends.clear();
            this.ends.add(lastPos);
        }
        return this.starts.size();
    }

    /**
     * Moves a break position that falls on an in-line code marker, according
     * to the inclusion option that matches the marker kind. Positions that do
     * not fall on a marker are returned unchanged.
     *
     * @param codedText the text being segmented
     * @param pos a valid index into {@code codedText}
     * @return the adjusted position
     */
    private int adjustSplitForCodes(String codedText, int pos) {
        switch (codedText.charAt(pos)) {
            case START_CODE_MARKER:
                return this.includeStartCodes
                    ? skipBackward(codedText, pos, START_CODE_MARKER)
                    : skipForward(codedText, pos, START_CODE_MARKER);
            case END_CODE_MARKER:
                return this.includeEndCodes
                    ? skipForward(codedText, pos, END_CODE_MARKER)
                    : skipBackward(codedText, pos, END_CODE_MARKER);
            case ISOLATED_CODE_MARKER:
                return this.includeIsolatedCodes
                    ? skipForward(codedText, pos, ISOLATED_CODE_MARKER)
                    : skipBackward(codedText, pos, ISOLATED_CODE_MARKER);
            default:
                return pos;
        }
    }

    /**
     * Advances past the code at {@code pos} and past any directly following
     * codes that use the same marker. The bounds test runs before the
     * character is read, so a code at the very end of the text yields the
     * text length instead of an out-of-range access.
     */
    private static int skipForward(String text, int pos, char marker) {
        do {
            pos += CODE_LENGTH;
        } while (pos < text.length() - 1 && text.charAt(pos) == marker);
        return pos;
    }

    /**
     * Steps back while the character just before {@code pos} equals
     * {@code marker}. The bounds test runs before the character is read, so a
     * position of 0 is returned unchanged.
     *
     * <p>NOTE(review): this moves one character at a time and tests the
     * character immediately before {@code pos}, while codes are skipped two
     * characters at a time going forward; confirm against the coded-text layout.
     */
    private static int skipBackward(String text, int pos, char marker) {
        while (pos > 1 && text.charAt(pos - 1) == marker) {
            --pos;
        }
        return pos;
    }

    /**
     * Converts an offset in code-free text into an offset in the text with
     * codes, by adding two characters for every code located before it.
     *
     * @param pos offset in the code-free text
     * @param codePositions ascending code positions as returned by {@link #storeCodePositions(String)}
     * @return the corresponding offset in the text with codes
     */
    int recalcPos(int pos, List<Integer> codePositions) {
        int numCodes = 0;
        for (Integer codePos : codePositions) {
            if (codePos >= pos) {
                break;
            }
            ++numCodes;
        }
        return pos + numCodes * CODE_LENGTH;
    }

    /**
     * Converts an offset in the text with codes into an offset in code-free
     * text, by subtracting two characters for every code located before it.
     *
     * @param pos offset in the text with codes
     * @param origCodePositions ascending code positions as returned by {@link #storeOriginalCodePositions(String)}
     * @return the corresponding offset in the code-free text
     */
    int recalcPosBack(int pos, List<Integer> origCodePositions) {
        int numCodes = 0;
        for (Integer codePos : origCodePositions) {
            if (codePos >= pos) {
                break;
            }
            ++numCodes;
        }
        return pos - numCodes * CODE_LENGTH;
    }

    /**
     * Lists the position of every code marker, expressed as an offset in the
     * text once the preceding codes (two characters each) are removed.
     *
     * @param text the text with codes
     * @return the code positions in code-free coordinates, in ascending order
     */
    List<Integer> storeCodePositions(String text) {
        ArrayList<Integer> res = new ArrayList<Integer>();
        int numCodes = 0;
        for (int i = 0; i < text.length(); ++i) {
            if (isCodeMarker(text.charAt(i))) {
                res.add(i - numCodes * CODE_LENGTH);
                ++numCodes;
                // Skip the character that follows the marker.
                ++i;
            }
        }
        return res;
    }

    /**
     * Lists the index of every code marker in the given text.
     *
     * @param text the text with codes
     * @return the marker indexes, in ascending order
     */
    List<Integer> storeOriginalCodePositions(String text) {
        ArrayList<Integer> res = new ArrayList<Integer>();
        for (int i = 0; i < text.length(); ++i) {
            if (isCodeMarker(text.charAt(i))) {
                res.add(i);
                // Skip the character that follows the marker.
                ++i;
            }
        }
        return res;
    }

    /** Tells whether the character is one of the three code markers this class handles. */
    private static boolean isCodeMarker(char ch) {
        return ch == START_CODE_MARKER || ch == END_CODE_MARKER || ch == ISOLATED_CODE_MARKER;
    }

    /**
     * Not implemented by this segmenter.
     *
     * @param container ignored
     * @return always {@code null}
     */
    @Override
    public Range getNextSegmentRange(TextContainer container) {
        return null;
    }

    /**
     * @return a read-only view of the split positions of the last computation;
     *         empty if nothing has been computed yet
     */
    @Override
    public List<Integer> getSplitPositions() {
        if (this.finalSplits == null) {
            this.finalSplits = new ArrayList<Integer>();
        }
        return Collections.unmodifiableList(this.finalSplits);
    }

    /**
     * @return a read-only list of the segment ranges of the last computation,
     *         or {@code null} if nothing has been computed yet
     */
    @Override
    public List<Range> getRanges() {
        if (this.starts == null) {
            return null;
        }
        ArrayList<Range> list = new ArrayList<Range>();
        for (int i = 0; i < this.starts.size(); ++i) {
            list.add(new Range(this.starts.get(i), this.ends.get(i)));
        }
        return Collections.unmodifiableList(list);
    }

    /** @return the language currently set, or {@code null} if none is set */
    @Override
    public LocaleId getLanguage() {
        return this.currentLanguageCode;
    }

    /**
     * Sets the language on this segmenter and on its ICU helper.
     *
     * @param languageCode the language to use
     */
    @Override
    public void setLanguage(LocaleId languageCode) {
        this.currentLanguageCode = languageCode;
        this.icuRegex.setLanguage(languageCode);
    }

    /**
     * @param value the new value returned by {@link #cascade()}
     */
    protected void setCascade(boolean value) {
        this.cascade = value;
    }

    /**
     * Appends a rule; rules are applied in the order they were added.
     *
     * @param compiledRule the rule to append
     */
    protected void addRule(CompiledRule compiledRule) {
        this.rules.add(compiledRule);
    }

    /**
     * Sets the mask rule from a regular expression.
     *
     * @param pattern the expression to compile; {@code null} or empty removes the mask rule
     */
    protected void setMaskRule(String pattern) {
        this.maskRule = pattern != null && pattern.length() > 0 ? Pattern.compile(pattern) : null;
    }

    /** @return the ICU helper owned by this segmenter */
    public ICURegex getICURegex() {
        return this.icuRegex;
    }

    /** @param segmentSubFlows the new value of the sub-flow segmentation option */
    @Override
    public void setSegmentSubFlows(boolean segmentSubFlows) {
        this.segmentSubFlows = segmentSubFlows;
    }

    /** @param includeStartCodes the new value of the start-code inclusion option */
    @Override
    public void setIncludeStartCodes(boolean includeStartCodes) {
        this.includeStartCodes = includeStartCodes;
    }

    /** @param includeEndCodes the new value of the end-code inclusion option */
    @Override
    public void setIncludeEndCodes(boolean includeEndCodes) {
        this.includeEndCodes = includeEndCodes;
    }

    /** @param includeIsolatedCodes the new value of the isolated-code inclusion option */
    @Override
    public void setIncludeIsolatedCodes(boolean includeIsolatedCodes) {
        this.includeIsolatedCodes = includeIsolatedCodes;
    }

    /** @param oneSegmentIncludesAll the new value of the one-segment-includes-all option */
    @Override
    public void setOneSegmentIncludesAll(boolean oneSegmentIncludesAll) {
        this.oneSegmentIncludesAll = oneSegmentIncludesAll;
    }

    /** @param trimLeadingWS whether leading white space is trimmed from segments */
    @Override
    public void setTrimLeadingWS(boolean trimLeadingWS) {
        this.trimLeadingWS = trimLeadingWS;
    }

    /** @param trimTrailingWS whether trailing white space is trimmed from segments */
    @Override
    public void setTrimTrailingWS(boolean trimTrailingWS) {
        this.trimTrailingWS = trimTrailingWS;
    }

    /** @param trimCodes whether non-included in-line codes are trimmed from segment edges */
    @Override
    public void setTrimCodes(boolean trimCodes) {
        this.trimCodes = trimCodes;
    }
}

