/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.lib.segmentation;

import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.okapi.common.ISegmenter;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Range;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextUnitUtil;
import net.sf.okapi.lib.segmentation.CompiledRule;
import net.sf.okapi.lib.segmentation.ICURegex;
import net.sf.okapi.lib.segmentation.SegmentationRuleException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SRXSegmenter
implements ISegmenter {
    private static final String ISOLATED_CODE_REPLACEMENT_TEXT = " ";
    private static final int CODE_MARKER_LENGTH = 2;
    private final Logger LOGGER = LoggerFactory.getLogger(this.getClass());
    private boolean segmentSubFlows;
    private boolean cascade;
    private boolean includeStartCodes;
    private boolean includeEndCodes;
    private boolean includeIsolatedCodes;
    private LocaleId currentLanguageCode;
    private boolean oneSegmentIncludesAll;
    private boolean trimLeadingWS;
    private boolean trimTrailingWS;
    private boolean useJavaRegex;
    private boolean trimCodes;
    private boolean treatIsolatedCodesAsWhitespace;
    private ArrayList<CompiledRule> rules;
    private Pattern maskRule;
    private TreeMap<Integer, Boolean> splits;
    private List<Integer> finalSplits;
    private ArrayList<Integer> starts;
    private ArrayList<Integer> ends;
    private ICURegex icuRegex = new ICURegex();

    public SRXSegmenter() {
        this.reset();
    }

    @Override
    public void reset() {
        this.currentLanguageCode = null;
        this.rules = new ArrayList();
        this.maskRule = null;
        this.splits = null;
        this.segmentSubFlows = true;
        this.cascade = false;
        this.includeStartCodes = false;
        this.includeEndCodes = true;
        this.includeIsolatedCodes = false;
        this.oneSegmentIncludesAll = false;
        this.trimLeadingWS = false;
        this.trimTrailingWS = false;
        this.useJavaRegex = true;
        this.trimCodes = false;
        this.treatIsolatedCodesAsWhitespace = false;
        this.icuRegex.reset();
    }

    public void setOptions(boolean segmentSubFlows, boolean includeStartCodes, boolean includeEndCodes, boolean includeIsolatedCodes, boolean oneSegmentIncludesAll, boolean trimLeadingWS, boolean trimTrailingWS, boolean useJavaRegex, boolean treatIsolatedCodesAsWhitespace) {
        this.segmentSubFlows = segmentSubFlows;
        this.includeStartCodes = includeStartCodes;
        this.includeEndCodes = includeEndCodes;
        this.includeIsolatedCodes = includeIsolatedCodes;
        this.oneSegmentIncludesAll = oneSegmentIncludesAll;
        this.trimLeadingWS = trimLeadingWS;
        this.trimTrailingWS = trimTrailingWS;
        this.useJavaRegex = useJavaRegex;
        this.treatIsolatedCodesAsWhitespace = treatIsolatedCodesAsWhitespace;
        if (!useJavaRegex) {
            this.LOGGER.warn("Use of ICU regex is deprecated and may be removed in the future.");
        }
    }

    @Override
    public void setOptions(boolean segmentSubFlows, boolean includeStartCodes, boolean includeEndCodes, boolean includeIsolatedCodes, boolean oneSegmentIncludesAll, boolean trimLeadingWS, boolean trimTrailingWS) {
        this.segmentSubFlows = segmentSubFlows;
        this.includeStartCodes = includeStartCodes;
        this.includeEndCodes = includeEndCodes;
        this.includeIsolatedCodes = includeIsolatedCodes;
        this.oneSegmentIncludesAll = oneSegmentIncludesAll;
        this.trimLeadingWS = trimLeadingWS;
        this.trimTrailingWS = trimTrailingWS;
    }

    @Override
    public boolean oneSegmentIncludesAll() {
        return this.oneSegmentIncludesAll;
    }

    @Override
    public boolean segmentSubFlows() {
        return this.segmentSubFlows;
    }

    public boolean cascade() {
        return this.cascade;
    }

    @Override
    public boolean trimLeadingWhitespaces() {
        return this.trimLeadingWS;
    }

    @Override
    public boolean trimTrailingWhitespaces() {
        return this.trimTrailingWS;
    }

    public boolean useJavaRegex() {
        return this.useJavaRegex;
    }

    @Override
    public boolean treatIsolatedCodesAsWhitespace() {
        return this.treatIsolatedCodesAsWhitespace;
    }

    public void setUseJavaRegex(boolean useJavaRegex) {
        this.useJavaRegex = useJavaRegex;
        if (!useJavaRegex) {
            this.LOGGER.warn("Use of ICU regex is deprecated and may be removed in the future.");
        }
    }

    @Override
    public boolean includeStartCodes() {
        return this.includeStartCodes;
    }

    @Override
    public boolean includeEndCodes() {
        return this.includeEndCodes;
    }

    @Override
    public boolean includeIsolatedCodes() {
        return this.includeIsolatedCodes;
    }

    @Override
    public int computeSegments(String text) {
        TextContainer tmp = new TextContainer(text);
        return this.computeSegments(tmp);
    }

    @Override
    public int computeSegments(TextContainer container) {
        int trimmedTextStart;
        Matcher m;
        if (this.currentLanguageCode == null) {
            throw new SegmentationRuleException("No language defined for the segmenter.");
        }
        boolean hasCode = container.contentIsOneSegment() ? container.getSegments().getFirstContent().hasCode() : container.getUnSegmentedContentCopy().hasCode();
        boolean isSCWS = this.trimCodes && !this.includeStartCodes;
        boolean isECWS = this.trimCodes && !this.includeEndCodes;
        boolean isICWS = this.trimCodes && !this.includeIsolatedCodes;
        String codedText = container.getCodedText();
        ArrayList<Integer> boundaries = null;
        List<Integer> origCodePositions = this.storeOriginalCodePositions(codedText);
        if (!this.useJavaRegex) {
            this.icuRegex.processText(codedText, this.rules);
            List<Integer> oldBoundaries = this.icuRegex.getBoundaries();
            boundaries = new ArrayList<Integer>();
            for (Integer oldBoundary : oldBoundaries) {
                boundaries.add(this.recalcPosBack(codedText, oldBoundary, origCodePositions));
            }
        }
        List<Integer> codePositions = this.storeCodePositions(codedText);
        codedText = this.treatIsolatedCodesAsWhitespace ? TextUnitUtil.removeAndReplaceCodes(codedText, ISOLATED_CODE_REPLACEMENT_TEXT) : TextUnitUtil.removeCodes(codedText);
        this.splits = new TreeMap();
        for (CompiledRule rule : this.rules) {
            m = rule.pattern.matcher(codedText);
            m.useTransparentBounds(true);
            int start = 0;
            int prevStart = -1;
            while (start != prevStart && m.find(start)) {
                int n = m.start() + m.group(1).length();
                prevStart = start;
                start = n;
                if (n > codedText.length() || this.splits.containsKey(n) || !this.useJavaRegex && !this.icuRegex.verifyPos(n, rule, m, boundaries)) continue;
                this.splits.put(n, rule.isBreak);
            }
        }
        codedText = container.getCodedText();
        TreeMap<Integer, Boolean> oldSplits = this.splits;
        this.splits = new TreeMap();
        for (Integer pos : oldSplits.keySet()) {
            int newPos = this.recalcPos(codedText, pos, codePositions, origCodePositions);
            this.splits.put(newPos, oldSplits.get(pos));
        }
        if (this.maskRule != null) {
            m = this.maskRule.matcher(codedText);
            while (m.find()) {
                for (int n = m.start(); n < m.end(); ++n) {
                    if (!this.splits.containsKey(n)) continue;
                    this.splits.remove(n);
                }
                if (m.start() > 0) {
                    this.splits.put(m.start(), true);
                }
                this.splits.put(m.end(), true);
            }
        }
        this.finalSplits = new ArrayList<Integer>();
        if (hasCode) {
            EnumSet<TextFragment.Marker> includeCodeSettings = EnumSet.noneOf(TextFragment.Marker.class);
            if (this.includeStartCodes) {
                includeCodeSettings.add(TextFragment.Marker.OPENING);
            }
            if (this.includeEndCodes) {
                includeCodeSettings.add(TextFragment.Marker.CLOSING);
            }
            if (this.includeIsolatedCodes) {
                includeCodeSettings.add(TextFragment.Marker.ISOLATED);
            }
            for (int pos : this.splits.keySet()) {
                if (!this.splits.get(pos).booleanValue() || pos >= codedText.length()) continue;
                TextFragment.Marker codeMarkerType = TextFragment.Marker.asEnum(codedText.charAt(pos));
                switch (codeMarkerType) {
                    case OPENING: 
                    case CLOSING: 
                    case ISOLATED: {
                        if (!includeCodeSettings.contains((Object)codeMarkerType)) break;
                        while ((pos += 2) < codedText.length() - 1 && includeCodeSettings.contains((Object)TextFragment.Marker.asEnum(codedText.charAt(pos)))) {
                        }
                        break;
                    }
                }
                this.finalSplits.add(pos);
            }
        } else {
            for (int pos : this.splits.keySet()) {
                if (!this.splits.get(pos).booleanValue()) continue;
                this.finalSplits.add(pos);
            }
        }
        this.starts = new ArrayList();
        this.ends = new ArrayList();
        int textStart = 0;
        for (int pos : this.finalSplits) {
            int textEnd;
            if (pos >= codedText.length() || (trimmedTextStart = TextFragment.indexOfFirstNonWhitespace(codedText, textStart, pos - 1, isSCWS, isECWS, isICWS, this.trimLeadingWS)) == -1) continue;
            if (this.trimLeadingWS || this.trimCodes) {
                textStart = trimmedTextStart;
            }
            if ((textEnd = this.trimTrailingWS || this.trimCodes ? TextFragment.indexOfLastNonWhitespace(codedText, pos - 1, 0, isSCWS, isECWS, isICWS, this.trimTrailingWS) : pos - 1) >= textStart) {
                if (textEnd < pos) {
                    ++textEnd;
                }
                this.starts.add(textStart);
                this.ends.add(textEnd);
            }
            textStart = pos;
        }
        int lastPos = codedText.length();
        if (textStart < lastPos) {
            int textEnd;
            trimmedTextStart = TextFragment.indexOfFirstNonWhitespace(codedText, textStart, lastPos - 1, isSCWS, isECWS, isICWS, this.trimLeadingWS);
            if ((this.trimLeadingWS || this.trimCodes) && trimmedTextStart != -1) {
                textStart = trimmedTextStart;
            }
            if (trimmedTextStart != -1 && trimmedTextStart < lastPos && (textEnd = this.trimTrailingWS || this.trimCodes ? TextFragment.indexOfLastNonWhitespace(codedText, lastPos - 1, textStart, isSCWS, isECWS, isICWS, this.trimTrailingWS) : lastPos - 1) >= textStart) {
                if (textEnd < lastPos) {
                    ++textEnd;
                }
                this.starts.add(textStart);
                this.ends.add(textEnd);
            }
        }
        if (this.starts.size() == 1 && this.oneSegmentIncludesAll) {
            this.starts.set(0, 0);
            this.ends.clear();
        }
        this.ends.add(lastPos);
        return this.starts.size();
    }

    private int calculatePosition(int position, int numberOfNonIsolatedCodes, int numberOfIsolatedCodes, boolean increase) {
        int nonIsolatedCodesLength = numberOfNonIsolatedCodes * 2;
        int isolatedCodesLength = numberOfIsolatedCodes * (this.treatIsolatedCodesAsWhitespace() ? ISOLATED_CODE_REPLACEMENT_TEXT.length() : 2);
        return increase ? position + nonIsolatedCodesLength + isolatedCodesLength : position - nonIsolatedCodesLength - isolatedCodesLength;
    }

    private int calculateIncreasedPosition(int position, int numberOfNonIsolatedCodes, int numberOfIsolatedCodes) {
        return this.calculatePosition(position, numberOfNonIsolatedCodes, numberOfIsolatedCodes, true);
    }

    private int calculateDecreasedPosition(int position, int numberOfNonIsolatedCodes, int numberOfIsolatedCodes) {
        return this.calculatePosition(position, numberOfNonIsolatedCodes, numberOfIsolatedCodes, false);
    }

    int recalcPos(String codedText, int pos, List<Integer> codePositions, List<Integer> origCodePositions) {
        int numberOfNonIsolatedCodes = 0;
        int numberOfIsolatedCodes = 0;
        block4: for (int codeIndex = 0; codeIndex < codePositions.size(); ++codeIndex) {
            if (codePositions.get(codeIndex) >= pos) {
                return this.calculateIncreasedPosition(pos, numberOfNonIsolatedCodes, numberOfIsolatedCodes);
            }
            switch (TextFragment.Marker.asEnum(codedText.charAt(origCodePositions.get(codeIndex)))) {
                case OPENING: 
                case CLOSING: {
                    ++numberOfNonIsolatedCodes;
                    continue block4;
                }
                case ISOLATED: {
                    ++numberOfIsolatedCodes;
                    continue block4;
                }
            }
        }
        return this.calculateIncreasedPosition(pos, numberOfNonIsolatedCodes, numberOfIsolatedCodes);
    }

    int recalcPosBack(String codedText, int pos, List<Integer> origCodePositions) {
        int numberOfNonIsolatedCodes = 0;
        int numberOfIsolatedCodes = 0;
        for (Integer origCodePosition : origCodePositions) {
            if (origCodePosition >= pos) {
                return this.calculateDecreasedPosition(pos, numberOfNonIsolatedCodes, numberOfIsolatedCodes);
            }
            switch (TextFragment.Marker.asEnum(codedText.charAt(origCodePosition))) {
                case OPENING: 
                case CLOSING: {
                    ++numberOfNonIsolatedCodes;
                    break;
                }
                case ISOLATED: {
                    ++numberOfIsolatedCodes;
                    break;
                }
            }
        }
        return this.calculateDecreasedPosition(pos, numberOfNonIsolatedCodes, numberOfIsolatedCodes);
    }

    List<Integer> storeCodePositions(String text) {
        ArrayList<Integer> res = new ArrayList<Integer>();
        int numberOfNonIsolatedCodes = 0;
        int numberOfIsolatedCodes = 0;
        block4: for (int i = 0; i < text.length(); ++i) {
            switch (TextFragment.Marker.asEnum(text.charAt(i))) {
                case OPENING: 
                case CLOSING: {
                    res.add(this.calculateDecreasedPosition(i, numberOfNonIsolatedCodes, numberOfIsolatedCodes));
                    ++numberOfNonIsolatedCodes;
                    ++i;
                    continue block4;
                }
                case ISOLATED: {
                    res.add(this.calculateDecreasedPosition(i, numberOfNonIsolatedCodes, numberOfIsolatedCodes));
                    ++numberOfIsolatedCodes;
                    ++i;
                    continue block4;
                }
            }
        }
        return res;
    }

    List<Integer> storeOriginalCodePositions(String text) {
        ArrayList<Integer> res = new ArrayList<Integer>();
        block3: for (int i = 0; i < text.length(); ++i) {
            switch (text.charAt(i)) {
                case '\ue101': 
                case '\ue102': 
                case '\ue103': {
                    res.add(i++);
                    continue block3;
                }
            }
        }
        return res;
    }

    @Override
    public Range getNextSegmentRange(TextContainer container) {
        return null;
    }

    @Override
    public List<Integer> getSplitPositions() {
        if (this.finalSplits == null) {
            this.finalSplits = new ArrayList<Integer>();
        }
        return Collections.unmodifiableList(this.finalSplits);
    }

    @Override
    public List<Range> getRanges() {
        ArrayList<Range> list = new ArrayList<Range>();
        if (this.starts == null) {
            return null;
        }
        for (int i = 0; i < this.starts.size(); ++i) {
            list.add(new Range(this.starts.get(i), this.ends.get(i)));
        }
        return Collections.unmodifiableList(list);
    }

    @Override
    public LocaleId getLanguage() {
        return this.currentLanguageCode;
    }

    @Override
    public void setLanguage(LocaleId languageCode) {
        this.currentLanguageCode = languageCode;
        this.icuRegex.setLanguage(languageCode);
    }

    protected void setCascade(boolean value) {
        this.cascade = value;
    }

    protected void addRule(CompiledRule compiledRule) {
        this.rules.add(compiledRule);
    }

    protected void setMaskRule(String pattern) {
        this.maskRule = pattern != null && pattern.length() > 0 ? Pattern.compile(pattern, 256) : null;
    }

    public ICURegex getICURegex() {
        return this.icuRegex;
    }

    @Override
    public void setSegmentSubFlows(boolean segmentSubFlows) {
        this.segmentSubFlows = segmentSubFlows;
    }

    @Override
    public void setIncludeStartCodes(boolean includeStartCodes) {
        this.includeStartCodes = includeStartCodes;
    }

    @Override
    public void setIncludeEndCodes(boolean includeEndCodes) {
        this.includeEndCodes = includeEndCodes;
    }

    @Override
    public void setIncludeIsolatedCodes(boolean includeIsolatedCodes) {
        this.includeIsolatedCodes = includeIsolatedCodes;
    }

    @Override
    public void setOneSegmentIncludesAll(boolean oneSegmentIncludesAll) {
        this.oneSegmentIncludesAll = oneSegmentIncludesAll;
    }

    @Override
    public void setTrimLeadingWS(boolean trimLeadingWS) {
        this.trimLeadingWS = trimLeadingWS;
    }

    @Override
    public void setTrimTrailingWS(boolean trimTrailingWS) {
        this.trimTrailingWS = trimTrailingWS;
    }

    @Override
    public void setTrimCodes(boolean trimCodes) {
        this.trimCodes = trimCodes;
    }

    @Override
    public void setTreatIsolatedCodesAsWhitespace(boolean treatIsolatedCodesAsWhitespace) {
        this.treatIsolatedCodesAsWhitespace = treatIsolatedCodesAsWhitespace;
    }
}

