/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.lib.segmentation;

import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.util.ULocale;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.exceptions.OkapiException;
import net.sf.okapi.lib.segmentation.CompiledRule;
import net.sf.okapi.lib.segmentation.Placeholder;
import net.sf.okapi.lib.segmentation.RuleInfo;

/**
 * Bridges ICU-only regular-expression constructs to {@link java.util.regex}.
 *
 * <p>{@link #processRule(String)} rewrites a rule so that constructs matched by the extractor
 * patterns below are replaced by single placeholder/marker characters or by equivalent
 * {@code java.util.regex} classes. {@link #processText(String, List)} then re-compiles the
 * rules for one concrete text, substituting each placeholder with the character string the
 * corresponding {@link Placeholder} reports for that text.
 *
 * <p>NOTE(review): instances hold mutable per-text state and no synchronization is visible
 * here, so they should presumably not be shared between threads - confirm against callers.
 */
public class ICURegex {
    /** Lexeme id handed to {@link Placeholder#createGraphemeCluster}. */
    static final int GC_LEXEM = 700;
    /** Rule status of the word iterator for which boundaries are recorded. */
    static final int WB_LEXEM = 200;
    /** Lexeme id handed to the per-pattern placeholder iterators. */
    static final int PH_LEXEM = 710;

    /** First code point (U+E020) of the range matched by {@link #phPattern}. */
    private static final int PH_BASE_CODE_POINT = 0xE020;
    /** Marker character (U+E010) substituted for {@code \X} in a rule. */
    private static final char GC_MARKER = '\ue010';
    /** Marker character (U+E011) substituted for {@code \b} in a rule. */
    private static final char WB_MARKER = '\ue011';
    /** Marker character (U+E012) substituted for {@code \B} in a rule. */
    private static final char NON_WB_MARKER = '\ue012';

    private static final Pattern ICU_PATTERN = Pattern.compile("\\\\b|\\\\B|\\\\d|\\\\D|\\\\G|\\\\N\\{|\\\\s|\\\\S|\\\\U|\\\\w|\\\\W|\\\\x\\{|\\\\X|\\\\p\\{|\\\\P\\{");
    private static final Pattern icuPatternExtractor = Pattern.compile("\\\\x\\{([0-9A-Fa-f]{1,6})}-\\\\x\\{([0-9A-Fa-f]{1,6})}|\\\\U[0-9A-Fa-f]{8}-\\\\U[0-9A-Fa-f]{8}|(\\\\N|\\\\p|\\\\P)\\{.+?\\}|\\\\x\\{([0-9A-Fa-f]{1,6})}|\\\\U[0-9A-Fa-f]{8}");
    private static final Pattern gcPatternExtractor = Pattern.compile("\\\\X");
    private static final Pattern wbPatternExtractor = Pattern.compile("\\\\b|\\\\B");

    /** Word iterator for the current language; only set when a rule uses {@code \b}/{@code \B}. */
    private RuleBasedBreakIterator wbIterator;
    /** Cache of word iterators per language; survives {@link #reset()}. */
    private final Map<LocaleId, RuleBasedBreakIterator> wbIterators = new TreeMap<LocaleId, RuleBasedBreakIterator>();
    /** Cache of placeholder iterators per bracketed ICU pattern; survives {@link #reset()}. */
    private final Map<String, RuleBasedBreakIterator> phIterators = new TreeMap<String, RuleBasedBreakIterator>();
    /** One entry per compiled rule, index-aligned with the list given to processText. */
    private final List<RuleInfo> rules = new ArrayList<RuleInfo>();
    /** Placeholders keyed by bracketed ICU pattern, in creation order. */
    private final Map<String, Placeholder> placeholders = new LinkedHashMap<String, Placeholder>();
    /** Snapshot of the placeholder keys taken when the lazy initialization last ran. */
    private String[] placeholderLookup;
    private final Map<CompiledRule, RuleInfo> ruleInfoLookup;
    private LocaleId language;
    /** True while the lazy initialization in {@link #processText(String, List)} is pending. */
    private boolean dirty;
    private Pattern phPattern;
    private String lastProcessedText;
    private boolean hasGraphemClusterPh;
    private boolean hasWordBoundaryPh;
    private boolean hasICURules;
    private Placeholder graphemeCluster;
    private final List<Integer> boundaries = new ArrayList<Integer>();

    /** Creates an instance in the same state {@link #reset()} produces. */
    public ICURegex() {
        this.ruleInfoLookup = new HashMap<CompiledRule, RuleInfo>();
        this.reset();
    }

    /**
     * Drops all rule, placeholder and text state and marks the instance dirty.
     * The iterator caches are intentionally left untouched.
     */
    public void reset() {
        this.rules.clear();
        this.placeholders.clear();
        this.graphemeCluster = Placeholder.createGraphemeCluster(GC_LEXEM);
        this.setLanguage(null);
        this.boundaries.clear();
        this.ruleInfoLookup.clear();
        this.dirty = true;
        this.lastProcessedText = null;
        this.hasGraphemClusterPh = false;
        this.hasWordBoundaryPh = false;
        this.hasICURules = false;
    }

    /**
     * Enables or disables {@link #processText(String, List)}; while {@code false} that method
     * returns immediately.
     *
     * @param hasICURules whether any rule needs ICU handling
     */
    public void setHasICURules(boolean hasICURules) {
        this.hasICURules = hasICURules;
    }

    /**
     * Tells whether a rule contains at least one construct recognized by this class.
     *
     * @param rule the regular expression to inspect, must not be {@code null}
     * @return {@code true} if any such construct occurs in the rule
     */
    public static boolean isICURule(String rule) {
        return ICU_PATTERN.matcher(rule).find();
    }

    /**
     * Sets the language used to pick the word-boundary iterator.
     *
     * <p>The instance is marked dirty when the language actually changes (value comparison).
     * A dirty mark that is already pending is kept: clearing it here would skip the lazy
     * initialization that earlier {@link #processRule(String)} / {@link #reset()} calls asked for.
     *
     * @param language the new language, may be {@code null}
     */
    protected void setLanguage(LocaleId language) {
        boolean changed = (this.language == null)
                ? language != null
                : !this.language.equals(language);
        this.dirty |= changed;
        this.language = language;
    }

    /**
     * Rewrites one rule so it can be compiled by {@code java.util.regex}.
     *
     * <p>Every match of the ICU pattern extractor is wrapped in brackets, associated with a
     * {@link Placeholder} (created on first use) and replaced by that placeholder's string
     * form. Afterwards {@code \b}, {@code \B} and {@code \X} become marker characters and the
     * short-hand classes {@code \d \D \w \W \s \S} are expanded to explicit classes.
     *
     * @param rule the original rule text
     * @return the rewritten rule
     */
    public String processRule(String rule) {
        // The matcher keeps scanning the ORIGINAL text while 'rule' is rewritten below.
        Matcher m = icuPatternExtractor.matcher(rule);
        while (m.find()) {
            String icuToken = m.group();
            String pattern = String.format("[%s]", icuToken);
            Placeholder ph = this.placeholders.get(pattern);
            if (ph == null) {
                // Iterators are cached across reset() because they only depend on the pattern.
                RuleBasedBreakIterator phIterator = this.phIterators.get(pattern);
                if (phIterator == null) {
                    phIterator = Placeholder.createPhIterator(pattern, PH_LEXEM);
                    this.phIterators.put(pattern, phIterator);
                }
                ph = new Placeholder(this.placeholders.size(), phIterator, PH_LEXEM);
                this.placeholders.put(pattern, ph);
            }
            rule = rule.replace(icuToken, ph.toString());
            this.dirty = true;
        }
        this.hasGraphemClusterPh |= gcPatternExtractor.matcher(rule).find();
        // Turn a literal backspace character (U+0008) into the two characters backslash + 'b'.
        // NOTE(review): presumably compensates for an upstream unescape of "\b" - confirm.
        rule = rule.replace("\b", "\\b");
        this.hasWordBoundaryPh |= wbPatternExtractor.matcher(rule).find();
        rule = rule.replace("\\b", Character.toString(WB_MARKER));
        rule = rule.replace("\\B", Character.toString(NON_WB_MARKER));
        rule = rule.replace("\\X", Character.toString(GC_MARKER));
        // The \p{..} sequences introduced from here on are left for java.util.regex itself,
        // because the placeholder extraction above has already run.
        rule = rule.replace("\\d", "\\p{Nd}");
        rule = rule.replace("\\D", "\\P{Nd}");
        rule = rule.replace("\\w", "[\\p{Ll}\\p{Lu}\\p{Lt}\\p{Lo}\\p{Nd}]");
        rule = rule.replace("\\W", "[^\\p{Ll}\\p{Lu}\\p{Lt}\\p{Lo}\\p{Nd}]");
        rule = rule.replace("\\s", "[\\t\\n\\f\\r\\p{Z}\u200b]");
        rule = rule.replace("\\S", "[^\\t\\n\\f\\r\\p{Z}\u200b]");
        return rule;
    }

    /** Tells whether the rule contains a character from the current placeholder range. */
    private boolean containsPlaceholder(String rule) {
        return this.phPattern != null && this.phPattern.matcher(rule).find();
    }

    /**
     * Adapts the compiled rules to one text.
     *
     * <p>Does nothing unless ICU rules were announced via {@link #setHasICURules(boolean)}.
     * Runs the pending lazy initialization if the instance is dirty, then, unless the text is
     * empty or equal to the previously processed one, recomputes the word boundaries and
     * replaces every element of {@code compRules} by a rule with its placeholders resolved.
     *
     * @param codedText the text about to be segmented
     * @param compRules the compiled rules; elements are replaced in place
     * @throws OkapiException if the internal rule list and {@code compRules} differ in size
     */
    public void processText(String codedText, List<CompiledRule> compRules) {
        if (!this.hasICURules) {
            return;
        }
        if (this.dirty) {
            this.initialize(compRules);
        }
        if (Util.isEmpty(codedText)) {
            return;
        }
        if (codedText.equals(this.lastProcessedText)) {
            return;
        }
        this.boundaries.clear();
        if (this.hasWordBoundaryPh) {
            this.collectWordBoundaries(codedText);
        }
        if (this.hasGraphemClusterPh) {
            this.graphemeCluster.processText(codedText);
        }
        for (Placeholder ph : this.placeholders.values()) {
            ph.processText(codedText);
        }
        if (this.rules.size() != compRules.size()) {
            throw new OkapiException("Internal rules desynchronized");
        }
        this.ruleInfoLookup.clear();
        for (int i = 0; i < this.rules.size(); ++i) {
            RuleInfo ruleInfo = this.rules.get(i);
            String rule = ruleInfo.getRule();
            if (this.hasGraphemClusterPh) {
                String gcChars = this.graphemeCluster.getChars();
                if (!Util.isEmpty(gcChars)) {
                    rule = rule.replace(this.graphemeCluster.toString(), String.format("[%s]+", gcChars));
                }
            }
            if (this.containsPlaceholder(rule)) {
                for (int phIndex = 0; phIndex < this.placeholders.size(); ++phIndex) {
                    Placeholder ph = this.placeholders.get(this.placeholderLookup[phIndex]);
                    String phChars = ph.getChars();
                    if (Util.isEmpty(phChars)) {
                        continue;
                    }
                    rule = this.resolvePlaceholder(rule, ruleInfo, ph, phChars);
                }
            }
            CompiledRule oldRule = compRules.get(i);
            CompiledRule newRule = new CompiledRule(rule, oldRule.isBreak);
            compRules.set(i, newRule);
            this.ruleInfoLookup.put(newRule, ruleInfo);
        }
        this.lastProcessedText = codedText;
    }

    /**
     * Lazy initialization: selects the word iterator, records one {@link RuleInfo} per compiled
     * rule, builds the placeholder-range pattern and clears the dirty flag.
     *
     * <p>NOTE(review): when a rule uses a word boundary and no language has been set, the
     * lookup below fails with a {@code NullPointerException}, as it always did.
     */
    private void initialize(List<CompiledRule> compRules) {
        if (this.hasWordBoundaryPh) {
            RuleBasedBreakIterator cached = this.wbIterators.get(this.language);
            if (cached == null) {
                ULocale locale = ULocale.createCanonical(this.language.toString());
                cached = (RuleBasedBreakIterator)BreakIterator.getWordInstance(locale);
                RuleBasedBreakIterator.registerInstance((BreakIterator)cached, locale, BreakIterator.KIND_WORD);
                this.wbIterators.put(this.language, cached);
            }
            this.wbIterator = cached;
        }
        for (CompiledRule compRule : compRules) {
            this.rules.add(new RuleInfo(compRule.pattern.pattern()));
        }
        int phCount = this.placeholders.size();
        if (phCount > 0) {
            this.phPattern = phCount == 1
                    ? Pattern.compile(String.format("[\\u%04X]", PH_BASE_CODE_POINT))
                    : Pattern.compile(String.format("[\\u%04X-\\u%04X]", PH_BASE_CODE_POINT, PH_BASE_CODE_POINT + phCount - 1));
        }
        this.placeholderLookup = this.placeholders.keySet().toArray(new String[phCount]);
        this.dirty = false;
    }

    /**
     * Appends the start and end offset of every range whose rule status equals
     * {@link #WB_LEXEM} to {@link #boundaries}.
     */
    private void collectWordBoundaries(String codedText) {
        this.wbIterator.setText(codedText);
        int start = this.wbIterator.first();
        int end;
        while ((end = this.wbIterator.next()) != BreakIterator.DONE && start < end) {
            if (this.wbIterator.getRuleStatus() == WB_LEXEM) {
                this.boundaries.add(start);
                this.boundaries.add(end);
            }
            start = end;
        }
    }

    /**
     * Replaces every occurrence of one placeholder in a rule by the given characters.
     * The characters are inserted bare where {@link RuleInfo#isSetArea(int)} reports true for
     * the match position, and wrapped in brackets otherwise.
     *
     * @param rule     the rule text containing placeholders
     * @param ruleInfo the info used to classify each match position
     * @param ph       the placeholder to resolve
     * @param chars    the replacement characters, not empty
     * @return the rule with this placeholder resolved
     */
    private String resolvePlaceholder(String rule, RuleInfo ruleInfo, Placeholder ph, String chars) {
        StringBuilder sb = new StringBuilder();
        int copiedUpTo = 0;
        Matcher m = ph.getPhPattern().matcher(rule);
        while (m.find()) {
            String replacement = ruleInfo.isSetArea(m.start()) ? chars : String.format("[%s]", chars);
            sb.append(rule, copiedUpTo, m.start());
            sb.append(replacement);
            copiedUpTo = m.end();
        }
        if (copiedUpTo < rule.length()) {
            sb.append(rule, copiedUpTo, rule.length());
        }
        return sb.toString();
    }

    /**
     * Checks a match position against the info recorded for a rule.
     *
     * @param pos        the candidate position
     * @param rule       a rule produced by the last {@link #processText(String, List)} call
     * @param matcher    the matcher that produced the candidate
     * @param boundaries the boundaries to verify against
     * @return {@code true} if no info is recorded for the rule, otherwise the result of
     *         {@link RuleInfo#verifyPos}
     */
    public boolean verifyPos(int pos, CompiledRule rule, Matcher matcher, List<Integer> boundaries) {
        RuleInfo info = this.ruleInfoLookup.get(rule);
        if (info == null) {
            return true;
        }
        return info.verifyPos(pos, matcher, boundaries);
    }

    /**
     * Returns the live list of recorded word-boundary offsets (start/end pairs); it is
     * cleared and refilled by {@link #processText(String, List)}.
     *
     * @return the internal boundary list, never {@code null}
     */
    public List<Integer> getBoundaries() {
        return this.boundaries;
    }
}

