/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.steps.tokenization;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.ListUtil;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Range;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextUnitUtil;
import net.sf.okapi.lib.extra.steps.AbstractPipelineStep;
import net.sf.okapi.steps.tokenization.Parameters;
import net.sf.okapi.steps.tokenization.common.Config;
import net.sf.okapi.steps.tokenization.common.ILexer;
import net.sf.okapi.steps.tokenization.common.Lexem;
import net.sf.okapi.steps.tokenization.common.Lexems;
import net.sf.okapi.steps.tokenization.common.LexerRule;
import net.sf.okapi.steps.tokenization.common.LexerRules;
import net.sf.okapi.steps.tokenization.common.StructureParameters;
import net.sf.okapi.steps.tokenization.common.StructureParametersItem;
import net.sf.okapi.steps.tokenization.common.Token;
import net.sf.okapi.steps.tokenization.common.TokensAnnotation;
import net.sf.okapi.steps.tokenization.tokens.Tokens;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Pipeline step that extracts tokens from the content of text units and stores
 * them on the text units as {@link TokensAnnotation}s.
 *
 * <p>The step loads its lexers from a configuration resource. Lexers whose rules
 * declare out tokens are kept in the main lexer list; all others (including
 * lexers that expose no rules at all) are kept as "service" lexers and are run
 * after the main ones.
 *
 * <p>Instances keep per-call working state in fields ({@code positions},
 * {@code rawtextLexems}, {@code allowNewRawText}), so one instance must not be
 * used to tokenize from several threads at once.
 */
@UsingParameters(value=Parameters.class)
public class TokenizationStep
extends AbstractPipelineStep {
    private final Logger logger = LoggerFactory.getLogger(this.getClass());

    /**
     * Lexem id marking a lexem as raw text. Such lexems are not converted to
     * tokens; they are queued and their text is run through the lexers again.
     */
    public static final int RAWTEXT = -1;

    private Parameters params;
    private StructureParameters structureParams;
    private Config config = new Config();
    /** Lexers whose rules produce out tokens. May be replaced through {@link #setLexers(List)}. */
    private List<ILexer> lexers = new ArrayList<ILexer>();
    /** Lexers without rules or without out tokens; run after the main lexers. */
    private final List<ILexer> serviceLexers = new ArrayList<ILexer>();
    /** While true, RAWTEXT lexems are queued; while false they are discarded. */
    private boolean allowNewRawText = true;
    /** Rules that must not produce tokens (disabled, or without any token ids). */
    private final List<LexerRule> idleRules = new ArrayList<LexerRule>();
    /** Position data filled by {@code TextUnitUtil.getText} and consumed by {@code Tokens.fixRanges}. */
    private final ArrayList<Integer> positions = new ArrayList<Integer>();
    /** Queue of RAWTEXT lexems waiting to be re-processed by {@link #tokenize}. */
    private final LinkedList<Lexem> rawtextLexems = new LinkedList<Lexem>();

    /**
     * Creates the step, sets its name and description, and loads the default
     * configuration resource {@code config.tprm}.
     */
    public TokenizationStep() {
        this.setName("Tokenization");
        this.setDescription("Extracts tokens from the text units content of a document. Expects: filter events. Sends back: filter events.");
        this.setConfiguration(this.getClass(), "config.tprm");
    }

    /**
     * Loads the step configuration and the lexer structure it points to, then
     * re-instantiates the lexers and installs a fresh {@link Parameters} object.
     *
     * @param classRef class used to resolve the resources
     * @param configLocation resource location of the step configuration
     */
    public void setConfiguration(Class<?> classRef, String configLocation) {
        if (this.config == null) {
            this.config = new Config();
        }
        this.config.loadFromResource(classRef, configLocation);
        this.structureParams = new StructureParameters();
        String structureLocation = this.config.getEngineConfig();
        if (Util.isEmpty(structureLocation) || !this.structureParams.loadFromResource(classRef, structureLocation)) {
            // Not fatal: the step continues with whatever items structureParams holds.
            this.logger.debug("Lexers' config file not found.");
        }
        this.instantiateLexers();
        this.setParameters(new Parameters());
    }

    /**
     * Rebuilds {@code lexers} and {@code serviceLexers} from the enabled items of
     * the structure parameters. An item whose lexer cannot be instantiated, is not
     * an {@link ILexer}, or whose rules fail to load is skipped.
     */
    private void instantiateLexers() {
        if (this.lexers == null) {
            return;
        }
        this.lexers.clear();
        this.serviceLexers.clear();
        for (StructureParametersItem item : this.structureParams.getItems()) {
            try {
                if (item == null || !item.isEnabled()) {
                    continue;
                }
                Object instance = Class.forName(item.getLexerClass()).newInstance();
                if (!(instance instanceof ILexer)) {
                    // A misconfigured class name must not abort construction of the step.
                    this.logger.debug("Lexer instantiation failed: {} is not an ILexer", (Object)item.getLexerClass());
                    continue;
                }
                ILexer lexer = (ILexer)instance;
                LexerRules lexerRules = lexer.getRules();
                if (lexerRules != null && !lexerRules.loadFromResource(lexer.getClass(), item.getRulesLocation())) {
                    continue;
                }
                lexer.init();
                if (lexerRules != null && lexerRules.hasOutTokens()) {
                    this.lexers.add(lexer);
                } else {
                    this.serviceLexers.add(lexer);
                }
            }
            catch (ClassNotFoundException e) {
                this.logger.debug("Lexer instantiation failed: {}", (Object)e.getMessage(), (Object)e);
            }
            catch (InstantiationException e) {
                this.logger.debug("Lexer instantiation failed: {}", (Object)e.getMessage(), (Object)e);
            }
            catch (IllegalAccessException e) {
                this.logger.debug("Lexer instantiation failed: {}", (Object)e.getMessage(), (Object)e);
            }
        }
    }

    /** Refreshes the cached parameters and recomputes the idle rules. */
    @Override
    protected void component_init() {
        this.updateParameters();
        this.setFilters();
    }

    /** Caches the current {@link Parameters} of the step in {@code params}. */
    private void updateParameters() {
        this.params = this.getParameters(Parameters.class);
    }

    /**
     * Handles a command. Commands not consumed by the superclass are checked for
     * {@code "parameters_changed"} (case-insensitive), which refreshes the cached
     * parameters and the idle rules.
     *
     * @return true if the command was handled
     */
    @Override
    public boolean exec(Object sender, String command, Object info) {
        if (super.exec(sender, command, info)) {
            return true;
        }
        // Constant-first comparison so that a null command is simply "not handled".
        if ("parameters_changed".equalsIgnoreCase(command)) {
            this.updateParameters();
            this.setFilters();
            return true;
        }
        return false;
    }

    /**
     * Recomputes {@code idleRules}: every rule of the main lexers that is disabled
     * or that declares no in, out, or user token ids. Does nothing while the
     * parameters have not been set.
     */
    private void setFilters() {
        if (this.params == null) {
            return;
        }
        this.idleRules.clear();
        if (this.lexers == null) {
            return;
        }
        for (ILexer lexer : this.lexers) {
            if (lexer == null || lexer.getRules() == null) {
                continue;
            }
            for (LexerRule rule : lexer.getRules()) {
                boolean hasNoTokenIds = rule.getInTokenIDs().size() == 0
                        && rule.getOutTokenIDs().size() == 0
                        && rule.getUserTokenIDs().size() == 0;
                // Single add so that a disabled rule without token ids is listed only once.
                if (!rule.isEnabled() || hasNoTokenIds) {
                    this.idleRules.add(rule);
                }
            }
        }
    }

    /**
     * Tokenizes the source and/or the targets of a non-empty, translatable text
     * unit, depending on the {@code tokenizeSource} and {@code tokenizeTargets}
     * parameters.
     *
     * @return the event returned by the superclass handler (possibly null)
     */
    @Override
    protected Event handleTextUnit(Event event) {
        event = super.handleTextUnit(event);
        if (event == null) {
            return null;
        }
        ITextUnit tu = event.getTextUnit();
        if (tu == null || tu.isEmpty() || !tu.isTranslatable()) {
            return event;
        }
        if (this.params.tokenizeSource) {
            this.tokenizeSource(tu);
        }
        if (this.params.tokenizeTargets) {
            this.tokenizeTargets(tu);
        }
        return event;
    }

    /**
     * Converts one lexem into tokens, one per out token id of the lexem's rule.
     * RAWTEXT lexems are queued (when allowed) instead of being converted. Lexems
     * whose rule is missing, idle, or does not support the language are dropped.
     *
     * @param lexem the lexem to process; null is ignored
     * @param lexer the lexer that produced the lexem
     * @param language locale of the text being tokenized
     * @param tokens list receiving the created tokens
     * @param textShift offset added once to the lexem's range when greater than 0
     */
    private void processLexem(Lexem lexem, ILexer lexer, LocaleId language, Tokens tokens, int textShift) {
        if (lexem == null) {
            return;
        }
        if (lexem.getId() == RAWTEXT) {
            if (this.allowNewRawText) {
                this.rawtextLexems.add(lexem);
            }
            return;
        }
        LexerRules rules = lexer.getRules();
        if (rules == null) {
            // Service lexers may expose no rules; there is nothing to map the lexem to.
            return;
        }
        LexerRule rule = rules.getRule(lexem.getId());
        if (rule == null || this.idleRules.contains(rule) || !rule.supportsLanguage(language)) {
            return;
        }
        int lexerIndex = this.lexers == null ? -1 : this.lexers.indexOf(lexer);
        lexem.setLexerId(lexerIndex + 1);
        // Shift the range exactly once; doing it per out token would accumulate
        // the offset on the same Range object.
        if (textShift > 0) {
            Range r = lexem.getRange();
            r.start += textShift;
            r.end += textShift;
        }
        for (int tokenId : rule.getOutTokenIDs()) {
            // NOTE(review): the meaning of the constant 100 is defined by Token's
            // constructor, which is not visible here — confirm.
            tokens.add(new Token(tokenId, lexem, 100));
        }
    }

    /**
     * Runs every non-null lexer of the list over the text, first through the
     * single-call {@code process} API and then through the
     * {@code open}/{@code hasNext}/{@code next}/{@code close} API, feeding each
     * produced lexem to {@link #processLexem}.
     *
     * @param lexers lexers to run; null is treated as empty
     * @param text text to analyse
     * @param language locale of the text
     * @param tokens list receiving the created tokens
     * @param textShift offset of {@code text} applied to the lexem ranges
     */
    private void runLexers(List<ILexer> lexers, String text, LocaleId language, Tokens tokens, int textShift) {
        if (lexers == null) {
            return;
        }
        for (ILexer lexer : lexers) {
            if (lexer == null) {
                continue;
            }
            Lexems lexems = lexer.process(text, language, tokens);
            if (lexems != null) {
                for (Lexem lexem : lexems) {
                    // Same shift as the iterator path below, so both APIs yield
                    // ranges in the same coordinate space.
                    this.processLexem(lexem, lexer, language, tokens, textShift);
                }
            }
            lexer.open(text, language, tokens);
            try {
                while (lexer.hasNext()) {
                    this.processLexem(lexer.next(), lexer, language, tokens, textShift);
                }
            }
            finally {
                lexer.close();
            }
        }
    }

    /**
     * Tokenizes a text container for the given locale.
     *
     * <p>The text is first run through the main and service lexers. RAWTEXT lexems
     * collected during that pass are then re-run, one by one, through the main
     * lexers with their start offset as shift. Finally the token ranges are fixed
     * using the recorded positions and the result is filtered by the token names
     * from the parameters.
     *
     * @param tc container to tokenize
     * @param language locale of the container
     * @return the filtered tokens, or null if the container is null, the locale is
     *         null/empty, or the locale is not supported by the parameters
     */
    private Tokens tokenize(TextContainer tc, LocaleId language) {
        if (tc == null || Util.isNullOrEmpty(language) || !this.params.supportsLanguage(language)) {
            return null;
        }
        this.positions.clear();
        this.rawtextLexems.clear();
        Tokens tokens = new Tokens();
        String text = tc.contentIsOneSegment()
                ? TextUnitUtil.getText(tc.getFirstContent(), this.positions)
                : TextUnitUtil.getText(tc.getUnSegmentedContentCopy(), this.positions);

        // First pass: RAWTEXT lexems produced here are queued for a second pass.
        this.allowNewRawText = true;
        this.runLexers(this.lexers, text, language, tokens, 0);
        this.runLexers(this.serviceLexers, text, language, tokens, 0);
        this.allowNewRawText = false;

        if (!this.rawtextLexems.isEmpty()) {
            Tokens tempTokens = new Tokens();
            int previousCount = 0;
            while (!this.rawtextLexems.isEmpty()) {
                int currentCount = this.rawtextLexems.size();
                // The queue must shrink on every iteration; otherwise stop to avoid looping forever.
                if (previousCount > 0 && currentCount >= previousCount) {
                    if (currentCount == previousCount) {
                        this.logger.debug("RAWTEXT lexems are not processed in tokenize()");
                    } else {
                        this.logger.debug("RAWTEXT lexems are creating a chain reaction in tokenize()");
                    }
                    break;
                }
                tempTokens.clear();
                previousCount = currentCount;
                Lexem lexem = this.rawtextLexems.poll();
                text = lexem.getValue();
                int textShift = lexem.getRange().start;
                this.runLexers(this.lexers, text, language, tempTokens, textShift);
                tempTokens.setImmutable(true);
                tokens.addAll(tempTokens);
            }
            // NOTE(review): this pass receives the text of the last processed raw-text
            // lexem with no shift, as in the original code — confirm this is intended.
            this.runLexers(this.serviceLexers, text, language, tokens, 0);
        }
        tokens.fixRanges(this.positions);
        return tokens.getFilteredList(ListUtil.stringListAsArray(this.params.getTokenNames()));
    }

    /**
     * Tokenizes the source of the text unit with the source locale and stores the
     * tokens in the source's {@link TokensAnnotation}, creating it if absent.
     */
    private void tokenizeSource(ITextUnit tu) {
        if (tu == null) {
            return;
        }
        Tokens tokens = this.tokenize(tu.getSource(), this.getSourceLocale());
        if (tokens == null) {
            return;
        }
        TokensAnnotation ta = TextUnitUtil.getSourceAnnotation(tu, TokensAnnotation.class);
        if (ta == null) {
            TextUnitUtil.setSourceAnnotation(tu, new TokensAnnotation(tokens));
        } else {
            ta.addTokens(tokens);
        }
    }

    /**
     * Tokenizes every target of the text unit with its own locale and stores the
     * tokens in that target's {@link TokensAnnotation}, creating it if absent.
     */
    private void tokenizeTargets(ITextUnit tu) {
        if (tu == null) {
            return;
        }
        for (LocaleId language : tu.getTargetLocales()) {
            Tokens tokens = this.tokenize(tu.getTarget(language), language);
            if (tokens == null) {
                continue;
            }
            TokensAnnotation ta = TextUnitUtil.getTargetAnnotation(tu, language, TokensAnnotation.class);
            if (ta == null) {
                TextUnitUtil.setTargetAnnotation(tu, language, new TokensAnnotation(tokens));
            } else {
                ta.addTokens(tokens);
            }
        }
    }

    /**
     * @return the live list of rules currently excluded from token production
     */
    public List<LexerRule> getIdleRules() {
        return this.idleRules;
    }

    /**
     * Replaces the list of main lexers. The list is stored as is, not copied.
     *
     * @param lexers the new lexer list
     */
    public void setLexers(List<ILexer> lexers) {
        this.lexers = lexers;
    }

    /**
     * @return the live list of main lexers
     */
    public List<ILexer> getLexers() {
        return this.lexers;
    }

    /**
     * @return the engine configuration value of the loaded config, or an empty
     *         string if no config object exists
     */
    public String getConfigInfo() {
        if (this.config == null) {
            return "";
        }
        return this.config.getEngineConfig();
    }
}

