/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.filters.html;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.EmptyStackException;
import java.util.logging.Level;
import java.util.logging.Logger;
import net.htmlparser.jericho.Attribute;
import net.htmlparser.jericho.EndTag;
import net.htmlparser.jericho.Segment;
import net.htmlparser.jericho.StartTag;
import net.htmlparser.jericho.StartTagType;
import net.htmlparser.jericho.Tag;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.exceptions.OkapiIOException;
import net.sf.okapi.common.filters.FilterConfiguration;
import net.sf.okapi.common.filters.PropertyTextUnitPlaceholder;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.filters.abstractmarkup.AbstractMarkupEventBuilder;
import net.sf.okapi.filters.abstractmarkup.AbstractMarkupFilter;
import net.sf.okapi.filters.abstractmarkup.ExtractionRuleState;
import net.sf.okapi.filters.html.Parameters;
import net.sf.okapi.filters.html.StreamedSourceCopy;
import net.sf.okapi.filters.yaml.TaggedFilterConfiguration;

/**
 * Filter for HTML and XHTML documents built on top of {@link AbstractMarkupFilter}.
 *
 * <p>On {@link #open(RawDocument, boolean)} the input is first rewritten into a temporary
 * "tidied" copy via {@link StreamedSourceCopy#htmlTidiedRewrite}; the parent filter then
 * parses that copy. The temporary copy is closed and deleted when the filter ends.
 *
 * <p>Two configurations are registered by the constructor: the default one for
 * non-well-formed HTML and a {@code -wellFormed} variant.
 */
@UsingParameters(value=Parameters.class)
public class HtmlFilter
extends AbstractMarkupFilter {
    private static final Logger LOGGER = Logger.getLogger(HtmlFilter.class.getName());

    /** Lower-case ASCII marker that introduces the charset inside a Content-Type value. */
    private static final String CHARSET_KEY = "charset=";

    private Parameters parameters;

    /** Tidied temporary copy of the document currently being filtered, or {@code null}. */
    private RawDocument tempSourceInput;

    /**
     * Creates the filter, installs default {@link Parameters} and registers the
     * non-well-formed and well-formed configurations.
     */
    public HtmlFilter() {
        this.setMimeType("text/html");
        this.setFilterWriter(this.createFilterWriter());
        this.setParameters(new Parameters());
        this.setName("okf_html");
        this.setDisplayName("HTML/XHTML Filter");
        this.addConfiguration(new FilterConfiguration(this.getName(), "text/html", this.getClass().getName(), "HTML", "HTML or XHTML documents", "nonwellformedConfiguration.yml", ".html;.htm;"));
        this.addConfiguration(new FilterConfiguration(this.getName() + "-wellFormed", "text/xhtml", this.getClass().getName(), "HTML (Well-Formed)", "XHTML and well-formed HTML documents", "wellformedConfiguration.yml"));
    }

    /**
     * Opens the document by generating a tidied temporary copy and handing that copy to
     * the parent filter.
     *
     * @param input the document to filter
     * @param generateSkeleton passed through to the parent {@code open}
     * @throws OkapiIOException if the tidied temporary copy cannot be generated
     */
    @Override
    public void open(RawDocument input, boolean generateSkeleton) {
        // A previous open() that never reached endFilter() would otherwise leave its
        // temporary file behind once the field is overwritten below.
        if (this.tempSourceInput != input) {
            this.releaseTempSource();
        }
        String encoding = this.detectEncoding(input);
        this.setCurrentDocName(input.getInputURI() == null ? "" : input.getInputURI().getPath());
        try {
            this.tempSourceInput = StreamedSourceCopy.htmlTidiedRewrite(input, this.isDocumentEncoding(), encoding, this.isBOM());
        }
        catch (IOException e) {
            throw new OkapiIOException("Error generating tidied source temp file", e);
        }
        super.open(this.tempSourceInput, generateSkeleton);
    }

    /**
     * Runs the parent start-up, then initializes the event builder's code finder from the
     * current configuration. Logs a FINE note when whitespace will be collapsed.
     */
    @Override
    protected void startFilter() {
        super.startFilter();
        TaggedFilterConfiguration config = this.getConfig();
        if (!config.isGlobalPreserveWhitespace()) {
            LOGGER.log(Level.FINE, "By default the HTML filter will collapse whitespace unless overridden in the configuration");
        }
        this.getEventBuilder().initializeCodeFinder(config.isUseCodeFinder(), config.getCodeFinderRules());
    }

    /**
     * Runs the parent shutdown and then closes and deletes the temporary tidied copy.
     * The cleanup happens even if the parent shutdown throws.
     */
    @Override
    protected void endFilter() {
        try {
            super.endFilter();
        }
        finally {
            this.releaseTempSource();
        }
    }

    /**
     * Closes the temporary tidied document, deletes its backing file (falling back to
     * delete-on-exit when immediate deletion fails) and clears the field so the cleanup
     * is not repeated.
     */
    private void releaseTempSource() {
        RawDocument temp = this.tempSourceInput;
        if (temp == null) {
            return;
        }
        this.tempSourceInput = null;
        try {
            temp.close();
        }
        finally {
            File tempFile = new File(temp.getInputURI());
            if (!tempFile.delete()) {
                tempFile.deleteOnExit();
            }
        }
    }

    /**
     * For non-well-formed configurations, ends the current text unit whenever a tag that
     * is not treated as inline is encountered. Comments and XML processing instructions
     * count as inline only while the event builder is inside a text run.
     *
     * @param segment the parsed segment about to be handled
     */
    @Override
    protected void preProcess(Segment segment) {
        super.preProcess(segment);
        if (this.getConfig().isWellformed()) {
            return;
        }
        if (!(segment instanceof Tag)) {
            return;
        }
        Tag tag = (Tag)segment;
        boolean inlineTag = this.getConfig().getElementRuleType(tag.getName()) == TaggedFilterConfiguration.RULE_TYPE.INLINE_ELEMENT
                || this.getEventBuilder().isInsideTextRun()
                        && (tag.getTagType() == StartTagType.COMMENT || tag.getTagType() == StartTagType.XML_PROCESSING_INSTRUCTION);
        if (this.getEventBuilder().isCurrentTextUnit() && !inlineTag) {
            this.getEventBuilder().endTextUnit();
        }
    }

    /**
     * Pops the rule state that corresponds to the given end tag and returns the rule type
     * to apply to it.
     *
     * <p>For inline and text-unit elements an empty rule stack (an end tag with no start
     * tag) is tolerated and the configured rule type is kept. If the popped state belongs
     * to a differently named element, the rule type configured for the end tag's own name
     * is returned instead and the mismatch is logged at FINE level.
     *
     * @param endTag the end tag being processed
     * @return the rule type to use for this end tag
     */
    @Override
    protected TaggedFilterConfiguration.RULE_TYPE updateEndTagRuleState(EndTag endTag) {
        TaggedFilterConfiguration.RULE_TYPE ruleType = this.getConfig().getElementRuleType(endTag.getName());
        ExtractionRuleState.RuleType currentState = null;
        switch (ruleType) {
            case INLINE_ELEMENT: {
                try {
                    currentState = this.getRuleState().popInlineRule();
                    ruleType = currentState.ruleType;
                }
                catch (EmptyStackException ignored) {
                    // Empty stack: the end tag has no start tag; keep the configured type.
                    logUnmatchedEndTag(endTag);
                }
                break;
            }
            case ATTRIBUTES_ONLY: {
                // No rule state is popped for this type.
                break;
            }
            case GROUP_ELEMENT: {
                currentState = this.getRuleState().popGroupRule();
                ruleType = currentState.ruleType;
                break;
            }
            case EXCLUDED_ELEMENT:
            case INCLUDED_ELEMENT: {
                // Excluded and included elements are popped through the same call.
                currentState = this.getRuleState().popExcludedIncludedRule();
                ruleType = currentState.ruleType;
                break;
            }
            case TEXT_UNIT_ELEMENT: {
                try {
                    currentState = this.getRuleState().popTextUnitRule();
                    ruleType = currentState.ruleType;
                }
                catch (EmptyStackException ignored) {
                    // Empty stack: the end tag has no start tag; keep the configured type.
                    logUnmatchedEndTag(endTag);
                }
                break;
            }
            default: {
                break;
            }
        }
        if (currentState != null && !currentState.ruleName.equalsIgnoreCase(endTag.getName())) {
            // The popped state came from another element: fall back to this tag's own rule.
            ruleType = this.getConfig().getElementRuleType(endTag.getName());
            if (LOGGER.isLoggable(Level.FINE)) {
                String character = Integer.toString(endTag.getBegin());
                LOGGER.log(Level.FINE, "End tag " + endTag.getName() + " and start tag " + currentState.ruleName + " do not match at character number " + character);
            }
        }
        return ruleType;
    }

    /** Logs, at FINE level, an end tag for which the rule stack held no start tag. */
    private static void logUnmatchedEndTag(EndTag endTag) {
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "End tag " + endTag.getName() + " has no matching start tag at character number " + endTag.getBegin());
        }
    }

    /**
     * Creates the placeholder for an attribute. For the {@code content} attribute of a
     * Content-Type {@code meta} tag, the placeholder value is narrowed to the text that
     * follows the last {@code charset=}; every other attribute is delegated to the parent
     * with its value normalized by the event builder.
     *
     * @param type the placeholder access type
     * @param name the attribute name
     * @param value the attribute value
     * @param tag the tag that owns the attribute
     * @param attribute the parsed attribute, used for its positions
     * @return the placeholder describing the attribute
     */
    @Override
    protected PropertyTextUnitPlaceholder createPropertyTextUnitPlaceholder(PropertyTextUnitPlaceholder.PlaceholderAccessType type, String name, String value, Tag tag, Attribute attribute) {
        String normalizeAttributeName = this.normalizeAttributeName(name, value, tag);
        if (this.isMetaCharset(name, value, tag)) {
            // Search the original text (not a lower-cased copy) so the offset stays
            // aligned with the value even when lower-casing would change its length.
            int charsetPos = lastIndexOfIgnoreAsciiCase(value, CHARSET_KEY);
            if (charsetPos != -1) {
                // All positions are relative to the start of the tag.
                int mainStartPos = attribute.getBegin() - tag.getBegin();
                int mainEndPos = attribute.getEnd() - tag.getBegin();
                int charsetValueOffset = charsetPos + CHARSET_KEY.length();
                int valueStartPos = attribute.getValueSegment().getBegin() + charsetValueOffset - tag.getBegin();
                int valueEndPos = attribute.getValueSegment().getEnd() - tag.getBegin();
                String v = tag.toString().substring(valueStartPos, valueEndPos);
                return new PropertyTextUnitPlaceholder(type, normalizeAttributeName, v, mainStartPos, mainEndPos, valueStartPos, valueEndPos);
            }
        }
        return super.createPropertyTextUnitPlaceholder(type, name, this.getEventBuilder().normalizeHtmlText(value, true, this.isPreserveWhitespace()), tag, attribute);
    }

    /**
     * Maps HTML attribute names onto the property names used by this filter:
     * the Content-Type charset becomes {@code "encoding"}; a Content-Language
     * {@code meta} content, {@code lang} and {@code xml:lang} become {@code "language"}.
     * Any other name is returned unchanged.
     *
     * @param attrName the attribute name
     * @param attrValue the attribute value
     * @param tag the tag that owns the attribute
     * @return the normalized property name
     */
    @Override
    protected String normalizeAttributeName(String attrName, String attrValue, Tag tag) {
        if (this.isMetaCharset(attrName, attrValue, tag)) {
            return "encoding";
        }
        if (isMetaContentAttribute(attrName, tag)) {
            String httpEquiv = ((StartTag)tag).getAttributeValue("http-equiv");
            if (httpEquiv != null && httpEquiv.equalsIgnoreCase("Content-Language")) {
                return "language";
            }
        }
        if ("lang".equalsIgnoreCase(attrName) || "xml:lang".equalsIgnoreCase(attrName)) {
            return "language";
        }
        return attrName;
    }

    /**
     * Tells whether the attribute is the {@code content} of a
     * {@code <meta http-equiv="Content-Type">} tag whose content holds {@code charset=}.
     */
    private boolean isMetaCharset(String attrName, String attrValue, Tag tag) {
        if (!isMetaContentAttribute(attrName, tag)) {
            return false;
        }
        StartTag st = (StartTag)tag;
        String httpEquiv = st.getAttributeValue("http-equiv");
        if (httpEquiv == null) {
            return false;
        }
        String content = st.getAttributeValue("content");
        return content != null
                && httpEquiv.equalsIgnoreCase("Content-Type")
                && lastIndexOfIgnoreAsciiCase(content, CHARSET_KEY) != -1;
    }

    /** Tells whether the attribute is the {@code content} attribute of a {@code meta} start tag. */
    private static boolean isMetaContentAttribute(String attrName, Tag tag) {
        return tag instanceof StartTag
                && "meta".equalsIgnoreCase(tag.getName())
                && "content".equalsIgnoreCase(attrName);
    }

    /**
     * Finds the last occurrence of {@code lowerNeedle} in {@code text}, ignoring the case
     * of ASCII letters only. Unlike searching a lower-cased copy, the returned index is
     * always valid for {@code text} itself and does not depend on the default locale.
     *
     * @param text the text to search; {@code null} yields {@code -1}
     * @param lowerNeedle the text to look for, already in lower-case ASCII
     * @return the index of the last match, or {@code -1} if there is none
     */
    private static int lastIndexOfIgnoreAsciiCase(String text, String lowerNeedle) {
        if (text == null) {
            return -1;
        }
        int needleLength = lowerNeedle.length();
        for (int start = text.length() - needleLength; start >= 0; --start) {
            int matched = 0;
            while (matched < needleLength && asciiLower(text.charAt(start + matched)) == lowerNeedle.charAt(matched)) {
                ++matched;
            }
            if (matched == needleLength) {
                return start;
            }
        }
        return -1;
    }

    /** Lower-cases an ASCII upper-case letter; any other character is returned as is. */
    private static char asciiLower(char c) {
        return c >= 'A' && c <= 'Z' ? (char)(c + ('a' - 'A')) : c;
    }

    /** Returns the tagged configuration held by the current parameters. */
    @Override
    protected TaggedFilterConfiguration getConfig() {
        return this.parameters.getTaggedConfig();
    }

    /**
     * Replaces the filter parameters.
     *
     * @param params the new parameters; must be a {@link Parameters} instance
     */
    @Override
    public void setParameters(IParameters params) {
        this.parameters = (Parameters)params;
    }

    /** Returns the current filter parameters. */
    @Override
    public IParameters getParameters() {
        return this.parameters;
    }

    /**
     * Replaces the filter parameters with ones built from the given URL.
     *
     * @param config the URL handed to the {@link Parameters} constructor
     */
    public void setParametersFromURL(URL config) {
        this.parameters = new Parameters(config);
    }

    /**
     * Replaces the filter parameters with ones built from the given file.
     *
     * @param config the file handed to the {@link Parameters} constructor
     */
    public void setParametersFromFile(File config) {
        this.parameters = new Parameters(config);
    }

    /**
     * Replaces the filter parameters with ones built from the given string.
     *
     * @param config the string handed to the {@link Parameters} constructor
     */
    public void setParametersFromString(String config) {
        this.parameters = new Parameters(config);
    }

    /** Returns the parent's event builder typed as {@link AbstractMarkupEventBuilder}. */
    @Override
    public AbstractMarkupEventBuilder getEventBuilder() {
        return (AbstractMarkupEventBuilder)super.getEventBuilder();
    }
}

