/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.steps.whitespacecorrection;

import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextPart;

/**
 * Adjusts whitespace that follows sentence punctuation in the target of a
 * text unit when source and target locales differ in whether they are
 * space-delimited (see {@link #isSpaceDelimitedLanguage(LocaleId)}).
 *
 * <ul>
 *   <li>Space-delimited source, non-space-delimited target: whitespace that
 *       follows a non-space-delimited punctuation form in the target is
 *       removed.</li>
 *   <li>Non-space-delimited source, space-delimited target: a single space
 *       is appended to a target part when the matching source part ends with
 *       a non-space-delimited punctuation form and the target part does not
 *       already end with one of the configured whitespace characters.</li>
 * </ul>
 *
 * In every other combination the text unit is returned untouched.
 */
public class WhitespaceCorrector {
    /** Marker inserted after each part when a container is flattened into one string. */
    private static final char PARTSEP = '\ue091';

    /** Whitespace characters that start a new line or page. */
    public static final Set<Whitespace> VERTICAL_WHITESPACE = Collections.unmodifiableSet(EnumSet.of(
            Whitespace.LINE_FEED,
            Whitespace.LINE_TABULATION,
            Whitespace.FORM_FEED,
            Whitespace.CARRIAGE_RETURN,
            Whitespace.NEXT_LINE,
            Whitespace.LINE_SEPARATOR,
            Whitespace.PARAGRAPH_SEPARATOR));

    /** Non-breaking space variants. */
    public static final Set<Whitespace> NONBREAKING_SPACES = Collections.unmodifiableSet(EnumSet.of(
            Whitespace.NO_BREAK_SPACE,
            Whitespace.ZERO_WIDTH_NON_BREAKING_SPACE,
            Whitespace.NAORROW_NO_BREAK_SPACE));

    /** The plain space character only. */
    public static final Set<Whitespace> SPACE = Collections.unmodifiableSet(EnumSet.of(Whitespace.SPACE));

    /** Every constant of {@link Whitespace}. */
    public static final Set<Whitespace> ALL_WHITESPACE = Collections.unmodifiableSet(EnumSet.allOf(Whitespace.class));

    /**
     * Every {@link Whitespace} constant that is not a member of
     * {@link #VERTICAL_WHITESPACE}, {@link #NONBREAKING_SPACES}, {@link #SPACE}
     * or {@link #HORIZONTAL_TABS}.
     */
    public static final Set<Whitespace> OTHER = Collections.unmodifiableSet(EnumSet.complementOf(EnumSet.of(
            Whitespace.LINE_FEED,
            Whitespace.LINE_TABULATION,
            Whitespace.FORM_FEED,
            Whitespace.CARRIAGE_RETURN,
            Whitespace.NEXT_LINE,
            Whitespace.LINE_SEPARATOR,
            Whitespace.PARAGRAPH_SEPARATOR,
            Whitespace.NO_BREAK_SPACE,
            Whitespace.ZERO_WIDTH_NON_BREAKING_SPACE,
            Whitespace.NAORROW_NO_BREAK_SPACE,
            Whitespace.SPACE,
            Whitespace.CHARACTER_TABULATION)));

    /** The horizontal tab character only. */
    public static final Set<Whitespace> HORIZONTAL_TABS = Collections.unmodifiableSet(EnumSet.of(Whitespace.CHARACTER_TABULATION));

    /** Character appended by {@link #addTrailingWhitespace(ITextUnit)}. */
    protected static final char WHITESPACE = ' ';

    protected LocaleId sourceLocale;
    protected LocaleId targetLocale;
    protected Set<Punctuation> punctuation;
    protected Set<Whitespace> whitespace;

    /**
     * Creates a corrector. The given sets are stored by reference, not copied.
     *
     * @param sourceLocale locale of the source content
     * @param targetLocale locale of the target to correct
     * @param punctuation punctuation marks after which whitespace is corrected
     * @param whitespace characters treated as whitespace by this corrector
     */
    public WhitespaceCorrector(LocaleId sourceLocale, LocaleId targetLocale, Set<Punctuation> punctuation, Set<Whitespace> whitespace) {
        this.sourceLocale = sourceLocale;
        this.targetLocale = targetLocale;
        this.punctuation = punctuation;
        this.whitespace = whitespace;
    }

    /**
     * Tells whether the locale is considered space-delimited by this class.
     *
     * @param localeId locale to test
     * @return {@code false} when {@code LocaleId.JAPANESE} or
     *         {@code LocaleId.CHINA_CHINESE} reports the same language as
     *         {@code localeId}; {@code true} otherwise
     */
    static boolean isSpaceDelimitedLanguage(LocaleId localeId) {
        return !LocaleId.JAPANESE.sameLanguageAs(localeId) && !LocaleId.CHINA_CHINESE.sameLanguageAs(localeId);
    }

    /**
     * Corrects whitespace after punctuation in the target of {@code tu} for
     * this corrector's target locale. Nothing is done when the text unit has
     * no such target or when both locales fall in the same category.
     *
     * @param tu text unit to correct in place
     * @return the same {@code tu} instance
     */
    public ITextUnit correctWhitespace(ITextUnit tu) {
        if (tu.getTarget(this.targetLocale) == null) {
            return tu;
        }
        boolean sourceSpaced = WhitespaceCorrector.isSpaceDelimitedLanguage(this.sourceLocale);
        boolean targetSpaced = WhitespaceCorrector.isSpaceDelimitedLanguage(this.targetLocale);
        if (sourceSpaced && !targetSpaced) {
            this.removeTrailingWhitespace(tu);
        } else if (!sourceSpaced && targetSpaced) {
            this.addTrailingWhitespace(tu);
        }
        return tu;
    }

    /**
     * Flattens a container into one string: the coded text of every part,
     * each followed by {@link #PARTSEP}.
     */
    private String segmentsToString(TextContainer tc) {
        StringBuilder flattened = new StringBuilder();
        for (TextPart part : tc) {
            flattened.append(part.getContent().getCodedText());
            flattened.append(PARTSEP);
        }
        return flattened.toString();
    }

    /**
     * Splits a string produced by {@link #segmentsToString(TextContainer)}
     * back into per-part strings. Because every part is terminated by the
     * separator, the returned list ends with one extra empty string.
     */
    private List<String> stringToSegments(String segments) {
        List<String> parts = new ArrayList<>();
        // Limit -1 keeps trailing empty strings so empty parts are not lost.
        Collections.addAll(parts, segments.split(String.valueOf(PARTSEP), -1));
        return parts;
    }

    /**
     * Removes configured whitespace that follows a non-space-delimited
     * punctuation form in the target, including whitespace at the start of
     * the part(s) that follow the punctuation.
     *
     * @param textUnit text unit whose target for the target locale is rewritten in place
     */
    protected void removeTrailingWhitespace(ITextUnit textUnit) {
        TextContainer targetTextContainer = textUnit.getTarget(this.targetLocale);
        String cleaned = this.findAndRemoveWhitespacesAfterPunctuation(this.segmentsToString(targetTextContainer));
        List<String> newParts = this.stringToSegments(cleaned);
        // Indexed access instead of repeatedly removing the head of an ArrayList.
        int index = 0;
        for (TextPart part : targetTextContainer.getParts()) {
            part.getContent().setCodedText(newParts.get(index++));
        }
    }

    /**
     * Appends one {@link #WHITESPACE} to each target part whose corresponding
     * source part ends with a non-space-delimited punctuation form, unless the
     * target part already ends with a configured whitespace character.
     * Source and target parts are paired by position; iteration stops at the
     * end of the shorter list. Parts whose source or target text is empty are
     * left unchanged.
     *
     * @param textUnit text unit whose target for the target locale is modified in place
     */
    protected void addTrailingWhitespace(ITextUnit textUnit) {
        TextContainer sourceTextContainer = textUnit.getSource();
        TextContainer targetTextContainer = textUnit.getTarget(this.targetLocale);
        Iterator<TextPart> sourceParts = sourceTextContainer.getParts().iterator();
        Iterator<TextPart> targetParts = targetTextContainer.getParts().iterator();
        while (sourceParts.hasNext() && targetParts.hasNext()) {
            TextPart sourcePart = sourceParts.next();
            TextPart targetPart = targetParts.next();
            String sourceText = sourcePart.getContent().getText();
            if (sourceText.isEmpty() || !this.isNonSpaceDelimitedPunctuation(this.lastChar(sourceText))) {
                continue;
            }
            String targetText = targetPart.getContent().getText();
            // An empty target text has no last character; calling lastChar on
            // it would throw StringIndexOutOfBoundsException, so skip the part.
            if (targetText.isEmpty() || this.isWhitespace(this.lastChar(targetText))) {
                continue;
            }
            targetPart.getContent().append(WHITESPACE);
        }
    }

    /**
     * @param c character to test
     * @return {@code true} if {@code c} is one of the whitespace characters
     *         this corrector was configured with
     */
    protected boolean isWhitespace(char c) {
        for (Whitespace ws : this.whitespace) {
            if (c == ws.whitespace) {
                return true;
            }
        }
        return false;
    }

    /** Returns the last character of {@code s}; {@code s} must not be empty. */
    private char lastChar(String s) {
        return s.charAt(s.length() - 1);
    }

    /**
     * @param c character to test
     * @return {@code true} if {@code c} is the whitespace-accepting form of
     *         one of the configured punctuation marks
     */
    protected boolean isSpaceDelimitedPunctuation(char c) {
        for (Punctuation p : this.punctuation) {
            if (c == p.whitespaceAcceptingForm) {
                return true;
            }
        }
        return false;
    }

    /**
     * @param c character to test
     * @return {@code true} if {@code c} is one of the whitespace-non-accepting
     *         forms of one of the configured punctuation marks
     */
    protected boolean isNonSpaceDelimitedPunctuation(char c) {
        for (Punctuation p : this.punctuation) {
            for (char form : p.whitespaceNonAcceptingForm) {
                if (form == c) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Copies {@code segments}, dropping every run of configured whitespace
     * that directly follows a non-space-delimited punctuation form. Part
     * separators inside such a run are kept, and the run continues across
     * them, so whitespace at the start of the following part is dropped too.
     */
    private String findAndRemoveWhitespacesAfterPunctuation(String segments) {
        int length = segments.length();
        StringBuilder result = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            char current = segments.charAt(pos++);
            result.append(current);
            if (!this.isNonSpaceDelimitedPunctuation(current)) {
                continue;
            }
            // Consume the run after the punctuation: keep separators, drop whitespace.
            while (pos < length) {
                char next = segments.charAt(pos);
                if (next == PARTSEP) {
                    result.append(next);
                } else if (!this.isWhitespace(next)) {
                    break;
                }
                pos++;
            }
        }
        return result.toString();
    }

    /**
     * Whitespace characters the corrector can be configured to recognize.
     * Some constant names are misspelled; they are kept as-is because the
     * names are part of the public API.
     */
    public enum Whitespace {
        LINE_FEED('\n'),
        LINE_TABULATION('\u000b'),
        FORM_FEED('\f'),
        CARRIAGE_RETURN('\r'),
        NEXT_LINE('\u0085'),
        LINE_SEPARATOR('\u2028'),
        PARAGRAPH_SEPARATOR('\u2029'),
        CHARACTER_TABULATION('\t'),
        SPACE(' '),
        NO_BREAK_SPACE('\u00a0'),
        EN_QUAD('\u2000'),
        EM_QUAD('\u2001'),
        EN_SPACE('\u2002'),
        EM_SPACE('\u2003'),
        THREE_PER_EM_SPACE('\u2004'),
        FOUR_PER_EM_SPACER('\u2005'),
        SIX_PER_EM_SPACE('\u2006'),
        FIGURE_SPACE('\u2007'),
        PUNCUATION_SPACE('\u2008'),
        THIS_SPACE('\u2009'),
        HAIR_SPACE('\u200a'),
        NAORROW_NO_BREAK_SPACE('\u202f'),
        MEDIUM_MATHEMATICAL_SPACE('\u205f'),
        IDEOGRAPHIC_SPACE('\u3000'),
        ZERO_WIDTH_SPACE('\u200b'),
        ZERO_WIDTH_NON_BREAKING_SPACE('\ufeff');

        private final char whitespace;

        Whitespace(char whitespace) {
            this.whitespace = whitespace;
        }

        /** @return the character this constant stands for */
        public char getWhitespace() {
            return this.whitespace;
        }
    }

    /**
     * Punctuation marks, each with one whitespace-accepting form and one or
     * more whitespace-non-accepting forms.
     */
    public enum Punctuation {
        FULL_STOP('.', '\u3002', '\uff0e'),
        COMMA(',', '\u3001', '\uff0c'),
        EXCLAMATION_MARK('!', '\uff01'),
        QUESTION_MARK('?', '\uff1f');

        private final char[] whitespaceNonAcceptingForm;
        private final char whitespaceAcceptingForm;

        Punctuation(char whitespaceAcceptingForm, char... whitespaceNonAcceptingForms) {
            this.whitespaceAcceptingForm = whitespaceAcceptingForm;
            this.whitespaceNonAcceptingForm = whitespaceNonAcceptingForms.clone();
        }

        /** @return the form of this mark that may be followed by whitespace */
        public char getWhitespaceAcceptingForm() {
            return this.whitespaceAcceptingForm;
        }

        /** @return a fresh copy of the forms of this mark that are not followed by whitespace */
        public char[] getWhitespaceNonAcceptingForm() {
            return this.whitespaceNonAcceptingForm.clone();
        }
    }
}

