/*===========================================================================
 Copyright (C) 2010-2013 by the Okapi Framework contributors
 -----------------------------------------------------------------------------
 This library is free software; you can redistribute it and/or modify it
 under the terms of the GNU Lesser General Public License as published by
 the Free Software Foundation; either version 2.1 of the License, or (at
 your option) any later version.

 This library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
 General Public License for more details.

 You should have received a copy of the GNU Lesser General Public License
 along with this library; if not, write to the Free Software Foundation,
 Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

 See also the full LGPL text here: http://www.gnu.org/copyleft/lesser.html
 ===========================================================================*/
package net.sf.okapi.lib.verification;

import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.annotation.IssueType;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.Segment;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextUnitUtil;

/**
 * Checks the text of a segment against a list of blacklisted terms and
 * records one {@link Issue} for every whole-word occurrence it finds.
 * <p>
 * {@link #initialize(BlacklistTB, LocaleId, LocaleId)} must be called before
 * {@link #verify(URI, String, ITextUnit, Segment)}: the issue list and the
 * term source are only set there. The list returned by {@link #getIssues()}
 * is cleared and refilled by each call to <code>verify</code>.
 */
public class BlacklistChecker {

	/**
	 * Number of characters accounted for per in-line code when converting
	 * between positions in the plain text and positions in the segment content.
	 * NOTE(review): presumably the width of one in-line code in the coded
	 * text - confirm against TextFragment.
	 */
	private static final int MARKER_WIDTH = 2;

	private List<Issue> issues;
	private LocaleId srcLoc;
	private LocaleId trgLoc;
	private BlacklistTB ta;

	/**
	 * Prepares this checker for use and resets the list of issues.
	 * 
	 * @param termAccess the provider of the blacklisted terms
	 * @param srcLoc the source locale (stored, not used by the checks in this class)
	 * @param trgLoc the target locale (stored, not used by the checks in this class)
	 */
	public void initialize(BlacklistTB termAccess, LocaleId srcLoc, LocaleId trgLoc) {
		this.issues = new ArrayList<Issue>();
		this.srcLoc = srcLoc;
		this.trgLoc = trgLoc;
		this.ta = termAccess;
	}

	/**
	 * Searches the given segment for blacklisted terms.
	 * Issues from any previous call are discarded.
	 * 
	 * @param docId the document identifier stored in each created issue
	 * @param subDocId the sub-document identifier stored in each created issue
	 * @param tu the text unit the segment belongs to (its id and name are stored in each issue)
	 * @param segment the segment whose content is searched
	 * @return the number of issues found in the segment
	 */
	public int verify(URI docId,
			String subDocId,
			ITextUnit tu,
			Segment segment) {

		return verifyTerms(docId, subDocId, tu, segment);
	}

	/**
	 * Implementation of {@link #verify(URI, String, ITextUnit, Segment)}:
	 * looks for every whole-word occurrence of every blacklisted term in the
	 * lower-cased text of the segment and creates one issue per occurrence.
	 * 
	 * @return the number of issues created
	 */
	private int verifyTerms(URI docId,
			String subDocId,
			ITextUnit tu,
			Segment segment) {
		issues.clear();

		List<BlackTerm> termList = ta.getBlacklistStrings();
		List<Integer> mrkPositions = new ArrayList<Integer>();
		// The search runs on the text without in-line codes; mrkPositions
		// receives the positions of those codes so the hits can be mapped back.
		String search = TextUnitUtil.getText(segment.getContent(), mrkPositions);
		// NOTE(review): toLowerCase() uses the default locale; the search terms
		// are presumably lower-cased the same way when loaded - confirm.
		search = search.toLowerCase();
		mrkPositions = updateMarkerPositions(mrkPositions);

		for (BlackTerm bterm : termList) {
			String term = bterm.searchTerm;
			// indexOf() never returns -1 for an empty string, so an empty
			// term would make the loop below run forever.
			if ((term == null) || term.isEmpty()) {
				continue;
			}

			int idx = 0;
			while ((idx = search.indexOf(term, idx)) != -1) {
				int end = idx + term.length();

				// A hit embedded in a longer word is not reported, but the scan
				// must go on: a later occurrence may be a stand-alone word.
				if (!isWholeWord(search, idx, end)) {
					idx++;
					continue;
				}

				// Map the plain-text positions back to the segment content.
				// With no in-line code getOffset() returns 0.
				int rangeStart = idx + getOffset(idx, mrkPositions, true);
				int rangeEnd = end + getOffset(end, mrkPositions, false);

				// Create issue
				String message;
				if ((bterm.suggestion != null) && !bterm.suggestion.isEmpty()) {
					message = String.format("The term \"%s\" is a blacklisted term. Consider using \"%s\".", bterm.text, bterm.suggestion);
				} else {
					message = String.format("The term \"%s\" is a blacklisted term. Consider revising.", bterm.text);
				}
				// NOTE(review): -1 and 0 are presumably the source start/end
				// positions (no source span) - confirm against the Issue constructor.
				Issue issue = new Issue(docId, subDocId, IssueType.TERMINOLOGY, tu.getId(), segment.getId(), message,
						-1, 0,
						TextFragment.fromFragmentToString(segment.text, rangeStart),
						TextFragment.fromFragmentToString(segment.text, rangeEnd),
						Issue.DISPSEVERITY_LOW, tu.getName());
				issues.add(issue);

				// Move past the start of this hit to look for the next one
				idx++;
			}
		}
		return issues.size();
	}

	/**
	 * Gets the issues found by the last call to
	 * {@link #verify(URI, String, ITextUnit, Segment)}.
	 * 
	 * @return the live list of issues, or null if
	 * {@link #initialize(BlacklistTB, LocaleId, LocaleId)} has not been called yet
	 */
	public List<Issue> getIssues() {
		return issues;
	}

	/**
	 * Indicates if the span [start, end) of the given text is not directly
	 * preceded or followed by a letter. The edges of the text count as
	 * boundaries.
	 * 
	 * @param text the text that was searched
	 * @param start the index of the first character of the hit
	 * @param end the index just after the last character of the hit
	 * @return true if no letter touches the hit on either side
	 */
	private boolean isWholeWord(String text, int start, int end) {
		if ((start > 0) && Character.isLetter(text.charAt(start - 1))) {
			return false;
		}
		if ((end < text.length()) && Character.isLetter(text.charAt(end))) {
			return false;
		}
		return true;
	}

	/**
	 * Recalculates the marker positions based on the string without in-line code.
	 * Basing the marker positions on the string with the in-line code removed
	 * makes the calculation of marker offset from the start of the string easier.
	 * The list is modified in place: the i-th entry is reduced by the room
	 * taken by the i markers that precede it.
	 * 
	 * @param mrkPositions is the list of indices for all the in-line code
	 * @return the same list, with all the indices updated
	 */
	private List<Integer> updateMarkerPositions(List<Integer> mrkPositions) {
		for (int i = 0; i < mrkPositions.size(); i++) {
			int value = mrkPositions.get(i);
			value = value - (i * MARKER_WIDTH);
			mrkPositions.set(i, value);
		}
		return mrkPositions;
	}

	/**
	 * Calculates the offset of the term hit.
	 * As the search is performed on a string where the in-line code has been
	 * removed, the rangeStart and rangeEnd have to be recalculated to take into
	 * account any in-line code that may be present in the original string.
	 * <p>
	 * Markers located before the index are always counted. A marker located
	 * exactly at the index is counted only for the start of a range, so it is
	 * left out of the range when it sits at the start and when it sits at the end.
	 * Counting stops at the first marker located after the index.
	 * 
	 * @param index is the position of the BlackTerm in the search string
	 * @param mrkPositions is the list of in-line code indices, as returned by
	 * {@link #updateMarkerPositions(List)}
	 * @param isStart determines if the offset is for the rangeStart or the rangeEnd
	 * @return the number of characters to add to the index
	 */
	private int getOffset(int index, List<Integer> mrkPositions, boolean isStart) {
		int count = 0;
		for (int i = 0; i < mrkPositions.size(); i++) {
			int position = mrkPositions.get(i);
			if (position > index) {
				// First marker after the index: stop counting
				break;
			}
			if ((position < index) || isStart) {
				count++;
			}
		}
		return count * MARKER_WIDTH;
	}
}
