package org.exist.util;

import java.net.URI;
import java.net.URISyntaxException;
import java.text.CollationElementIterator;
import java.text.Collator;
import java.text.RuleBasedCollator;
import java.text.ParseException;
import java.util.Locale;
import java.util.StringTokenizer;

import org.apache.log4j.Logger;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;

/**
 * Utility methods dealing with collations.
 * 
 * @author wolf
 */
public class Collations {

	private final static Logger LOG = Logger.getLogger(Collations.class);
	
	/**
	 * The default unicode codepoint collation URI as defined by the XQuery spec.
	 */
	public final static String CODEPOINT = 
		"http://www.w3.org/2004/07/xpath-functions/collation/codepoint";
	
	/**
	 * Short string to select the default codepoint collation
	 */
	public final static String CODEPOINT_SHORT =
		"codepoint";
	
	/**
	 * The URI used to select collations in eXist.
	 */
	public final static String EXIST_COLLATION_URI =
		"http://exist-db.org/collation";
	
	/**
	 * Get a {@link Comparator} from the specified URI.
	 * 
	 * The original code is from saxon (@linkplain http://saxon.sf.net).
	 * 
	 * @param uri
	 * @return
	 * @throws XPathException
	 */
	public final static Collator getCollationFromURI(XQueryContext context, String uri) throws XPathException {
		if(uri.startsWith(EXIST_COLLATION_URI) || uri.startsWith("?")) {
			URI u = null;
			try {
				u = new URI(uri);
			} catch (URISyntaxException e) {
				return null;
			}
			String query = u.getQuery();
			String strength = null;
			/*
			 * Check if the db broker is configured to be case insensitive.
			 * If yes, we assume "primary" strength unless the user specified
			 * something different.
			 * 
			 * TODO: bad idea: using primary strength as default also ignores
			 * German Umlaute.
			 */
//			if(!context.getBroker().isCaseSensitive())
//				strength = "primary";
			if(query == null) {
				return getCollationFromParams(null, strength, null);
			} else {
				LOG.debug("Loading collation: " + query);
				String lang = null;
				String decomposition = null;
				StringTokenizer queryTokenizer = new StringTokenizer(query, ";&");
				while (queryTokenizer.hasMoreElements()) {
					String param = queryTokenizer.nextToken();
					int eq = param.indexOf('=');
					if (eq > 0) {
						String kw = param.substring(0, eq);
						String val = param.substring(eq + 1);
						if (kw.equals("lang")) {
							lang = val;
						} else if (kw.equals("strength")) {
							strength = val;
						} else if (kw.equals("decomposition")) {
							decomposition = val;
						}
					}
	            }
				return getCollationFromParams(lang, strength, decomposition);
			}
		} else if(uri.startsWith("java:")) {
			// java class specified: this should be a subclass of java.text.RuleBasedCollator
			uri = uri.substring("java:".length());
			try {
				Class collatorClass = Class.forName(uri);
				if(!Collator.class.isAssignableFrom(collatorClass))
					throw new XPathException("The specified collator class is not a subclass of java.text.Collator");
				return (Collator)collatorClass.newInstance();
			} catch (Exception e) {
				throw new XPathException("The specified collator class " + uri + " could not be found", e);
			}
		} else
			// unknown collation
			return null;
	}

	public final static boolean equals(Collator collator, String s1, String s2) {
		if(collator == null)
			return s1.equals(s2);
		else
			return collator.equals(s1, s2);
	}
	
	public final static int compare(Collator collator, String s1, String s2) {
		if(collator == null)
			return s1.compareTo(s2);
		else
			return collator.compare(s1, s2);
	}
	
	public final static boolean startsWith(Collator collator, String s1, String s2) {
		if(collator == null)
			return s1.startsWith(s2);
		else {
			final RuleBasedCollator rbc = (RuleBasedCollator)collator;
			final CollationElementIterator i1 = rbc.getCollationElementIterator(s1);
			final CollationElementIterator i2 = rbc.getCollationElementIterator(s2);
			return collationStartsWith(i1, i2);
		}
	}
	
	public final static boolean endsWith(Collator collator, String s1, String s2) {
		if(collator == null)
			return s1.endsWith(s2);
		else {
			final RuleBasedCollator rbc = (RuleBasedCollator)collator;
			final CollationElementIterator i1 = rbc.getCollationElementIterator(s1);
			final CollationElementIterator i2 = rbc.getCollationElementIterator(s2);
			return collationContains(i1, i2, null, true);
		}
	}
	
	public final static boolean contains(Collator collator, String s1, String s2) {
		if(collator == null)
			return s1.indexOf(s2) > -1;
		else {
			final RuleBasedCollator rbc = (RuleBasedCollator)collator;
			final CollationElementIterator i1 = rbc.getCollationElementIterator(s1);
			final CollationElementIterator i2 = rbc.getCollationElementIterator(s2);
			return collationContains(i1, i2, null, false);
		}
	}
	
	public final static int indexOf(Collator collator, String s1, String s2) {
		if(collator == null)
			return s1.indexOf(s2);
		else {
			final int offsets[] = new int[2]; 
			final RuleBasedCollator rbc = (RuleBasedCollator)collator;
			final CollationElementIterator i1 = rbc.getCollationElementIterator(s1);
			final CollationElementIterator i2 = rbc.getCollationElementIterator(s2);
			final boolean found = collationContains(i1, i2, offsets, false);
			if(found)
				return offsets[0];
			else
				return -1;
		}
	}
	
	private final static boolean collationStartsWith(CollationElementIterator s0,
			CollationElementIterator s1) {
		while (true) {
			int e1 = s1.next();
			if (e1 == -1) {
				return true;
			}
			int e0 = s0.next();
			if (e0 != e1) {
				return false;
			}
		}
	}
	 
	private final static boolean collationContains(CollationElementIterator s0, CollationElementIterator s1, 
			int[] offsets, boolean endsWith ) {
		int e1 = s1.next();
		if (e1 == -1) {
			return true;
		}
		int e0 = -1;
		while (true) {
			// scan the first string to find a matching character
			while (e0 != e1) {
				e0 = s0.next();
				if (e0 == -1) {
					// hit the end, no match
					return false;
				}
			}
			// matched first character, note the position of the possible match
			int start = s0.getOffset();
			if (collationStartsWith(s0, s1)) {
				if (!endsWith) {
					if (offsets != null) {
						offsets[0] = start-1;
						offsets[1] = s0.getOffset();
					}
					return true;
				} else {
					// operation == ENDSWITH
					if (s0.next() == -1) {
						// the match is at the end
						return true;
					}
					// else ignore this match and keep looking
				}
			}
			// reset the position and try again
			s0.setOffset(start);
			
			// workaround for a difference between JDK 1.4.0 and JDK 1.4.1
			if (s0.getOffset() != start) {
				// JDK 1.4.0 takes this path
				s0.next();
			}
			s1.reset();
			e0 = -1;
			e1 = s1.next();
			// loop round to try again
		}
	}

	/**
	 * @param lang
	 * @param strength
	 * @param decomposition
	 * @return
	 */
	private static Collator getCollationFromParams(String lang, String strength, String decomposition) 
	throws XPathException {
		Collator collator = null;
		if(lang == null) {
			collator = Collator.getInstance();
		} else if(lang.equals("sme_SE")) {
		    // Collation rules contained in a String object.
		    // Codes for the representation of names of languages:
		    // http://www.loc.gov/standards/iso639-2/englangn.html
		    // UTF-8 characters from: http://chouette.info/entities/table-utf8.php
		    String Samisk = "< a,A< \u00E1,\u00C1< b,B< c,C"+
			            "< \u010d,\u010c< d,D< \u0111,\u0110< e,E"+
			            "< f,F< g,G< h,H< i,I< j,J< k,K< l,L< m,M"+
			            "< n,N< \u014b,\u014a< o,O< p,P< r,R< s,S"+
			            "< \u0161,\u0160< t,T< \u0167,\u0166< u,U"+
			            "< v,V< z,Z< \u017e,\u017d" ;
		    try {
			collator= new RuleBasedCollator(Samisk);
		    } catch (ParseException pe) {
			return null;
		    }
		} else {
			Locale locale = getLocale(lang);
			LOG.debug("Using locale: " + locale.toString());
			collator = Collator.getInstance(locale);
		}
		
		if(strength != null) {
			if("primary".equals(strength))
				collator.setStrength(Collator.PRIMARY);
			else if("secondary".equals(strength))
				collator.setStrength(Collator.SECONDARY);
			else if("tertiary".equals(strength))
				collator.setStrength(Collator.TERTIARY);
			else if(strength.length() == 0 || "identical".equals(strength))
				// the default setting
				collator.setStrength(Collator.IDENTICAL);
			else
				throw new XPathException("Collation strength should be either 'primary', 'secondary', 'tertiary' or 'identical");
		}
		
		if(decomposition != null) {
			if("none".equals(decomposition))
				collator.setDecomposition(Collator.NO_DECOMPOSITION);
			else if("full".equals(decomposition))
				collator.setDecomposition(Collator.FULL_DECOMPOSITION);
			else if(decomposition.length() == 0 || "standard".equals(decomposition))
				// the default setting
				collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
			else
				throw new XPathException("Collation decomposition should be either 'none', 'full' or 'standard");
		}
		
		return collator;
	}

	/**
	 * @param lang
	 * @return
	 */
	private static Locale getLocale(String lang) {
		int dash = lang.indexOf('-');
		if(dash < 0)
			return new Locale(lang);
		else
			return new Locale(lang.substring(0, dash), lang.substring(dash + 1));
	}
	
	
}
