import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import no.divvun.Analyzer.Objects.Entry;
import no.divvun.Analyzer.Objects.LexcOptions;


/**
 * Command-line tool that reads one lexc file through {@code ReadLexc} and
 * prints the cleaned base form of every entry to standard output, one per
 * line, each followed by a tab character.
 *
 * The language and the part of speech handed to {@code ReadLexc} are both
 * derived from substrings of the file path given as the first argument.
 */
public class Baseforms {

	/**
	 * Program entry point.
	 *
	 * @param args command-line arguments; {@code args[0]} is the path of the
	 *             lexc file to process
	 * @throws IOException declared by the {@link #Baseforms(String[])} constructor
	 */
	public static void main(String[] args) throws IOException{
		// The constructor does all the work; the instance itself is not needed.
		new Baseforms(args);
	}
	
	/**
	 * Reads the lexc file named by {@code args[0]} and prints its base forms.
	 *
	 * Entries whose part of speech is {@code LexcOptions.ABBR} are printed
	 * according to their inflection information: without a trailing dot when
	 * it contains "-nodot", with a trailing dot when it contains "-dot", and
	 * in both variants otherwise. All other entries are printed as they are.
	 *
	 * If no file argument is given, or the path does not contain one of
	 * "sme", "smj" or "sma", a message is printed and nothing is read.
	 *
	 * @param args command-line arguments; {@code args[0]} is the lexc file path
	 * @throws IOException declared for the file-reading step
	 */
	public Baseforms(String[] args) throws IOException {
		// Guard against a missing argument instead of failing with
		// ArrayIndexOutOfBoundsException on args[0].
		if (args == null || args.length == 0) {
			System.err.println("Usage: java Baseforms <lexc-file>");
			return;
		}
		
		String filein = args[0];
		int LANG = 0;
		
		if (filein.contains("sme")) LANG = LexcOptions.SME;
		else if (filein.contains("smj")) LANG = LexcOptions.SMJ;
		else if (filein.contains("sma")) LANG = LexcOptions.SMA;
		else {
			System.out.println("Wrong type of file: Only sm[aej] files accepted");
			return;
		}
		
		int POS = detectPos(filein);
		
		ReadLexc readLexc = new ReadLexc (filein, LANG, POS);
		
		Iterator<Entry> iterator = readLexc.getEntries().iterator();
		
		while (iterator.hasNext()) {
			Entry entry = iterator.next();
			String word = cleanWord(entry.getWord());
			
			if (entry.getPOS() == LexcOptions.ABBR) {
				if (entry.getInfl().contains("-nodot")) {
					System.out.println(word + "\t");
				} else if (entry.getInfl().contains("-dot")) {
					System.out.println(word + ".\t");
				} else {
					// No explicit marker: emit both the dotted and the undotted form.
					System.out.println(word + ".\t");
					System.out.println(word + "\t");
				}
			} else {
				System.out.println(word + "\t");
			}
		}
	}
	
	/**
	 * Maps a lexc file path to a {@code LexcOptions} part-of-speech constant
	 * by looking for known substrings in the path.
	 *
	 * @param filein path of the lexc file
	 * @return the matching part-of-speech constant, or 0 when nothing matches
	 */
	private static int detectPos(String filein) {
		if (filein.contains("conjunction-sm")) return LexcOptions.CONJUNCTION;
		if (filein.contains("subjunction-sm")) return LexcOptions.SUBJUNCTION;
		// "propernoun-sm" and "pronoun-sm" both contain "noun-sm", so they
		// have to be tested before the plain noun pattern below.
		if (filein.contains("propernoun-sm")) return LexcOptions.PROPERNOUN;
		if (filein.contains("pronoun-sm")) return LexcOptions.PRONOUN;
		if (filein.contains("pp-sm")) return LexcOptions.ADPOSITION;
		if (filein.contains("noun-sm")) return LexcOptions.NOUN;
		if (filein.contains("verb-sm")) return LexcOptions.VERB;
		if (filein.contains("adj-sm")) return LexcOptions.ADJECTIVE;
		if (filein.contains("adv-sm")) return LexcOptions.ADVERB;
		if (filein.contains("acro-sm")) return LexcOptions.ACRO;
		if (filein.contains("abbr-sm")) return LexcOptions.ABBR;
		if (filein.contains("numeral-sm")) return LexcOptions.NUMERAL;
		if (filein.contains("num-sm")) return LexcOptions.NUM;
		if (filein.contains("particle-sm")) return LexcOptions.PARTICLE;
		if (filein.contains("interjection-sm")) return LexcOptions.INTERJECTION;
		if (filein.endsWith("/sme-lex.txt")) return LexcOptions.MIDDLE_NOUN;
		return 0;
	}
	
	/**
	 * Extracts the base form from a raw entry string.
	 *
	 * The string is cut at the first "+" if it contains one, otherwise at the
	 * first ":" (leading delimiter characters are skipped, as
	 * {@link StringTokenizer} does). The characters "#", "^", "0" and "%" are
	 * then removed from the result.
	 *
	 * @param word raw entry string; may be {@code null} or empty
	 * @return the cleaned base form, or an empty string when {@code word} is
	 *         {@code null}, empty or consists only of delimiter characters
	 */
	private String cleanWord(String word) {
		if (word == null)
			return "";
		
		String delimiter = word.contains("+") ? "+" : ":";
		StringTokenizer st = new StringTokenizer(word, delimiter);
		
		// Without this check nextToken() throws NoSuchElementException for
		// empty or delimiter-only input.
		if (!st.hasMoreTokens())
			return "";
		
		String clean = st.nextToken();
		
		// Plain character removal; none of these needs regex semantics.
		// NOTE(review): every "0" is dropped, including one preceded by "%" -
		// confirm this is intended for entries containing a literal zero.
		clean = clean.replace("#", "");
		clean = clean.replace("^", "");
		clean = clean.replace("0", "");
		clean = clean.replace("%", "");
		
		return clean;
	}

}
