import java.util.*; import no.divvun.Analyzer.Objects.*; public class PrintEntries { public PrintEntries() {} public static String printPlx(Entry entry) { StringBuilder buffer = new StringBuilder(); Paradigm paradigm = entry.getParadigm(); if(paradigm != null) printParadigmPlx(buffer, entry); else { System.err.println("No para: " + entry.getWord()); } return buffer.toString(); } public static String printHunspell(Entry entry) { StringBuilder buffer = new StringBuilder(); Vector inflections = new Vector(); String infl = entry.getInfl(); /* if (!inflections.contains(infl)) printHunspellAffix(buffer, entry); else printHunspellDict(buffer, entry); */ return buffer.toString(); } public static String printHunspellSuffix(Entry entry, Map> sufmap) { StringBuilder buffer = new StringBuilder(); String infl = entry.getInfl(); for (Map.Entry> e : sufmap.entrySet()) { String digraph = e.getKey(); List suffixes = e.getValue(); if (suffixes.size() > 0) { if (digraph == "default") digraph = ""; String intro = "SFX " + infl + digraph; buffer.append(intro + " Y " + suffixes.size() + "\n"); for (Iterator itr=suffixes.iterator(); itr.hasNext(); ) buffer.append(intro + " 0 " + itr.next() + "\n"); buffer.append("\n"); } } return buffer.toString(); } public static String printHunspellDict(String word, Map> inflmap) { StringBuilder buffer = new StringBuilder(); for (Map.Entry> e : inflmap.entrySet()) { String infl = e.getKey(); List wordforms = e.getValue(); // Generation gen; // gen = srv.generateWordform("geassi+N+Sg+Gen"); System.out.print(wordforms.get(0) + wordforms.get(1)); // if (wordforms.size() > 0) { // if (digraph == "default") // digraph = ""; // String intro = "SFX " + infl + digraph; // buffer.append(intro + " Y " + suffixes.size() + "\n"); // for (Iterator itr=suffixes.iterator(); itr.hasNext(); ) // buffer.append(intro + " 0 " + itr.next() + "\n"); // buffer.append("\n"); // } } return buffer.toString(); } public static void printAspell() { } private static void printParadigmPlx(StringBuilder buffer, Entry entry) { Paradigm paradigm = entry.getParadigm(); String sPOS = entry.getsPOS(); int iPOS = entry.getPOS(); String infl = entry.getInfl(); String word; String analysis; for(int i = 0; i < paradigm.size(); i++) { Reading reading = paradigm.get(i).getReading(); analysis = reading.getValue(); word = reading.getNext().getValue(); if (word == null) System.err.println("No word for " + analysis + " " + entry.getWord()); else { if (word.contains(" ")) { StringTokenizer st = new StringTokenizer(word); buffer.append(st.nextToken() + "\t"); printPlxWordClass(buffer, iPOS, analysis, entry.getCmp()); buffer.append(st.nextToken() + "\t"); printPlxWordClass(buffer, iPOS, analysis, entry.getCmp()); } else { buffer.append(word + "\t"); printPlxWordClass(buffer, iPOS, analysis, entry.getCmp()); // if (word.matches("[0-9]")) // { // buffer.append(word + "-" + "\t"); // printPlxWordClass(buffer, iPOS, analysis, entry.getCmp()); // } // if (iPOS == 47) { // buffer.append("-" + word + "\t"); // printPlxWordClass(buffer, iPOS, analysis, entry.getCmp()); // } } } } } private static void printPlxWordClass(StringBuilder buffer, int POS, String analysis, String cmp) { if (analysis.contains("+Der")) { String dertmp = analysis.substring(analysis.lastIndexOf("Der/")); char derpos = dertmp.charAt(dertmp.indexOf("+")+1); switch (derpos) { case 'V': buffer.append("VI"); break; case 'A': // buffer.append("JIR"); printPlxAdjTag(buffer, analysis); break; case 'N': printPlxNounTag(buffer, analysis, cmp); break; } } else { switch(POS) { case 13: //LexcOptions.ADVERB if (analysis.contains("+Cmpnd") || analysis.contains("+LCmpnd+")) buffer.append("NAPIBX,NtPABI"); else buffer.append("WI"); break; case 11: //LexcOptions.ADJECTIVE // buffer.append("JIR"); printPlxAdjTag(buffer, analysis); break; case 14: //LexcOptions.PROPERNOUN buffer.append("NeP"); // printPlxNounTag(buffer, entry, analysis, infl); break; case 10: //LexcOptions.NOUN // buffer.append(sPOS); printPlxNounTag(buffer, analysis, cmp); break; case 12: //LexcOptions.VERB buffer.append("VI"); break; case 15: //LexcOptions.PRONUON case 18: //LexcOptions.CONJUNCTION case 19: //LexcOptions.INTERJECTION case 46: //LexcOptions.ADPOSITION case 45: //LexcOptions.SUBJUNCTION case 48: //LexcOptions.ABBR if (analysis.contains("+Cmpnd") || analysis.contains("+LCmpnd+")) buffer.append("NAPIB,NtPABI"); else buffer.append("WI"); break; case 47: //LexcOptions.ACRO if (analysis.contains("+Cmpnd") || analysis.contains("+LCmpnd+")) buffer.append("NAPIB,NtPABI"); else buffer.append("WI,Nt+WI,W+WI,NeP+WI"); break; case 50: //LexcOptions.NUM // buffer.append("JAI"); if (analysis.contains("+Cmpnd") || analysis.contains("+RCmpnd+")) buffer.append("NIX,NePABO,NePEX"); else if (!buffer.substring(0, 1).matches("[0-9]")) { buffer.append("JAIE"); printPlxNumTag(buffer, analysis); } else buffer.append("NePIE"); break; case 16: //LexcOptions.NUMERAL buffer.append("JIOE"); // printPlxNumTag(buffer, analysis); break; case 17: //LexcOptions.PARTICLE buffer.append("WI"); break; case 49: // LexcOptions.MIDDLE_NOUN buffer.append("NA"); break; default: break; } } /* DEBUG buffer.append(" #" + analysis); */ buffer.append("\n"); } private static void printPlxNumTag(StringBuilder buffer, String analysis) { /** * Compound tags */ if (analysis.endsWith("Sg+Nom")) buffer.append("BO"); if (analysis.endsWith("Sg+Gen")) buffer.append("BO"); if (analysis.endsWith("Sg+Attr")) buffer.append("BO"); } private static void printPlxAdjTag(StringBuilder buffer, String analysis) { /** * Compound tags */ /* These are the defaults: * {{+SgNomCmp}} * {{+SgGenCmp}} (using the PLX class Ja) * {{+PlGenCmp}} (using the PLX class Jp) */ if (analysis.equals("A+Attr")) buffer.append("JIALR"); else if (analysis.endsWith("+Sg+Nom")) buffer.append("JIALR"); else if (analysis.endsWith("+Sg+Gen")) buffer.append("JaIALR"); else if (analysis.endsWith("+Pl+Gen")) buffer.append("JpIALR"); else if (analysis.endsWith("+SgCmp") || analysis.endsWith("+SgNomCmp")) buffer.append("JAL"); else if (analysis.endsWith("+SgGenCmp")) buffer.append("JaAL"); else if (analysis.endsWith("+PlGenCmp")) buffer.append("JpAL"); else buffer.append("JIR"); } private static void printPlxNounTag(StringBuilder buffer, String analysis, String cmp) { /** * Positional tags */ if (cmp.contains("+First")) { if (analysis.endsWith("+Sg+Gen")) buffer.append("GaIAL"); else if (analysis.endsWith("+Pl+Gen")) buffer.append("GpIAL"); else buffer.append("NIAL"); } else if (cmp.contains("+Last")) { buffer.append("NR"); return; } else if (cmp.contains("+CmpOnly")) { if (analysis.endsWith("+Sg+Gen")) buffer.append("GaAL"); else if (analysis.endsWith("+Pl+Gen")) buffer.append("GpAL"); else buffer.append("NALR"); } else if (cmp.contains("+None")) { buffer.append("N"); return; } else { /** * Compound tags */ /* These are the defaults: * {{+SgNomCmp}} * {{+SgGenCmp}} (using the PLX class Ga) * {{+PlGenCmp}} (using the PLX class Gp) */ if (analysis.endsWith("+Sg+Nom")) buffer.append("NIALR"); else if (analysis.endsWith("+Sg+Gen")) buffer.append("GaIALR"); else if (analysis.endsWith("+Pl+Gen")) buffer.append("GpIALR"); else if (analysis.endsWith("+SgCmp") || analysis.endsWith("+SgNomCmp")) buffer.append("NAL"); else if (analysis.endsWith("+SgGenCmp")) buffer.append("GaAL"); else if (analysis.endsWith("+PlGenCmp")) buffer.append("GpAL"); else buffer.append("NIR"); } } /** * @see http://www.divvun.no/doc/lang/common/compoundtags.html * @param buffer * @param entry * @param analysis * @param infl */ private static void printPlxNounTag2(StringBuilder buffer, Entry entry, String analysis, String infl) { String cmp = entry.getCmp(); /* These are the defaults: * {{+SgNomCmp}} * {{+SgGenCmp}} (using the PLX class Ga) * {{+PlGenCmp}} (using the PLX class Gp) */ if (analysis.endsWith("+Gen")) { /** * +SgGenCmp (default) */ if (analysis.contains("+Sg")) { if (cmp.equals("")) { /* if cmp is empty, then Ga: */ buffer.append("Ga"); } else if /* if cmp contains SgGenCmp, allow compounding with both Ga and regular nouns: */ ( cmp.contains("+SgGenCmp") ) { /* print NILR as PLX code - or perhaps only L at this point, it corresponds to Ga in the restricted Gen cmp case (the default): */ buffer.append("LAGa"); } else /* what do we do if cmp is NOT empty, but does not contain +SgGenCmp? Then it can't compound in SgGen, and it should only receive an NIR tag. */ /* output only NIR, or whatever should be added to the buffer at this point to give NIR in the final output. */ buffer.append(""); } /* Next: process Pl the same way: */ /** * +PlGenCmp (default) */ if (analysis.contains("+Pl")) { /* if cmp is empty, then Gp: */ if (cmp.equals("")) { buffer.append("Gp"); } else if /* use normal compound spec if cmp contains PlGenCmp: */ (cmp.contains("+PlGenCmp") ) { buffer.append("LAGp"); } } } /** * Positional tags */ if (cmp.contains("+First")) buffer.append("I"); else if (cmp.contains("+Last")) { buffer.append("IR"); return; } else if (cmp.contains("+CmpOnly")) ; else if (cmp.contains("+None")) return; else if (entry.getPOS() == 10) buffer.append("IR"); else if (entry.getPOS() == 14) buffer.append("R"); } }