/* * AlignmentModel.java * * ... * ... * @author Oystein Reigem */ package aksis.alignment; import java.awt.Color; import java.util.*; import java.io.*; //import java.util.regex.*; import javax.swing.*; import java.awt.event.MouseEvent; import java.lang.reflect.*; import java.awt.Toolkit; // beep import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.FactoryConfigurationError; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.*; import java.nio.charset.*; import java.text.*; import java.lang.Thread; //import javax.swing.event.ListDataEvent; import java.awt.Rectangle; ////////////////////////////////////////////////////////////////////////////////////////// // apparatus for structure and flow of alignable elements through the alignment process // ////////////////////////////////////////////////////////////////////////////////////////// /* class MyException extends Exception { public MyException() { } public MyException(String gripe) { super(gripe); } } */ class Aligned { /** * lists of aligned elements. * one list for each text. * shown in the gui's 'aligned' JList components. * each element is an AElement object. */ protected DefaultListModel[] elements; /** * the finished alignments. * each alignment is a Link object. */ List alignments = new ArrayList(); // package access. 
list of Link objects Aligned() { ////System.out.println("Aligned constructor"); elements = new DefaultListModel[Alignment.NUM_FILES]; ////System.out.println(elements.getClass().getName()); for (int t=0; t numberOfFirstAlignmentToDrop) { //System.out.println("drops alignment"); returnValue.alignments.add(returnValue.alignments.size(), alignments.remove(numberOfFirstAlignmentToDrop)); } // update aligned/total ratio in status line gui.model.setMemoryUsage(gui); // 2006-10-03 gui.model.updateAlignedTotalRatio(gui); // return returnValue; } } void pickUp(AlignGui gui, AlignmentsEtc valueGot, boolean scroll) { //System.out.println("pickUp A"); //System.out.println("pickUp()"); //MemTest.print("Tenured Gen", ""); if (valueGot != null) { // alignments //System.out.println("pickUp B"); //MemTest.print("Tenured Gen", ""); //######hvorfor tom løkke? for (int j=0; j 0) { returnValue.alignments.add(pending.remove(0)); //System.out.println("flush E"); //MemTest.print("Tenured Gen", ""); } //System.out.println("nå er pending.size() = " + pending.size()); for (int t=0; t 0) { ((DefaultListModel)(returnValue.elements[t])).addElement((AElement)(elements[t].remove(0))); //System.out.println("flush F"); //MemTest.print("Tenured Gen", ""); } //System.out.println("nå er elements[" + t + "].size() = " + elements[t].size()); } //System.out.println("flush G"); //MemTest.print("Tenured Gen", ""); return returnValue; } //} } } class Unaligned { /** * lists of unaligned elements. * one list for each text. * shown in the gui's 'unaligned' JList components. * each element is an AElement object. 
*/ protected DefaultListModel[] elements; // ########## private + get-metode er bedre Unaligned() { elements = new DefaultListModel[Alignment.NUM_FILES]; for (int t=0; t 0) { int last = ((Integer)(testElementNumbers[t].last())).intValue(); int first = ((Integer)(testElementNumbers[t].first())).intValue(); if ((last - first + 1) != testElementNumbers[t].size()) { // found hole for text t return true; } } } // found no hole for any text return false; } public boolean empty() { return (alignments.size() == 0); } // for debugging purposes public void print() { // loop through the alignments Iterator it = alignments.iterator(); System.out.println("<<>>"); // loop ... elements for (int t=0; t>>"); } } } /** * the program works with elements from xml files, e.g sentences. * each element is a node in a DOM tree. * but the program also needs to know which alignment each element is involved in, if any. * for this purpose the AElement object knows not only the element * but also the element's sequence number and the number of the alignment. */ class AElement { public static final int NUM_COLORS = 10; // foreløpig €€€€€€€€€€€€€€€ /** * the element itself. * a node in a DOM tree for the current text. */ Node element; /** * the sequence number of the element. * the elements of a text are numbered 0, 1, 2, 3, ... */ int elementNumber; /** * the number of the alignment the element is involved in. * alignments have a global numbering 0, 1, 2, 3, ... * #####################################################unused elements under consideration have a special value -1. */ int alignmentNumber; /** * the length in characters of the text content of the element. €€€€€€€€€€€€€€€€€€ burde normalisert whitespace */ int length; AElement(Node o, int en) { element = o; elementNumber = en; alignmentNumber = -1; // €€€ not used yet //length = XmlTools.getText(element).length(); length = element.getTextContent().length(); } public Color getColor() { //System.out.println("getColor. 
alignmentNumber = " + alignmentNumber); if (alignmentNumber == -1) { //return Color.white; return Color.getHSBColor((float)0.00, (float)0.00, (float)0.97); } else { return Color.getHSBColor((float)((float)alignmentNumber / NUM_COLORS), (float)0.13, (float)1.00); } } /** * makes a value that keeps - but normalizes - the division into lines of the element. * €€€because some files can have odd line endings. * if this value is rendered in a list box as a one-line thing it will not wrap. * if this value is rendered in a list box as a multi-line thing it will wrap at line endings. * €€€ €€€ €€€ suddenly wrap works after all! and this method isn't used! * €€€ fixed */ public String toString() { //System.out.println("AElement sin toString"); // pattern = [\n\r]+ , i.e, matches all kinds of line endings, also multiple endings // 2006-09-19 Pattern pattern = Pattern.compile("[\\n\\r]+"); //Matcher matcher = pattern.matcher(element.toString()); // since 1.5 Node.toString() yields e.g '[s: null]' and not 'Blah blah blah' //Matcher matcher = pattern.matcher(element.getTextContent()); // €€€just the text, e.g, 'Blah blah blah' //System.out.println("kaller getXmlContent. resultat: " + XmlTools.getXmlContent(element)); // 2006-09-19 Matcher matcher = pattern.matcher(XmlTools.getXmlContent(element)); // €€€just the text, e.g, 'Blah blah blah' //Matcher matcher = pattern.matcher(element.getNodeValue()); // €€€leads to Exception in thread "AWT-EventQueue-0" java.lang.NullPointerException //return "element nummer " + elementNumber + ": " + matcher.replaceAll("\n"); // replaces all kinds of line endings with a standard one //return matcher.replaceAll("\n"); // replaces all kinds of line endings with a standard one // 2006-09-19 return matcher.replaceAll(" "); // §§§ return XmlTools.getXmlContent(element); // 2006-09-19 //return "test"; // |||---||| // €€€ merkelig. dersom vi standardiserer til \n, // får vi ordentlig wrap + wrap der det er \n. 
// dersom vi setter blank, får vi ikke wrap. // €€€ hmm det er noe tull med \n-måten. // når linjer wrapper, ser vi ikke slutten. // det er noe feil i utregningen av hvor mye plass som trengs. // €€€ har ikke noe med scroll bar å gjøre. ser det i aligned også. //return matcher.replaceAll(" ") + "\n"; // €€€ yeah! word wrap works! €€€ øh/ alle elementer blir to linjer høye! //return matcher.replaceAll("\n") + "\n"; } // ###some users might like parent info prepended to the elements // in their newline format output files. // this method makes a suitable version for that purpose public String toNewString(AncestorFilter filter) { //Pattern pattern = Pattern.compile("[\\n\\r]+"); // pattern = [\n\r]+ , i.e, matches all kinds of line endings, also multiple endings //Matcher matcher = pattern.matcher(XmlTools.getXmlContent(element)); // €€€just the text, e.g, 'Blah blah blah' //#### skal dette ut i XmlTools? Node current = element; //short test2 = element.getNodeType(); //###debug String pathText = ""; if (!filter.denyAll()) { String ancestorInfo; NamedNodeMap attrs; String elementName; Attr attribute; boolean done = false; //short test = Node.ELEMENT_NODE; //###debug while (!done) { // next parent? try { current = current.getParentNode(); } catch (DOMException e) { done = true; } // ??? 
if (current == null) { done = true; } else { // but stop before root element is reached try { Node test = current.getParentNode(); if (test.getNodeName() == "#document") { done = true; } } catch (DOMException e) { done = true; } } if (!done) { if (current.getNodeType() == Node.ELEMENT_NODE) { // elementName = current.getNodeName(); if (filter.allowElement(elementName)) { ancestorInfo = "<" + elementName; attrs = current.getAttributes(); for (int i = 0; i < attrs.getLength(); i++) { attribute = (Attr)attrs.item(i); if (filter.allowAttribute(elementName, attribute.getName())) { ancestorInfo += " " + attribute.getName() + "='" + attribute.getValue() + "'"; } } ancestorInfo += ">"; pathText = ancestorInfo + " " + pathText; } } } } } //return pathText + matcher.replaceAll(" "); // §§§ return pathText + XmlTools.getXmlContent(element); // 2006-09-19 (nå skal elementet inneholde tekst uten (særlig) unødig whitespace) } } /** * each Link object represents an alignment - a finished one or pending one. * ##########################################################in addition a Link object is used for unused elements under consideration. */ class Link { /** * alignments are numbered 0, 1, 2, 3, ... * the numbering is global, so the numbering of pending alignments * continues the numbering of finished alignments. * #################################################unused elements under consideration have a special number -1. €€€UNUSED */ int alignmentNumber; // ########################skulle hatt set-metode. m.fl. /** * the numbers of the elements involved in the alignment. * one set for each text. 
*/ TreeSet[] elementNumbers; Link() { alignmentNumber = -1; // €€€ elementNumbers = new TreeSet[Alignment.NUM_FILES]; for (int t=0; t 0) { return false; } } return true; } int countElements() { int count = 0; for (int t=0; t 0) { str += ";"; } str += "size=" + elementNumbers[t].size(); Iterator e = ((TreeSet)(elementNumbers[t])).iterator(); while (e.hasNext()) { str += ",el="; str += e.next(); } } str += ")"; str += " alignment nummer " + alignmentNumber; return str; } } /////////////////////////////////////////////

/**
 * Separate thread for loading files.
 * To be more precise it is not for reading a file into a DOM tree, but for the
 * processing of the elements done afterwards: each alignable node is wrapped in
 * an AElement and added to the model's unaligned list. Progress is shown in the
 * status line, which is why the work runs in a thread of its own.
 */
class LoadFileThread extends Thread {

    AlignGui gui;
    NodeList[] nodes;
    /** Number of the text being loaded. */
    int t;
    /** Progress of the current load, 0-100; read by the GUI update runnables. */
    int percentDone = 0;
    /** The percentage that was last shown in the status line. */
    int prevPercentDisplayed = 0;
    /** Number of the element currently being processed; read by the GUI update runnables. */
    int elementNumber;

    /**
     * Creates a loader for one text.
     *
     * @param gui   the gui whose model and status line are updated
     * @param nodes the lists of alignable nodes, one list per text
     * @param t     the number of the text to process
     */
    public LoadFileThread(AlignGui gui, NodeList[] nodes, int t) {
        this.gui = gui;
        this.nodes = nodes;
        this.t = t;
    }

    /**
     * Runs a GUI update on the event dispatch thread and waits for it to finish.
     * Failures are reported through ErrorMessage and otherwise ignored.
     */
    void doUpdate(Runnable r) {
        try {
            SwingUtilities.invokeAndWait(r);
        } catch (InvocationTargetException e1) {
            ErrorMessage.error(e1.toString());   // 2006-08-10
        } catch (InterruptedException e2) {
            // NOTE(review): the interrupt status is not restored here, so loading
            // continues after an interrupt. Kept as is; confirm this is intended.
            ErrorMessage.error(e2.toString());   // 2006-08-10
        }
    }

    /**
     * Wraps every node of text {@code t} in an AElement, adds it to the unaligned
     * list of the model, and keeps the status line up to date while doing so.
     */
    public void run() {

        // reset the status line
        doUpdate(new Runnable() {
            public void run() {
                gui.statusLine.setText("");
                gui.statusLine.setProgress(percentDone);
            }
        });

        int numElements = nodes[t].getLength();

        // How many percentage points to advance between two status line updates:
        // 10^sqrt(log10(numElements / 100) + 1), clamped to the range 10..100.
        // (Math.log(x)/Math.log(10) is the original's stand-in for Math.log10(x).)
        double logTerm = Math.log((double)numElements / 100) / Math.log(10);
        int step = Math.round((float)(Math.pow(10, Math.sqrt(logTerm + 1))));
        step = Math.min(step, 100);
        step = Math.max(step, 10);

        for (elementNumber = 0; elementNumber < numElements; elementNumber++) {

            AElement element = new AElement(nodes[t].item(elementNumber), elementNumber);
            // NOTE(review): this changes a list shown in the gui from outside the
            // event dispatch thread. Kept as is to preserve loading speed.
            gui.model.unaligned.add(t, element);

            percentDone = Math.round((float)((float)(elementNumber+1) / numElements * 100.0));
            if (percentDone >= prevPercentDisplayed + step) {
                doUpdate(new Runnable() {
                    public void run() {
                        gui.statusLine.setText(Integer.toString(elementNumber+1));
                        gui.statusLine.setProgress(percentDone);
                    }
                });
                prevPercentDisplayed = percentDone;
            }

        }

        doUpdate(new Runnable() {
            public void run() {
                gui.statusLine.setText("Text parsed");
                gui.statusLine.setProgress(100);
            }
        });

        // Problem: sometimes at this point the content of the unaligned area does
        // not show. Shake it by removing and re-adding the first element.
        // The shake is skipped when nothing was loaded (remove(0) on an empty list
        // throws), and it runs on the event dispatch thread because the list model
        // belongs to a visible JList.
        doUpdate(new Runnable() {
            public void run() {
                DefaultListModel list = gui.model.unaligned.elements[t];
                if (!list.isEmpty()) {
                    list.add(0, list.remove(0));
                }
            }
        });

    }

}

///////////////////////////////////////////// /** * information about how the current elements under alignment match * with respect to anchor words, proper names, dice, length, etc. * displayable version. 
* formatted into a list of lines * ######### to ulike steder som beregner skåre */ //class MatchInfoDisplayable { class MatchInfo { AlignmentModel model; protected DefaultListModel displayableList; //MatchInfoDisplayable(AlignmentModel model) { MatchInfo(AlignmentModel model) { this.model = model; displayableList = new DefaultListModel(); } //public void compute() { // // //...; // //} public void clear() { // §§§§§§§§§§§§§§§§§§§§§§§ //...; } public void purge() { displayableList.clear(); // (keep model) } //public String toString() { public void computeDisplayableList() { int t; int n; //System.out.println("computeDisplayableList()"); ElementInfoToBeCompared elementInfoToBeCompared = new ElementInfoToBeCompared(model); // collect necessary info in an ElementInfoToBeCompared object for (t=0; t high) { high = count; } //if (count < low) { low = count; } } /*if (presentInAllTexts) { if (model.getClusterScoreMethod() == 3) { //str.append(" (" + high + " points)"); retLine += " (" + high + " points)"; } if (model.getClusterScoreMethod() == 2) { //str.append(" (" + low + " points)"); retLine += " (" + low + " points)"; } else { // if (model.getClusterScoreMethod() == 1) //str.append(" (" + 1 + " points)"); // €€€ sløyfe? retLine += " (" + 1 + " points)"; // €€€ sløyfe? } //str.append("\n"); //System.out.println("add'er retLine = " + retLine); ret.add(retLine); //System.out.println("nå er det " + ret.size() + " linjer i ret"); }*/ // 2006-04-05 } /*// add points for this anchor word list entry if (presentInAllTexts) { highSum += high; lowSum += low; oneSum += 1; }*/ // 2006-04-05 } // ... /*if (model.getClusterScoreMethod() == 3) { anchorWordScore = highSum; } if (model.getClusterScoreMethod() == 2) { anchorWordScore = lowSum; } else { // if (model.getClusterScoreMethod() == 1) anchorWordScore = oneSum; }*/ // 2006-04-05 //System.out.println(">>> anchorWordScore = " + anchorWordScore + "\n"); // ... 
//if (str.length() > 0) { // str.insert(0, INDENT + "Anchor word score: " + anchorWordScore + "\n"); //} else { // str.insert(0, INDENT + "No anchor word matches. Score: 0\n"); //} //int anchorWordScore = anchorWordClusters.getScore(model.getClusterScoreMethod()); // 2006-04-05 //float anchorWordScore = anchorWordClusters.getScore(model.getClusterScoreMethod()); // 2006-04-07 float anchorWordScore = anchorWordClusters.getScore(model.getLargeClusterScorePercentage()); // next line of info... //if (anchorWordScore > 0) { // ### ryddigere med samme syntaks alltid retLine = INDENT + INDENT + "Anchor word score: " + myFormatter.format(anchorWordScore); // 2006-04-05 //} else { // retLine = INDENT + "No anchor word matches. Score: 0"; //} //// ...is header for anchor info. insert at top //ret.add(0, retLine); ret.add(retLine); // 2006-04-05 indentLevel = 3; // 2006-04-05 includeMatchType = true; // i.e, include anchor word entry number. ### + 1 ### ugly ret.addAll(anchorWordClusters.getDetails(indentLevel, includeMatchType)); // getDetails() does its own indentation and endline. ### ikke helt bra? // 2006-04-05 //// ... //score += anchorWordScore; // 2006-04-05 /////////////////// // proper names, // // dice, // // and numbers // /////////////////// //int properNameScore = 0; // 2006-04-05 //int diceScore = 0; // 2006-04-05 // check all the words in one text against all the words in the other. // collect clusters of proper names. // collect clusters of dice-related words. // collect clusters of numbers. // (usually all the words in a cluster will be related to each other, // but not necessarily.) String word1; String word2; String nextWord1; // 2006-04-07 String nextWord2; // 2006-04-07 //String phrase1; // 2006-04-07. words glued together without space between them //String phrase2; // 2006-04-07. words glued together without space between them String showPhrase1; // 2006-04-18. words with space between them String showPhrase2; // 2006-04-18. 
words with space between them //Clusters properNameClusters = new Clusters(model.getClusterScoreMethod()); Clusters properNameClusters = new Clusters(); // 2006-04-05 //Clusters diceClusters = new Clusters(model.getClusterScoreMethod()); Clusters diceClusters = new Clusters(); // 2006-04-05 Clusters numberClusters = new Clusters(); // 2006-04-06 //System.out.println("Skipper proper, dice, numbers"); for (t=0; t= model.getDiceMinCountingScore()) { if (SimilarityUtils.diceMatch(word1, word2, model.getDiceMinCountingScore())) { // 2006-08-09 // the words are related. // add to cluster list //System.out.println("\n" + word1 + " and " + word2 + " are dice-related. add to cluster list"); //diceClusters.add(t, tt, x, y, word1, word2); matchType = Match.DICE; // 2006-04-05 //weight = 1.0f; // 2006-04-05 weight = model.getDiceMatchWeight(); // 2006-04-07 //diceClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, word1, word2); // 2006-04-05 diceClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, 1, 1, word1, word2); // 2006-04-07 } } // 2006-04-07 // also try dice on 2 words against 1 word... 
if (nextWord1 != "") { //phrase1 = word1 + " " + nextWord1; //phrase1 = word1 + nextWord1; showPhrase1 = word1 + " " + nextWord1; // 2006-04-18 // first check if the phrases/words are long enough to be considered //if ((phrase1.length()-1 >= model.getDiceMinWordLength()) && (word2.length() >= model.getDiceMinWordLength())) { //if ((phrase1.length() >= model.getDiceMinWordLength()) && (word2.length() >= model.getDiceMinWordLength())) { if ( (word1.length() >= model.getDiceMinWordLength()) && (nextWord1.length() >= model.getDiceMinWordLength()) && (word2.length() >= model.getDiceMinWordLength())) { // 2006-04-18 //if (SimilarityUtils.dice(phrase1, word2) >= model.getDiceMinCountingScore()) { //if (SimilarityUtils.dice(phrase1, word2, "2-1") >= model.getDiceMinCountingScore()) { // 2006-04-18 if (SimilarityUtils.diceMatch(word1, nextWord1, word2, "2-1", model.getDiceMinCountingScore())) { // 2006-08-09 // the phrases/words are related. // add to cluster list //System.out.println("\n" + phrase1 + " and " + word2 + " are dice-related. 
add to cluster list"); matchType = Match.DICE; // 2006-04-05 weight = model.getDicePhraseMatchWeight(); // 2006-04-07 //diceClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, 2, 1, phrase1, word2); diceClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, 2, 1, showPhrase1, word2); // 2006-04-18 } } } // ...and 1 word against 2 words if (nextWord2 != "") { //phrase2 = word2 + " " + nextWord2; //phrase2 = word2 + nextWord2; showPhrase2 = word2 + " " + nextWord2; // 2006-04-18 // first check if the phrases/words are long enough to be considered //if ((word1.length() >= model.getDiceMinWordLength()) && (phrase2.length()-1 >= model.getDiceMinWordLength())) { //if ((word1.length() >= model.getDiceMinWordLength()) && (phrase2.length() >= model.getDiceMinWordLength())) { if ( (word1.length() >= model.getDiceMinWordLength()) && (word2.length() >= model.getDiceMinWordLength()) && (nextWord2.length() >= model.getDiceMinWordLength())) { // 2006-04-18 //if (SimilarityUtils.dice(word1, phrase2) >= model.getDiceMinCountingScore()) { //if (SimilarityUtils.dice(word1, phrase2, "1-2") >= model.getDiceMinCountingScore()) { // 2006-04-18 if (SimilarityUtils.diceMatch(word1, word2, nextWord2, "1-2", model.getDiceMinCountingScore())) { // 2006-08-09 // the phrases/words are related. // add to cluster list //System.out.println("\n" + word1 + " and " + phrase2 + " are dice-related. 
add to cluster list"); matchType = Match.DICE; // 2006-04-05 weight = model.getDicePhraseMatchWeight(); // 2006-04-07 //diceClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, 1, 2, word1, phrase2); diceClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, 1, 2, word1, showPhrase2); // 2006-04-18 } } } // end 2006-04-07 // 2006-04-06 // numbers float num1; float num2; try { num1 = Float.parseFloat(word1); num2 = Float.parseFloat(word2); if (num1 == num2) { // same number // add to cluster list matchType = Match.NUMBER; //weight = 1.0f; weight = model.getNumberMatchWeight(); // 2006-04-07 //numberClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, word1, word2); numberClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, 1, 1, word1, word2); // 2006-04-07 } } catch (NumberFormatException ne) { } // end 2006-04-06 } } } } } } //System.out.println("%%% properNameClusters ferdig = " + properNameClusters); //int properNameScore = properNameClusters.getScore(model.getClusterScoreMethod()); //float properNameScore = properNameClusters.getScore(model.getClusterScoreMethod()); float properNameScore = properNameClusters.getScore(model.getLargeClusterScorePercentage()); //int diceScore = diceClusters.getScore(model.getClusterScoreMethod()); //float diceScore = diceClusters.getScore(model.getClusterScoreMethod()); float diceScore = diceClusters.getScore(model.getLargeClusterScorePercentage()); //int numberScore = numberClusters.getScore(model.getClusterScoreMethod()); //float numberScore = numberClusters.getScore(model.getClusterScoreMethod()); float numberScore = numberClusters.getScore(model.getLargeClusterScorePercentage()); // ... 
//str.append(INDENT + "Proper name score: " + properNameScore + "\n"); retLine = INDENT + INDENT + "Proper name score: " + myFormatter.format(properNameScore); // 2006-04-05 ret.add(retLine); //score += properNameScore; // 2006-04-05 //str.append(properNameClusters.getWords()); // getWords() does its own indentation and endline. ### ikke helt bra? //ret.addAll(properNameClusters.getDetails()); // getDetails() does its own indentation and endline. ### ikke helt bra? indentLevel = 3; // 2006-04-05 includeMatchType = false; ret.addAll(properNameClusters.getDetails(indentLevel, includeMatchType)); // getDetails() does its own indentation and endline. ### ikke helt bra? // 2006-04-05 //str.append(INDENT + "Dice score: " + diceScore + "\n"); retLine = INDENT + INDENT + "Dice score: " + myFormatter.format(diceScore); // 2006-04-05 ret.add(retLine); //score += diceScore; // 2006-04-05 //str.append(diceClusters.getWords()); // getWords() does its own indentation and endline. ### ikke helt bra? //ret.addAll(diceClusters.getDetails()); // getDetails() does its own indentation and endline. ### ikke helt bra? indentLevel = 3; // 2006-04-05 includeMatchType = false; ret.addAll(diceClusters.getDetails(indentLevel, includeMatchType)); // getDetails() does its own indentation and endline. ### ikke helt bra? 
// 2006-04-05 // 2006-04-06 retLine = INDENT + INDENT + "Number score: " + myFormatter.format(numberScore); ret.add(retLine); indentLevel = 3; includeMatchType = false; ret.addAll(numberClusters.getDetails(indentLevel, includeMatchType)); // end 2006-04-06 // 2006-04-05 //////////////////////////////// // common score for anchor words, proper names, dice and numbers Clusters commonClusters = new Clusters(); commonClusters.add(anchorWordClusters); commonClusters.add(properNameClusters); commonClusters.add(diceClusters); commonClusters.add(numberClusters); // 2006-04-06 //int commonScore = commonClusters.getScore(model.getClusterScoreMethod()); //float commonScore = commonClusters.getScore(model.getClusterScoreMethod()); float commonScore = commonClusters.getScore(model.getLargeClusterScorePercentage()); // go back and insert the common score for the word based methods ret.set(wordMethodsScoreLineNumber, (String)ret.get(wordMethodsScoreLineNumber) + myFormatter.format(commonScore)); score += commonScore; // end 2006-04-05 // debugging or testing String tempo = commonClusters.nonTrivialClusters_ToString(); if (tempo != "") { System.out.println(tempo); } //////////////////////////////// // scoring special characters // //////////////////////////////// //int scoringCharacterScore = 0; // 2006-04-05 // check all the ... ... ... 
String char1; String char2; //Clusters scoringCharacterClusters = new Clusters(model.getClusterScoreMethod()); Clusters scoringCharacterClusters = new Clusters(); // 2006-04-05 for (t=0; t>> score = " + score + "\n"); retLine = "Lengths " + length[0] + " (" + myFormatter.format(length[0]*model.getLengthRatio()) + ") and " + length[1]; if (score > scoreBefore) { //str.append("Lengths " + length[0] + " and " + length[1] + " match well,\n" + INDENT + "increasing score from " + scoreBefore + " to " + score + "\n"); //retLine = "Lengths " + length[0] + " and " + length[1] + " match well,"; retLine += " match well,"; ret.add(retLine); retLine = INDENT + "increasing score from " + myFormatter.format(scoreBefore) + " to " + myFormatter.format(score); ret.add(retLine); } else if (score < scoreBefore) { //str.append("Lengths " + length[0] + " and " + length[1] + " don't match well,\n" + INDENT + "reducing score from " + scoreBefore + " to " + score + "\n"); //retLine = "Lengths " + length[0] + " and " + length[1] + " don't match well,"; retLine += " don't match well,"; ret.add(retLine); retLine = INDENT + "reducing score from " + myFormatter.format(scoreBefore) + " to " + myFormatter.format(score); ret.add(retLine); } else { //str.append("Lengths " + length[0] + " and " + length[1] + " match so-so,\n" + INDENT + "making no change to the score " + score + "\n"); //retLine = "Lengths " + length[0] + " and " + length[1] + " match so-so,"; retLine += " match so-so,"; ret.add(retLine); retLine = INDENT + "making no change to the score " + myFormatter.format(score); ret.add(retLine); } //////////////////////////////////// // micro adjustment to break ties // 2005-11-03 //////////////////////////////////// // when otherwise scoring equal, // paths with 1-1's are to preferred // over paths with other alignments. // add (subtract) micro punishment if step is not 1-1 boolean is11 = true; for (t=0; t elements NodeList[] nodes; // package access. 
2004-11-09: flytter denne fra load...thread til hit i model. liste over alle relevante elementer // list of all elements, e.g, also

elements NodeList[] allNodes; // package access. 2005-09-01. trenger denne fordi: søker etter node med bestemt id. noden kan være på høyere nivå, f.eks

i stedet for . og får ikke til å bruke Document.getElementById() // ########## skulle vært Hashtable? // alignable elements and their ancestors### HashMap relevantElementNames = new HashMap(); HashMap relevantAncestorElementNames = new HashMap(); private DocumentBuilder builder; protected File currentOpenDirectory; protected File currentSaveDirectory; protected String[] inputFilepath = new String[Alignment.NUM_FILES]; protected String[] outputFilepath = new String[Alignment.NUM_FILES]; protected String[] inputFilename = new String[Alignment.NUM_FILES]; protected String anchorFilename = ""; protected String settingsFilename = ""; // 2006-09-21 protected Charset[] charset = new Charset[Alignment.NUM_FILES]; // input files character set. output files character set = input files character set protected Aligned aligned; protected ToAlign toAlign; protected Unaligned unaligned; private String specialCharacters = Alignment.DEFAULT__SPECIAL_CHARACTERS; private String scoringCharacters = Alignment.DEFAULT__SCORING_CHARACTERS; private float lengthRatio = Alignment.DEFAULT__LENGTH_RATIO; private int diceMinWordLength = Alignment.DEFAULT__DICE_MIN_WORD_LENGTH; private float diceMinCountingScore = Alignment.DEFAULT__DICE_MIN_COUNTING_SCORE; //private int clusterScoreMethod = Alignment.DEFAULT__CLUSTER_SCORE_METHOD; private int largeClusterScorePercentage = Alignment.DEFAULT__LARGE_CLUSTER_SCORE_PERCENTAGE; private int maxPathLength = Alignment.DEFAULT__MAX_PATH_LENGTH; // private float anchorWordMatchWeight = Alignment.DEFAULT__ANCHORWORD_MATCH_WEIGHT; private float anchorPhraseMatchWeight = Alignment.DEFAULT__ANCHORPHRASE_MATCH_WEIGHT; private float properNameMatchWeight = Alignment.DEFAULT__PROPERNAME_MATCH_WEIGHT; private float diceMatchWeight = Alignment.DEFAULT__DICE_MATCH_WEIGHT; private float dicePhraseMatchWeight = Alignment.DEFAULT__DICEPHRASE_MATCH_WEIGHT; private float numberMatchWeight = Alignment.DEFAULT__NUMBER_MATCH_WEIGHT; private float 
scoringCharacterMatchWeight = Alignment.DEFAULT__SCORINGCHARACTER_MATCH_WEIGHT; /* private int outputFileNamingMethod = Alignment.DEFAULT__FILE_NAMING_METHOD; private String fileNamingCorrespExtension = Alignment.DEFAULT__CORRESP_EXTENSION; private String fileNamingNewlineExtension = Alignment.DEFAULT__NEWLINE_EXTENSION; private String fileNamingCorrespSuffix = Alignment.DEFAULT__CORRESP_SUFFIX; private String fileNamingNewlineSuffix = Alignment.DEFAULT__NEWLINE_SUFFIX; */ // filter for newline format ancestor info AncestorFilter ancestorFilter = new AncestorFilter(AncestorFilter.MODE_ALLOW, "", ""); // default = allow none = deny all // 2006-02-23 match info log file //protected String logFilename = Alignment.DEFAULT__LOG_FILENAME; protected String logFilename = ""; protected OutputStreamWriter logFileOut; boolean logging = false; // logging on/off (true/false) protected AnchorWordList anchorWordList; protected Compare compare; //protected AnchorWordMatches anchorWordMatches; // ### computed at suggest(), but not at unalign() //protected MatchInfoDisplayable matchInfoDisplayable; // ### computed at suggest(), but not at unalign() protected MatchInfo matchInfo; // ### computed at suggest(), but not at unalign() public AlignmentModel() { // package access €€€ nei dette er jo public ////System.out.println("går i gang med å lage model"); // ###hvorfor står disse her? skal de ikke opp blant members? 
setRelevantElementNames(Alignment.DEFAULT__RELEVANT_ELEMENT_NAMES); setRelevantAncestorElementNames(Alignment.DEFAULT__RELEVANT_ANCESTOR_ELEMENT_NAMES); ////System.out.println("skal be om å få laget aligned"); aligned = new Aligned(); ////System.out.println("skal be om å få laget toAlign"); toAlign = new ToAlign(); ////System.out.println("skal be om å få laget unaligned"); unaligned = new Unaligned(); ////System.out.println("har fått laget unaligned"); docs = new Document[Alignment.NUM_FILES]; nodes = new NodeList[Alignment.NUM_FILES]; allNodes = new NodeList[Alignment.NUM_FILES]; // set up the parser here. DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setValidating(false); // #### være et brukervalg??? //factory.setValidating(true); //factory.setNamespaceAware(true); try { builder = factory.newDocumentBuilder(); } catch (ParserConfigurationException pce) { // parser with specified options can't be built pce.printStackTrace(); } compare = new Compare(); //anchorWordList = new AnchorWordList(); anchorWordList = new AnchorWordList(AlignmentModel.this); //anchorWordMatches = new AnchorWordMatches(); //matchInfoDisplayable = new MatchInfoDisplayable(AlignmentModel.this); matchInfo = new MatchInfo(AlignmentModel.this); /* //The plugin that calculates alignment //2004-02-19: When I have other plugins, there must be a mechanism // to choose different plugins. plugin = new ExistingCorrespPlugin(); */ } public void purge(AlignGui gui) { // ###dupl kode. se konstruktor. // men gir det mening å skille dette ut i en metode, // f.eks la konstruktor bruke purge()? // #########ikke dupl likevel... aligned.purge(); toAlign.purge(); unaligned.purge(); for (int t=0; t 0) { text += " - "; } text += Integer.toString(getLowestUnalignedElementNumber(t) + 1) + "/" + nodes[t].getLength(); } gui.statusLine.setText(text); // 2006-10-03. ###disse funker ikke. må jeg yielde på en eller annen måte???? 
//gui.statusLine.invalidate(); // 2006-08-14 //gui.statusLine.validate(); // 2006-08-14 System.out.println(text); // 2006-10-03 } int getLowestUnalignedElementNumber(int t) { // lowest unaligned or under consideration if (toAlign.elements[t].size() > 0) { return ((AElement)(toAlign.elements[t].get(0))).elementNumber; } else if (unaligned.elements[t].size() > 0) { return ((AElement)(unaligned.elements[t].get(0))).elementNumber; } else { return nodes[t].getLength() - 1; } // ### AlignGui gui, // ### gui.model. } // /** * €€€€€€€€€€€€€€€€€€€€€Loads an xml file. * @return true if loading was successful, false if there was an error. * (most likely a parsing error) */ //void loadFile(AlignGui gui, File f, int t) { // package access //void loadFile(AlignGui gui, File f, int t) throws EmptyElementException { // package access // 2006-09-19 void loadFile(AlignGui gui, File f, int t) throws Exception { // package access // 2006-09-22 //void loadFile(File f, int t) { // package access ////System.out.println("f = " + f); // ... Document result = null; try { // make DOM tree from xml file ////gui.counterDoc.insertString(0, "file -> DOM", null); //gui.counter.setText("file -> DOM"); gui.statusLine.setText("File -> DOM"); result = builder.parse(f); //System.out.println("File " + f.getName() + " loaded as text " + t+1); //System.out.println("File " + f.getName() + " loaded as text " + (t+1)); //€€€2006-02-28. for å kunne lagre utfil med samme encoding som innfil Charset cs = Charset.forName(result.getXmlEncoding()); setCharset(t, cs); //// ### gjør dette kun for å fortelle hvor mange elementer det er? //// ### men det er jo ikke direkte child nodes vi er interessert i. disse kan jo være

f.eks //NodeList childNodes = result.getChildNodes(); //System.out.println("Child node count: " + childNodes.getLength()); //childNodes = null; docs[t] = result; // 2006-09-19 } catch (Exception e) { // €€€€€€€€€€€€€€€ ErrorMessage.error("Exception (1) when loading text " + (t+1) + " " + f.getName() + ":\n" + e.toString()); // 2006-08-10 } // end 2006-09-19 // get a list of alignable elements from the DOM tree try { // 2006-09-19 nodes[t] = getElements(t); //} catch (EmptyElementException e) { // 2006-09-19 } catch (Exception e) { // 2006-09-22 throw e; // ### // 2006-09-19 } // 2006-09-19 try { // 2006-09-19 // get a list of all elements allNodes[t] = docs[t].getElementsByTagName("*"); // €€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€dette er ikke skikkelig // clear ... //System.out.println("*** do clear stuff here? ***"); // fill the unaligned list boxes with a suitable version of the elements //System.out.println("Element count: " + nodes[t].getLength()); // process element list. // update GUI while processing element list. // do processing in separate thread so GUI elements can be updated. Thread load = new LoadFileThread(gui, nodes, t); load.start(); //// init aligned/total ratio in status line // ### funker visst ikke, men skitt i det //updateAlignedTotalRatio(gui); // ### aha. metoden vil jo ikke funke når bare én fil er lest inn. // ### og når fil nr to er lest inn, vil vi vel at det skal stå "Parsed", // og ikke overskrive dette med "0/9999 - 0/9999" - ? 
// remember name and full pathname of input file gui.model.inputFilepath[t] = f.getCanonicalPath(); gui.model.inputFilename[t] = f.getName(); } catch (Exception e) { // €€€€€€€€€€€€€€€ //System.err.println("Exception when loading " + t + " " + f.getName() + ": "); //System.err.println(e.toString()); //ErrorMessage.error("Exception when loading " + t + " " + f.getName() + ":\n" + e.toString()); // 2006-08-10 ErrorMessage.error("Exception (2) when loading text " + (t+1) + " " + f.getName() + ":\n" + e.toString()); // 2006-08-10 //e.printStackTrace(); //return false; } } //private NodeList getElements(int t) { //private NodeList getElements(int t) throws EmptyElementException { // 2006-09-19 private NodeList getElements(int t) throws Exception { // 2006-09-22 //return docs[t].getElementsByTagName("s"); //return docs[t].getElementsByTagName("p"); funker // ### klønete String[] relevantElementNamesArray = new String[relevantElementNames.size()]; Iterator it = relevantElementNames.keySet().iterator(); int count = 0; while (it.hasNext()) { String name = (String)it.next(); relevantElementNamesArray[count] = name; count++; } try { // 2006-09-19 //return XmlTools.getElementsByTagNames(docs[t], relevantElementNamesArray); return XmlTools.getElementsByTagNames(docs[t], relevantElementNamesArray, getSpecialCharacters()); // 2006-10-03 //} catch (EmptyElementException e) { // 2006-09-19 } catch (Exception e) { // 2006-09-22 throw e; // ### // 2006-09-19 } // 2006-09-19 } /** * establishes corresp attributes in dom for text t */ void setCorrespAttributes(int t) { Iterator it; Iterator eIt; // clean dom of corresp attributes that may have been in the input file for (int i=0; i<((NodeList)(nodes[t])).getLength(); i++) { Element el = (Element)(((NodeList)(nodes[t])).item(i)); el.removeAttribute("corresp"); } // set new corresp attributes in dom. 
// loop through all finished alignments String newAttribute; it = aligned.alignments.iterator(); while (it.hasNext()) { // next alignment Link link = (Link)(it.next()); // get the corresp attribute values from all the other texts. // loop through all the other texts newAttribute = ""; for (int tt=0; tt refer to a

. // challenge: find the correct

(or similar) // check with previous siblings - // e.g, previous 's in the same

- // what they refer to in the other text // which element to start with? //System.out.println("which element to start with?"); // there might be more than one element from this text in the alignment. // find the first one int smallestElementNumber = Integer.MAX_VALUE; eIt = link.elementNumbers[t].iterator(); while (eIt.hasNext()) { int elementNumber = ((Integer)(eIt.next())).intValue(); if (elementNumber < smallestElementNumber) { smallestElementNumber = elementNumber; } } Node el = nodes[t].item(smallestElementNumber); //System.out.println("el. name = " + el.getNodeName() + ". type = " + el.getNodeType() + ". id = " + ((Element)el).getAttribute("id")); //System.out.println("look for previous"); Node prevEl = XmlTools.getPreviousRelevantSiblingElement(el, relevantElementNames); String otherId = ""; while (prevEl != null) { //System.out.println("prevEl. name = " + prevEl.getNodeName() + ". type = " + prevEl.getNodeType()); if (((Element)prevEl).getAttribute("corresp") != "") { // found a sibling which refers to #####this other text. // get its last corresp attribute value (if more than one) String[] values = ((Element)prevEl).getAttribute("corresp").split(" "); otherId = values[values.length-1]; //System.out.println("t = " + t + ". tt = " + tt + ". otherId = " + otherId); break; } else { prevEl = XmlTools.getPreviousRelevantSiblingElement(prevEl, relevantElementNames); } } //System.out.println("otherId = " + otherId); if (otherId == "") { // no previous sibling refers to the other text. //System.out.println("no previous sibling refers to the other text"); // must try to consult elements in the previous parent element //System.out.println("must try to consult elements in the previous parent element"); // first up one level //System.out.println("first up one level"); Node parent = XmlTools.getRelevantAncestorElement(el, relevantAncestorElementNames); if (parent == null) { // no higher level. 
//System.out.println("no higher level"); // no reason to believe there's a higher level in the other text either. // refer to nothing newAttribute = ""; } else { // then to previous sibling (previous parent) //System.out.println("then to previous sibling (previous parent)"); Node prevParent = XmlTools.getPreviousRelevantSiblingElement(parent, relevantAncestorElementNames); if (prevParent == null) { // no sibling. first parent. //System.out.println("no sibling. first parent"); // then it's the first parent in the other text we want. // first get the first element in the other text Node otherElement = nodes[tt].item(0); // then get its parent Node otherParent = XmlTools.getRelevantAncestorElement(otherElement, relevantAncestorElementNames); if (otherParent == null) { // no parent. // refer to nothing newAttribute = ""; } else { // refer to that parent newAttribute = ((Element)otherParent).getAttribute("id"); //System.out.println("refer to that otherParent. t = " + t + ". tt = " + tt + ". newAttribute = " + newAttribute); } } else { // found previous parent. //System.out.println("found previous parent"); // try its children (###which hopefully are on the right level, // and not e.g on a level between

and ). // work backwards from last child //System.out.println("try its children. work backwards from last child"); prevEl = XmlTools.getRelevantLastDescendantElement(prevParent, relevantElementNames); if (prevEl == null) { // no children. //System.out.println("no children"); // could be e.g empty

, // or e.g some irrelevant element between

's. // give up. // refer to nothing newAttribute = ""; } else { // ... //System.out.println("there are children"); otherId = ""; while (prevEl != null) { //System.out.println("look for child with corresp"); if (((Element)prevEl).getAttribute("corresp") != "") { // found a sibling which refers to #####this other text. //System.out.println("found a sibling which refers to #####this other text"); // get its last corresp attribute value (if more than one) //System.out.println("get its last corresp attribute value (if more than one)"); String[] values = ((Element)prevEl).getAttribute("corresp").split(" "); //System.out.println("values.length() = " + values.length() + ", values[0] = " + values[0]); // ### otherId = values[values.length-1]; //System.out.println("t = " + t + ". tt = " + tt + ". otherId = " + otherId); break; } else { prevEl = XmlTools.getPreviousRelevantSiblingElement(prevEl, relevantElementNames); } } if (otherId == "") { // no children of previous parent refer to the other text. // ... // give up. // refer to nothing newAttribute = ""; } else { // found reference to the other text //System.out.println("found reference to the other text"); // get element in the other text // reference to which level? //System.out.println("which level?"); //Node otherEl = docs[tt].getElementById(otherId); // ### funker ikke????!!!! Node otherEl = XmlTools.getElementByIdInNodeList(allNodes[tt], otherId); // ### gjør dette isteden if (relevantElementNames.containsKey(otherEl.getNodeName())) { // the "relevant" level //System.out.println("the 'relevant' level"); // get its parent. //System.out.println("get its parent"); // up one level in the other text Node otherParent = XmlTools.getRelevantAncestorElement(otherEl, relevantAncestorElementNames); //System.out.println("parent has id = " + ((Element)otherParent).getAttribute("id")); if (otherParent == null) { // no higher level. //System.out.println("no higher level"); // give up. 
// refer to nothing newAttribute = ""; } else { // then to next sibling (next parent) //System.out.println("then to next sibling (next parent)"); Node nextOtherParent = XmlTools.getNextRelevantSiblingElement(otherParent, relevantAncestorElementNames); if (nextOtherParent == null) { // no next sibling (next parent). //System.out.println("no next sibling (next parent)"); // give up // refer to nothing newAttribute = ""; } else { // refer to that next sibling (next parent), // which hopefully is "in synch" // with the current element and its parent newAttribute = ((Element)nextOtherParent).getAttribute("id"); //System.out.println("refer to that next sibling (next parent). t = " + t + ". tt = " + tt + ". newAttribute = " + newAttribute); } } } else { // something else, i.e, parent level. //System.out.println("something else, i.e, parent level"); // refer to that parent newAttribute = otherId; } } } } } } else { // found a previous sibling with a reference to the other text. // reference to which level? //System.out.println("found a previous sibling with a reference to the other text. otherId = " + otherId); //System.out.println("t = " + t); //System.out.println("tt = " + tt); //System.out.println("docs[tt].getXmlVersion() = " + docs[tt].getXmlVersion()); //Node otherEl = docs[tt].getElementById(otherId); // ### funker ikke????!!!! Node otherEl = XmlTools.getElementByIdInNodeList(allNodes[tt], otherId); // ### gjør dette isteden //System.out.println("otherEl = " + otherEl); //System.out.println("otherEl.getNodeName() = " + otherEl.getNodeName()); if (relevantElementNames.containsKey(otherEl.getNodeName())) { // the "relevant"level // get its parent Node otherParent = XmlTools.getRelevantAncestorElement(otherEl, relevantAncestorElementNames); if (otherParent == null) { // has no parent. // refer to nothing newAttribute = ""; } else { // refer to that parent newAttribute = ((Element)otherParent).getAttribute("id"); } } else { // something else, i.e, parent level. 
// refer to that parent newAttribute = otherId; } } } else { // loop through the elements the alignment has got in this other text eIt = link.elementNumbers[tt].iterator(); while (eIt.hasNext()) { int elementNumber = ((Integer)(eIt.next())).intValue(); String id = ((Element)(((AElement)(aligned.elements[tt].get(elementNumber))).element)).getAttribute("id"); if (newAttribute != "") { newAttribute += " "; } newAttribute += id; } } } } // set the corresp attribute values in all the elements // the alignment has got in the current text eIt = link.elementNumbers[t].iterator(); while (eIt.hasNext()) { int elementNumber = ((Integer)(eIt.next())).intValue(); ((Element)(((AElement)(aligned.elements[t].get(elementNumber))).element)).setAttribute("corresp", newAttribute); } } } /** * Saves an xml file with corresp attributes */ //void saveFile(AlignGui gui, File f, int t) { // package access //void saveFile(File f, int t) { // package access //void saveCorrespFormatFile(File f, int t) { // package access void saveCorrespFormatFile(File f, int t, Charset cs) { // package access // €€€ burde vært advarsel hvis pending alignments? // hvis unaligned? // komme spørsmål om prog skal sette et merke? //// establish corresp attributes in dom for text t // ### nei, gjør det på forhånd //setCorrespAttributes(t); // write dom to file //XmlOutput.writeXml(docs[t], f); XmlOutput.writeXml(docs[t], f, cs); } /** * Saves file in newline format */ //void saveNewlineFormatFile(File f, int t) { // package access //void saveNewlineFormatFile(File f, int t, Charset cs) { // package access void saveNewlineFormatFile(File f, int t, Charset cs, AncestorFilter filter) { // package access //System.out.println("filter = " + filter); // €€€ burde vært advarsel hvis pending alignments? // hvis unaligned? // komme spørsmål om prog skal sette et merke? 
Iterator it; Iterator eIt; // clean dom of corresp attributes for (int i=0; i<((NodeList)(nodes[t])).getLength(); i++) { Element el = (Element)(((NodeList)(nodes[t])).item(i)); el.removeAttribute("corresp"); } // ... //FileWriter out; OutputStreamWriter out; try { //out = new FileWriter(f); //€€€endringer 2006-02-20 for å kunne skrive utf-8, o.a OutputStream fOut= new FileOutputStream(f); OutputStream bOut= new BufferedOutputStream(fOut); out = new OutputStreamWriter(bOut, cs); } catch (IOException e1) { // ### ### ### ### ### ### ### ### ### ### ### ### ### Toolkit.getDefaultToolkit().beep(); System.out.println("Program error? Can't create new FileWriter"); return; } // loop through all finished alignments and write to file it = aligned.alignments.iterator(); while (it.hasNext()) { // next alignment Link link = (Link)(it.next()); // loop through the alignment's elements String line = ""; boolean first = true; eIt = link.elementNumbers[t].iterator(); while (eIt.hasNext()) { int elementNumber = ((Integer)(eIt.next())).intValue(); //Element element = (Element)(((AElement)(aligned.elements[t].get(elementNumber))).element); //String elementText = XmlTools.getText(element); ### heller bruke .getTextContent() AElement aElement = (AElement)(aligned.elements[t].get(elementNumber)); //String elementText = aElement.toString(); // ###toNewString(): some users might like parent info prepended to the elements // in their newline format output files String elementText = aElement.toNewString(filter); if (first) { first = false; } else { line += " "; } line += elementText; } try { out.write(line + "\n"); } catch (IOException e2) { // ### ### ### ### ### ### ### ### ### ### ### ### ### Toolkit.getDefaultToolkit().beep(); System.out.println("Program error? Can't do out.write"); try { out.close(); } catch (IOException e3) { // ### ### ### ### ### ### ### ### ### ### ### ### ### Toolkit.getDefaultToolkit().beep(); System.out.println("Program error? 
Can't do out.close"); return; } return; } } try { out.close(); } catch (IOException e4) { // ### ### ### ### ### ### ### ### ### ### ### ### ### Toolkit.getDefaultToolkit().beep(); System.out.println("Program error? Can't do out.close"); return; } // what if there are unfinished ones? //... } /** * Saves file in "external" format */ void saveExternalFormatFile(File f) { // €€€ samme spm som for de andre formatene // establish corresp attributes in dom for all texts. // ### no - not necessary if already saved in "corresp" format //£££££££££££££££ //for (int t=0; t\n"; try { out.write(data, 0, data.length()); } catch (IOException e) { JOptionPane.showMessageDialog( null, "Can't write to file " + f.getName(), //"€€€Title", "Error", // 2006-09-21 JOptionPane.ERROR_MESSAGE ); //System.err.println("Exception when writing to " + f.getName() + ": "); //System.err.println(e.toString()); ErrorMessage.error("Exception when writing to " + f.getName() + ":\n" + e.toString()); // 2006-08-10 return; } // loop through all finished alignments and write to file Iterator it = aligned.alignments.iterator(); while (it.hasNext()) { // next alignment. // get all the id's to link Link link = (Link)(it.next()); String xtargetsValue = ""; // loop through the texts for (int t=0; t 0) { int firstElementNumber = ((Integer)(((TreeSet)(link.elementNumbers[tt])).first())).intValue(); // get the corresp attribute //System.out.println((AElement)(aligned.elements[tt].get(firstElementNumber))); ids = ((Element)(((AElement)(aligned.elements[tt].get(firstElementNumber))).element)).getAttribute("corresp"); } else { // the alignment has no element in the other text. 
// get the id's from the alignment's elements in _this_ text Iterator eIt = link.elementNumbers[t].iterator(); while (eIt.hasNext()) { int elementNumber = ((Integer)(eIt.next())).intValue(); String id = ((Element)(((AElement)(aligned.elements[t].get(elementNumber))).element)).getAttribute("id"); if (ids != "") { ids += " "; } ids += id; } } // ... if (t > 0) { xtargetsValue += ";"; } xtargetsValue += ids; } // create link (alignment) info data = "\n"; // output info try { out.write(data, 0, data.length()); } catch (IOException e) { JOptionPane.showMessageDialog( null, "Can't write to file " + f.getName(), //"€€€Title", "Error", // 2006-09-21 JOptionPane.ERROR_MESSAGE ); //System.err.println("Exception when writing to " + f.getName() + ": "); //System.err.println(e.toString()); ErrorMessage.error("Exception when writing to " + f.getName() + ":\n" + e.toString()); // 2006-08-10 return; } } // create and output root end element data = "\n"; try { out.write(data, 0, data.length()); } catch (IOException e) { JOptionPane.showMessageDialog( null, "Can't write to file " + f.getName(), //"€€€Title", "Error", // 2006-09-21 JOptionPane.ERROR_MESSAGE ); //System.err.println("Exception when writing to " + f.getName() + ": "); //System.err.println(e.toString()); ErrorMessage.error("Exception when writing to " + f.getName() + ":\n" + e.toString()); // 2006-08-10 return; } // close output file try { out.close(); } catch (IOException e) { JOptionPane.showMessageDialog( null, "Can't close file " + f.getName(), //"€€€Title", "Error", // 2006-09-21 JOptionPane.ERROR_MESSAGE ); //System.err.println("Exception when closing " + f.getName() + ": "); //System.err.println(e.toString()); ErrorMessage.error("Exception when closing " + f.getName() + ":\n" + e.toString()); // 2006-08-10 return; } } // compute and display info about the current anchor word matches // and other matches §§§ void computeMatches(AlignGui gui) { // ### compute and display //void computeMatches() { 
//System.out.println("model sin computeMatches(). gui = " + gui); //gui.setMatchInfoTextArea(matchInfoDisplayable.toString()); matchInfo.computeDisplayableList(); gui.matchInfoList.setVisible(true); } // clear info about the current anchor word matches // and other matches §§§ void clearMatches(AlignGui gui) { //matchInfoDisplayable.clear(); //gui.setMatchInfoTextArea(""); // ### earlier the info box was a JTextArea. // now it is a JList referring to a List. // it feels wrong to null the List. // instead we hide the box gui.matchInfoList.setVisible(false); } // 2006-02-23. log displayed info about the current anchor word matches and other matches §§§ void logMatches(AlignGui gui) { // //System.out.println("Skal jeg skrive alignete elementer og match-info til loggfil?"); if (gui.model.getLogging()) { // 2006-04-18 //System.out.println("Ja, jeg skal det."); try { // ###logMatches() er misvisende navn hvis også skal logge selve elementene String text = ""; for (int t=0; t 1) { //logMatchesHeader("*** More than one alignment - info below is misleading ***"); logHeader(gui, "*** More than one alignment - info below is misleading ***"); // 2006-04-18 } else { //logMatchesHeader("*** Next alignment ***"); logHeader(gui, "*** Next alignment ***"); // 2006-04-18 } logMatches(gui); aligned.pickUp(gui, toAlign.flush(), scroll); computeMatches(gui); // ### compute and display //ShowCompare.clear(gui); gui.compareInfoPanel.off(); gui.compareInfoPanel.repaint(); // garbage collect. // for each text find number of first element not yet aligned. // (if all are aligned the number will be one larger than the highest element number.) // (€€€perhaps the code here should check the element numbers themselves and not just rely on size()) int[] ix = new int[Alignment.NUM_FILES]; for (int t=0; t 0"); position[t] = ((AElement)(((DefaultListModel)(unaligned.elements[t])).get(0))).elementNumber - 1; // ############# } else { // no more unaligned elements in text t. // ### er dette suspekt? 
kan det hende at ikke alle elementene fra DOM er med??? //System.out.println("no more unaligned elements in text t"); position[t] = AlignmentModel.this.nodes[t].getLength() - 1; } } //System.out.println("!!! skal lage ny QueueList. position=" + position[0] + "," + position[1] + ". altså - dette er den siste cellen som vi har alignet, ikke den første vi skal aligne"); // will investigate "all" possible paths with a certain number of steps. // will loop once per step, each time building "all" paths // that are one step longer than in the previous loop. // collect the paths in the queue list. // init queue list (queueList) QueueList queueList = new QueueList(AlignmentModel.this, position); // the paths that are one step longer will, while they are being created, // reside in nextQueueList QueueList nextQueueList; // variable for each of all the possible steps to try when lengthening a path: 0-0, 0-1, etc PathStep step; // init counter for the lengthening loop int stepCount = 0; // the lengthening loop boolean doneLengthening = false; //System.out.println("before do ... while (!doneLengthening)"); do { /* debugCount++; if (debugCount <= 3) { System.out.println("\n>>>>>>>>>>>>>> queueList =\n" + queueList + "\n"); } */ //System.out.println("\nstepCount=" + stepCount + "\n"); //System.out.println("\nqueueList before lengthening = " + queueList + "\n"); Iterator qIt = queueList.entry.iterator(); nextQueueList = new QueueList(); // loop over each entry in the queue list. each entry is a path //System.out.println("before while (qIt.hasNext())"); while (qIt.hasNext()) { //System.out.println("in while (qIt.hasNext())"); //System.out.println("inner while"); Object temp = qIt.next(); ////System.out.println("crocodile"); QueueEntry queueEntry = (QueueEntry)(temp); //System.out.println("before if (!queueEntry.removed)"); if (!queueEntry.removed) { // ### 2005-11-02. hmmm. 
denne var det ikke så mye vits så lenge jeg ikke merket for fjerning i queueList, bare i nextQueueList if (queueEntry.end) { // path goes to the end of all texts. // use as it is QueueEntry newQueueEntry = (QueueEntry)queueEntry.clone(); // denne har allerede newQueueEntry.end = true; } else { //System.out.println("in if (!queueEntry.removed)"); //System.out.println("queueEntry = " + queueEntry); ////System.out.println("AlignmentModel.this.compare.incrementsList.size() = " + AlignmentModel.this.compare.incrementsList.size()); // loop through all the possible steps to lengthen the current path with. // note. some or all of these steps will not be possible after all // at the end of the texts Iterator iIt = AlignmentModel.this.compare.stepList.iterator(); //System.out.println("before while (iIt.hasNext())"); while (iIt.hasNext()) { //System.out.println("in while (iIt.hasNext())"); step = (PathStep)iIt.next(); //System.out.println("*** neste steg å prøve å forlenge med er " + step); //nextQueueList.entry.add(new QueueEntry(AlignmentModel.this, queueEntry, step)); //QueueEntry newQueueEntry = new QueueEntry(AlignmentModel.this, queueEntry, step); try { //System.out.println("before makeLongerPath(..."); QueueEntry newQueueEntry = queueEntry.makeLongerPath(AlignmentModel.this, step); //System.out.println("after makeLongerPath(..."); //System.out.println("after makeLongerPath(... newQueueEntry = " + newQueueEntry); if (newQueueEntry.path != null) { // €€€ .path = null er min krøkkete måte å fortelle at det nye forslaget til path ikke er bedre enn andre paths til samme position, og at forslaget skal kastes //System.out.println("forlenget path beste hittil"); // this new path might be a better (better-scoring) path than // some other paths in the new list. 
remove those other paths, if any int[] pos = newQueueEntry.path.position; //if ((pos[0] == 2) && (pos[1] == 3)) { // debugCount++; // if (debugCount == 2) { // System.out.println("\n>> >> >> 1 queueList = " + queueList + "\n"); // } //} nextQueueList.remove(pos); // doesn't remove them for real. just marks them for removal later //if ((pos[0] == 2) && (pos[1] == 3)) { // if (debugCount == 2) { // System.out.println("\n>> >> >> 2 queueList = " + queueList + "\n"); // } //} queueList.remove(pos); // must do the same thing in the source. (###dodgy?) see comments for the QueueList remove() method //if ((pos[0] == 2) && (pos[1] == 3)) { // if (debugCount == 2) { // System.out.println("\n>> >> >> 3 queueList = " + queueList + "\n"); // } //} // insert new path in the new list nextQueueList.add(newQueueEntry); } else { //System.out.println("forlenget path skåret ikke høyt nok"); } } catch (EndOfAllTextsException e) { //System.out.println("suggest() catches EndOfAllTextsException"); // end of all texts. // use path as it is, but mark it properly QueueEntry newQueueEntry = (QueueEntry)queueEntry.clone(); newQueueEntry.end = true; //System.out.println("suggest() made newQueueEntry = " + newQueueEntry); // insert new path in the new list unless already there. if (!nextQueueList.contains(newQueueEntry)) { nextQueueList.add(newQueueEntry); } } catch (EndOfTextException e) { //System.out.println("suggest() catches EndOfTextException"); // end of at least one text but not all of them. // forget //System.out.println("EndOfTextException"); } catch (BlockedException e) { //... } } } } } //System.out.println("\n>>>>>>>>>>>nextQueueList før removeForReal = " + nextQueueList); nextQueueList.removeForReal(); // remove for real. see above //System.out.println("\n>>>>>>>>>>>nextQueueList etter removeForReal = " + nextQueueList); if (nextQueueList.empty()) { // not possible to lengthen path. 
must have reached the end of all the texts doneLengthening = true; } else { queueList = nextQueueList; //System.out.println("queueList after lengthening = " + queueList); stepCount++; doneLengthening = (stepCount >= AlignmentModel.this.getMaxPathLength()); } } while (!doneLengthening); //System.out.println("!!! har laget ny QueueList med alle stier som har <= " + stepCount + " steg. queueList = " + queueList + "\n"); //System.out.println("!!! Skal finne den beste stien av disse"); // ... if ( (queueList.entry.size() == 0) // ### will not happen? || ( (queueList.entry.size() == 1) && (((QueueEntry)(queueList.entry.get(0))).path.steps.size() == 0) ) ) { // must be end of all texts doneAligning = true; } else { Iterator qIt2 = queueList.entry.iterator(); //float bestScore = -1.f; // ### // normalized = diveded by number of sentences. // done because: the paths compared may well have the same number of steps, // but they often have a different number of sentences. // if not normalized a path with e.g 2-1 + 1-2 can win over a correct 1-1 + 1-1 + 1-1 // because it gains extra points from the extra sentences the former path has at its end //float normalizedBestScore = -1.f; // ### float normalizedBestScore = AlignmentModel.BEST_PATH_SCORE_NOT_CALCULATED; // 2006-09-20 Path bestPath = null; //String report = ""; //%%% while (qIt2.hasNext()) { QueueEntry candidate = ((QueueEntry)qIt2.next()); //System.out.println("!!! candidate.score = " + candidate.score); //report += "---------------------" + "\n"; //%%% //report += "path: " + candidate.path + "\n"; //%%% //report += "score: " + candidate.score + "\n"; //%%% //report += "length in sentences: " + candidate.path.getLengthInSentences() + "\n"; //%%% float normalizedCandidateScore = candidate.score / candidate.path.getLengthInSentences(); //report += "normalized score: " + normalizedCandidateScore + "\n"; //%%% //if (candidate.score > bestScore) { if (normalizedCandidateScore > normalizedBestScore) { //System.out.println("!!! 
bedre enn bestScore = " + bestScore); //bestScore = candidate.score; normalizedBestScore = normalizedCandidateScore; bestPath = candidate.path; } } //System.out.println(">>>=================>>> bestScore = " + bestScore); //System.out.println(">>>=================>>> best path = " + bestPath); // ... if (bestPath.steps.size() > 0) { //System.out.print("A "); //MemTest.print("Tenured Gen", ""); PathStep stepSuggestion = (PathStep)bestPath.steps.get(0); //System.out.println(">>>=================>>> suggested step = " + stepSuggestion); //System.out.print("B "); //MemTest.print("Tenured Gen", ""); // ... for (int t=0; t 1) { System.out.println("more than one id. this element can't be a loner."); // more than one id. this element can't be a loner. // loners refer to one element only, on a "parent" level finishedLonersInThisText = true; } else { System.out.println("one id. check alignable elements in the other text to see if the id belongs to one of them"); // one id. check alignable elements in the other text // to see if the id belongs to one of them if (XmlTools.getElementByIdInNodeList(nodes[tt], correspValue) != null) { System.out.println("belongs to alignable element in other text. => not loner"); // belongs to alignable element in other text. // => not loner // check further if (XmlTools.getElementByIdInDefaultListModel(unaligned.elements[tt], correspValue) != null) { // the element in the other text is an unaligned element // ok finishedLonersInThisText = true; } else { System.out.println("error in corresp. treat as loner"); // the element in the other text is not an unaligned element // error in corresp //############### // treat as loner } } else if(XmlTools.getElementByIdInNodeList(allNodes[tt], correspValue) != null) { System.out.println("belongs to other element in other text, presumably one on a 'parent' level. => loner"); // belongs to other element in other text, // presumably one on a "parent" level. // (€€€but we don't check that element further,. 
// neither its "level" nor its location) // => loner } else { System.out.println("error in file. treat as loner"); // error in file. //############### // treat as loner } } // ... System.out.println("xxx"); if (!finishedLonersInThisText) { System.out.println("found loner. pop it from unaligned and make an alignment out of it"); // found loner. //// pop it from unaligned and make an alignment out of it // make an alignment out of it //AElement aEl = (AElement)(AlignmentModel.this.unaligned.pop(t)); System.out.println("1.5 get next available element in text " + t); // get next available element in text t ######### //AElement aEl = (AElement)(unaligned.elements[t].get(0)); aEl = Skip.getNextAvailableUnalignedElement(unaligned, someAligned, t); link = new Link(); link.alignmentNumber = alignmentNumber; aEl.alignmentNumber = link.alignmentNumber; alignmentNumber++; link.elementNumbers[t] = new TreeSet(); link.elementNumbers[t].add(aEl.elementNumber); link.elementNumbers[tt] = new TreeSet(); // add it to our collection of ... alignments (the someAligned thing) someAligned.add(link); //someAligned.print(); //if (someAligned.alignments.size() > 3) { // System.out.println("kill this process"); stop[0] = true; stop[1] = true; finishedLonersInThisText = true; //} // ###also the element. kunne ikke Link også holdt rede på disse? //someAligned.add(t, element); someAligned.add(t, aEl); //someAligned.print(); if (!someAligned.hasHoles()) { // got one or more alignments, with no holes. // pop the relevant elements out of unaligned. // we don't need their content. // we got all the data we need already. // just throw them away System.out.println("pop and throw them away 1"); for (t2=0; t2 3) { // System.out.println("kill this process"); stop[0] = true; stop[1] = true; //} if (!someAligned.hasHoles()) { // got one or more alignments, with no holes. // pop the relevant elements out of unaligned. // we don't need their content. // we got all the data we need already. 
// just throw them away System.out.println("pop and throw them away 2"); for (t2=0; t2 10) { // we have done ... alignments, and someAligned is still holey. // we suspect something is wrong System.out.println("kill this process"); stop[0] = true; stop[1] = true; } System.out.println("før while. stop[0]=" + stop[0] +", stop[1]=" + stop[1]); } while(!(stop[0] && stop[1])); System.out.println("G"); //MemTest.print("Tenured Gen", ""); if (!someAligned.empty()) { // error // ######### trenger feilmelding - ikke kun et pip? //dette blir ikke bra hvis someAligned har hull!!! ######################## Toolkit.getDefaultToolkit().beep(); System.out.println("!someAligned.empty()"); System.out.println("Dodgy case??????????????"); toAlign.catch_(someAligned); someAligned = new AlignmentsEtc(); } } // scroll aligned. (waited until now because of a memory leak - ?) for (int t=0; t? // if not, it must be the element in a 1-0 or 0-1 alignment, // with a reference to an ancestor element, e.g, a

if (XmlTools.getElementByIdInNodeList(nodes[tt], ref) { // "relevant" ... wanted.add(tt, ref); } else { // ancestor ...make sure to keep the exact id?... } } // remove the picked ones from the wanted list wanted.remove(picked); // update pointer to next element to pick next[t]++; } else { // error. no more elements in this text ... } } } // ... ... } while (!(wanted.empty() || picked.empty())); if (...) { // found a set of corresponding elements. // ### or perhaps several ones // make an alignment out of them? // or 'more' them up from unaligned to to-align? // ### and do something extra if there are crossed relations? ... if (...) { // something wrong with corresp attrs ... done = true; // ### or throw exception? } // then 'align' them ... } else { done = true; } } while(...); } } getIndexOfElementByIdInNodeList(nodes[...], id); class Link { int alignmentNumber; Set[] elementNumbers; */ void less(AlignGui gui, int t) { // package access ////System.out.println("at model.less()"); //unaligned.catch_(t, toAlign.drop(t)); unaligned.catch_(t, toAlign.drop(gui, t)); // ### 2006-03-30 computeMatches(gui); // ### compute and display //ShowCompare.clear(gui); gui.compareInfoPanel.off(); gui.compareInfoPanel.repaint(); // 2006-10-03 // update aligned/total ratio in status line gui.model.setMemoryUsage(gui); // 2006-10-03 gui.model.updateAlignedTotalRatio(gui); } void more(AlignGui gui, int t) { // package access //System.out.println("model sin more(). gui = " + gui); //System.out.println("\nmodel sin more(). 
t = " + t); //////////MemTest.print("Heap memory", ""); //MemTest.print("Tenured Gen", ""); //toAlign.pickUp(gui, t, unaligned.pop(t)); toAlign.pickUp(t, unaligned.pop(t)); computeMatches(gui); // ### compute and display); //ShowCompare.clear(gui); gui.compareInfoPanel.off(); gui.compareInfoPanel.repaint(); // 2006-10-03 // update aligned/total ratio in status line gui.model.setMemoryUsage(gui); // 2006-10-03 gui.model.updateAlignedTotalRatio(gui); } } /* gui.statusLine.setText(""); int percentDone = 0; gui.statusLine.setProgress(percentDone); gui.statusLine.repaint_(); int numElements = nodes[t].getLength(); for (int i=0; i