/* * AlignmentModel.java * * ... * ... * @author Oystein Reigem */ package aksis.alignment; import java.awt.Color; import java.util.*; import java.io.*; //import java.util.regex.*; import javax.swing.*; import java.awt.event.MouseEvent; import java.lang.reflect.*; import java.awt.Toolkit; // beep import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.FactoryConfigurationError; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.*; import java.nio.charset.*; import java.text.*; import java.lang.Thread; //import javax.swing.event.ListDataEvent; import java.awt.Rectangle; ////////////////////////////////////////////////////////////////////////////////////////// //apparatus for structure and flow of alignable elements through the alignment process // ////////////////////////////////////////////////////////////////////////////////////////// /* class MyException extends Exception { public MyException() { } public MyException(String gripe) { super(gripe); } } */ class Aligned { /** * lists of aligned elements. * one list for each text. * shown in the gui's 'aligned' JList components. * each element is an AElement object. */ protected DefaultListModel[] elements; /** * the finished alignments. * each alignment is a Link object. */ List alignments = new ArrayList(); // package access. list of Link objects Aligned() { elements = new DefaultListModel[Alignment.NUM_FILES]; for (int t=0; t s = new TreeSet(); // TreeSet is a sorted Set testElementNumbers[t] = s; //System.out.println("testElementNumbers["+t+"] = " + testElementNumbers[t]); } boolean holes = true; // pessimistic ¤¤¤ ////System.out.println("alignments.size() = " + alignments.size()); // find the first alignment to drop //System.out.println("skal gå i løkke og ta én alignment om gangen. starter med ingen"); int numberOfFirstAlignmentToDrop = alignments.size() - 1; while (holes) { //System.out.println("løkkegjennomløp for å se om vi har hull hvis vi tar med en alignment til"); if (numberOfFirstAlignmentToDrop < 0) { //System.err.println("*** Program error 1"); ErrorMessage.programError("AlignmentsEtc drop(). numberOfFirstAlignmentToDrop < 0"); // 2006-08-10 return null; } for (int t=0; t numberOfFirstAlignmentToDrop) { //System.out.println("drops alignment"); returnValue.alignments.add(returnValue.alignments.size(), alignments.remove(numberOfFirstAlignmentToDrop)); } // update aligned/total ratio in status line gui.model.setMemoryUsage(gui); // 2006-10-03 gui.model.updateAlignedTotalRatio(gui); // return returnValue; } } void pickUp(AlignGui gui, AlignmentsEtc valueGot, boolean scroll) { //System.out.println("pickUp A"); //System.out.println("pickUp()"); //MemTest.print("Tenured Gen", ""); if (valueGot != null) { // alignments //System.out.println("pickUp B"); //MemTest.print("Tenured Gen", ""); //######hvorfor tom løkke? for (int j=0; j pending = new ArrayList(); ///** // * the unaligned elements. // * also represented by a Link object. // */ //private Link unused = new Link(); /** * the number of the first pending alignment. * if no alignments pending, the number to use when establishing a pending alignment, * i.e, one higher than the highest number of the finished alignments. */ private int firstAlignmentNumber = 0; ToAlign() { elements = new DefaultListModel[Alignment.NUM_FILES]; for (int t=0; t toSort = new ArrayList(); for (int ii=0; ii < pending.size(); ii++) { // next pending alignment //System.out.println("next pending alignment. ii=" + ii); Link link = (Link)(pending.get(ii)); // find this alignment's first text for (int tt=0; tt 0) { int firstElementNumber = ((Integer)(((TreeSet)(link.elementNumbers[tt])).first())).intValue(); String firstElementNumberString = Integer.toString(1000000 + firstElementNumber).substring(1); toSort.add(Integer.toString(tt) + "-" + firstElementNumberString + "-" + ii); break; } } } // sort List sorted = new ArrayList(); Collections.sort(toSort); sorted = toSort; // reorder the pending alignments List newPending = new ArrayList(); for (int ii=0; ii < sorted.size(); ii++) { //System.out.println("neste sorterte. ii=" + ii); String tempStr = (String)(sorted.get(ii)); //System.out.println("tempStr = " + tempStr); String[] tempArr = tempStr.split("-"); int tempInt = Integer.parseInt(tempArr[2]); //System.out.println("tempInt = " + tempInt); newPending.add(pending.get(tempInt)); //((Link)(newPending.get(ii))).alignmentNumber = ii; ((Link)(newPending.get(ii))).alignmentNumber = ii + gui.model.aligned.alignments.size(); // endret 2005-05-31. bug som har overlevd lenge. har visst ikke prøvd toAlign-klikking når det står noe i aligned. alignmentnummer i toAlign starter på # aligned alignments, ikke nødvendigvis på 0 } //System.out.println("pending før sort ="); for (int ii=0; ii < pending.size(); ii++) { //System.out.println("pending.get(" + ii + ") =" + (Link)(pending.get(ii))); } pending = newPending; //System.out.println("pending etter sort ="); for (int ii=0; ii < pending.size(); ii++) { //System.out.println("pending.get(" + ii + ") =" + (Link)(pending.get(ii))); } // update and refresh elements for (int ii=0; ii < pending.size(); ii++) { //System.out.println("neste refresh elements"); for (int tt=0; tt 0) { returnValue.alignments.add(pending.remove(0)); //System.out.println("flush E"); //MemTest.print("Tenured Gen", ""); } //System.out.println("nå er pending.size() = " + pending.size()); for (int t=0; t 0) { ((DefaultListModel)(returnValue.elements[t])).addElement((AElement)(elements[t].remove(0))); //System.out.println("flush F"); //MemTest.print("Tenured Gen", ""); } //System.out.println("nå er elements[" + t + "].size() = " + elements[t].size()); } //System.out.println("flush G"); //MemTest.print("Tenured Gen", ""); return returnValue; } //} } } class Unaligned { /** * lists of unaligned elements. * one list for each text. * shown in the gui's 'unaligned' JList components. * each element is an AElement object. */ protected DefaultListModel[] elements; // ########## private + get-metode er bedre Unaligned() { elements = new DefaultListModel[Alignment.NUM_FILES]; for (int t=0; t alignments = new ArrayList(); Object[] elements; AlignmentsEtc() { elements = new DefaultListModel[Alignment.NUM_FILES]; for (int t=0; t[] testElementNumbers; testElementNumbers= new TreeSet[Alignment.NUM_FILES]; // TreeSet is a sorted Set for (int t=0; t(); } // loop through the alignments Iterator it = alignments.iterator(); while(it.hasNext()) { Link link = (Link)it.next(); for (int t=0; t 0) { int last = ((Integer)(testElementNumbers[t].last())).intValue(); int first = ((Integer)(testElementNumbers[t].first())).intValue(); if ((last - first + 1) != testElementNumbers[t].size()) { // found hole for text t return true; } } } // found no hole for any text return false; } public boolean empty() { return (alignments.size() == 0); } // for debugging purposes public void print() { // loop through the alignments Iterator it = alignments.iterator(); System.out.println("<<>>"); // loop ... elements for (int t=0; t>>"); } } } /** * the program works with elements from xml files, e.g sentences. * each element is a node in a DOM tree. * but the program also needs to know which alignment each element is involved in, if any. * for this purpose the AElement object knows not only the element * but also the element's sequence number and the number of the alignment. */ class AElement { public static final int NUM_COLORS = 10; // foreløpig ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ /** * the element itself. * a node in a DOM tree for the current text. */ Node element; /** * the sequence number of the element. * the elements of a text are numbered 0, 1, 2, 3, ... */ int elementNumber; /** * the number of the alignment the element is involved in. * alignments have a global numbering 0, 1, 2, 3, ... * #####################################################unused elements under consideration have a special value -1. */ int alignmentNumber; /** * the length in characters of the text content of the element. ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ burde normalisert whitespace */ int length; AElement(Node o, int en) { element = o; elementNumber = en; alignmentNumber = -1; // ¤¤¤ not used yet //length = XmlTools.getText(element).length(); length = element.getTextContent().length(); } public Color getColor() { //System.out.println("getColor. alignmentNumber = " + alignmentNumber); if (alignmentNumber == -1) { //return Color.white; return Color.getHSBColor((float)0.00, (float)0.00, (float)0.97); } else { return Color.getHSBColor((float)((float)alignmentNumber / NUM_COLORS), (float)0.13, (float)1.00); } } /** * makes a value that keeps - but normalizes - the division into lines of the element. * ¤¤¤because some files can have odd line endings. * if this value is rendered in a list box as a one-line thing it will not wrap. * if this value is rendered in a list box as a multi-line thing it will wrap at line endings. * ¤¤¤ ¤¤¤ ¤¤¤ suddenly wrap works after all! and this method isn't used! * ¤¤¤ fixed */ public String toString() { //System.out.println("AElement sin toString"); // pattern = [\n\r]+ , i.e, matches all kinds of line endings, also multiple endings // 2006-09-19 Pattern pattern = Pattern.compile("[\\n\\r]+"); //Matcher matcher = pattern.matcher(element.toString()); // since 1.5 Node.toString() yields e.g '[s: null]' and not 'Blah blah blah' //Matcher matcher = pattern.matcher(element.getTextContent()); // ¤¤¤just the text, e.g, 'Blah blah blah' //System.out.println("kaller getXmlContent. resultat: " + XmlTools.getXmlContent(element)); // 2006-09-19 Matcher matcher = pattern.matcher(XmlTools.getXmlContent(element)); // ¤¤¤just the text, e.g, 'Blah blah blah' //Matcher matcher = pattern.matcher(element.getNodeValue()); // ¤¤¤leads to Exception in thread "AWT-EventQueue-0" java.lang.NullPointerException //return "element nummer " + elementNumber + ": " + matcher.replaceAll("\n"); // replaces all kinds of line endings with a standard one //return matcher.replaceAll("\n"); // replaces all kinds of line endings with a standard one // 2006-09-19 return matcher.replaceAll(" "); // §§§ return XmlTools.getXmlContent(element); // 2006-09-19 //return "test"; // |||---||| // ¤¤¤ merkelig. dersom vi standardiserer til \n, // får vi ordentlig wrap + wrap der det er \n. // dersom vi setter blank, får vi ikke wrap. // ¤¤¤ hmm det er noe tull med \n-måten. // når linjer wrapper, ser vi ikke slutten. // det er noe feil i utregningen av hvor mye plass som trengs. // ¤¤¤ har ikke noe med scroll bar å gjøre. ser det i aligned også. //return matcher.replaceAll(" ") + "\n"; // ¤¤¤ yeah! word wrap works! ¤¤¤ øh/ alle elementer blir to linjer høye! //return matcher.replaceAll("\n") + "\n"; } // ###some users might like parent info prepended to the elements // in their newline format output files. // this method makes a suitable version for that purpose public String toNewString(AncestorFilter filter) { //Pattern pattern = Pattern.compile("[\\n\\r]+"); // pattern = [\n\r]+ , i.e, matches all kinds of line endings, also multiple endings //Matcher matcher = pattern.matcher(XmlTools.getXmlContent(element)); // ¤¤¤just the text, e.g, 'Blah blah blah' //#### skal dette ut i XmlTools? Node current = element; //short test2 = element.getNodeType(); //###debug String pathText = ""; if (!filter.denyAll()) { String ancestorInfo; NamedNodeMap attrs; String elementName; Attr attribute; boolean done = false; //short test = Node.ELEMENT_NODE; //###debug while (!done) { // next parent? try { current = current.getParentNode(); } catch (DOMException e) { done = true; } // ??? if (current == null) { done = true; } else { // but stop before root element is reached try { Node test = current.getParentNode(); if (test.getNodeName() == "#document") { done = true; } } catch (DOMException e) { done = true; } } if (!done) { if (current.getNodeType() == Node.ELEMENT_NODE) { // elementName = current.getNodeName(); if (filter.allowElement(elementName)) { ancestorInfo = "<" + elementName; attrs = current.getAttributes(); for (int i = 0; i < attrs.getLength(); i++) { attribute = (Attr)attrs.item(i); if (filter.allowAttribute(elementName, attribute.getName())) { ancestorInfo += " " + attribute.getName() + "='" + attribute.getValue() + "'"; } } ancestorInfo += ">"; pathText = ancestorInfo + " " + pathText; } } } } } //return pathText + matcher.replaceAll(" "); // §§§ return pathText + XmlTools.getXmlContent(element); // 2006-09-19 (nå skal elementet inneholde tekst uten (særlig) unødig whitespace) } } /** * each Link object represents an alignment - a finished one or pending one. * ##########################################################in addition a Link object is used for unused elements under consideration. */ class Link { /** * alignments are numbered 0, 1, 2, 3, ... * the numbering is global, so the numbering of pending alignments * continues the numbering of finished alignments. * #################################################unused elements under consideration have a special number -1. ¤¤¤UNUSED */ int alignmentNumber; // ########################skulle hatt set-metode. m.fl. /** * the numbers of the elements involved in the alignment. * one set for each text. */ TreeSet[] elementNumbers; Link() { alignmentNumber = -1; // ¤¤¤ elementNumbers = new TreeSet[Alignment.NUM_FILES]; for (int t=0; t(); // TreeSet is a SortedSet } } TreeSet getElementNumbers(int t) { return elementNumbers[t]; } boolean empty() { for (int t=0; t 0) { return false; } } return true; } int countElements() { int count = 0; for (int t=0; t 0) { str += ";"; } str += "size=" + elementNumbers[t].size(); Iterator e = ((TreeSet)(elementNumbers[t])).iterator(); while (e.hasNext()) { str += ",el="; str += e.next(); } } str += ")"; str += " alignment nummer " + alignmentNumber; return str; } } ///////////////////////////////////////////// /** * separate thread for loading files. * to be more precise it's not for the process of reading a file into a DOM tree * but the processing of the elements we do afterwards. * but anyway it's a process we want to show progress for in gui components, * so we need to have it in a separate thread. */ class LoadFileThread extends Thread { AlignGui gui; NodeList[] nodes; int t; int percentDone = 0; int prevPercentDisplayed = 0; int elementNumber; // do GUI updates void doUpdate(Runnable r) { try { SwingUtilities.invokeAndWait(r); } catch (InvocationTargetException e1) { //System.err.println(e1); ErrorMessage.error(e1.toString()); // 2006-08-10 } catch (InterruptedException e2) { //System.err.println(e2); ErrorMessage.error(e2.toString()); // 2006-08-10 } } // (we need a constructor with some arguments // to get references to the stuff the thread is working with) ¤¤¤ public LoadFileThread(AlignGui gui, NodeList[] nodes, int t) { this.gui = gui; this.nodes = nodes; this.t = t; } // do ¤¤¤ public void run() { // clear ¤¤¤ doUpdate(new Runnable() { public void run() { gui.statusLine.setText(""); gui.statusLine.setProgress(percentDone); } }); int numElements = nodes[t].getLength(); // ### i use Math.log(x)/Math.log(10) instead of Math.log10(x) until i've got java 1.5 installed int step = Math.round((float)(Math.pow(10, Math.sqrt((((Math.log((double)numElements / 100) / Math.log(10))) + 1))))); step = Math.min(step, 100); step = Math.max(step, 10); for (elementNumber = 0; elementNumber < numElements; elementNumber++) { AElement element = new AElement(nodes[t].item(elementNumber), elementNumber); gui.model.unaligned.add(t, element); percentDone = Math.round((float)((float)(elementNumber+1) / numElements * 100.0)); //if ((elementNumber + 1) % 100 == 0) { if (percentDone >= prevPercentDisplayed + step) { doUpdate(new Runnable() { public void run() { gui.statusLine.setText(Integer.toString(elementNumber+1)); gui.statusLine.setProgress(percentDone); } }); prevPercentDisplayed = percentDone; } } // ¤¤¤ doUpdate(new Runnable() { public void run() { //gui.statusLine.setText("Finished"); gui.statusLine.setText("Text parsed"); gui.statusLine.setProgress(100); } }); // ¤¤¤problem: sometimes at this point the content of the unaligned area doesn't show. // why? // shake it by removing and adding first element. // ¤¤¤doesn't always help!? // 2006-09-19: worse with JTextArea than JLabel? gui.model.unaligned.elements[t].add(0 ,gui.model.unaligned.elements[t].remove(0)); } } ///////////////////////////////////////////// /** * information about how the current elements under alignment match * with respect to anchor words, proper names, dice, length, etc. * displayable version. * formatted into a list of lines * ######### to ulike steder som beregner skåre */ //class MatchInfoDisplayable { class MatchInfo { AlignmentModel model; protected DefaultListModel displayableList; //MatchInfoDisplayable(AlignmentModel model) { MatchInfo(AlignmentModel model) { this.model = model; displayableList = new DefaultListModel(); } //public void compute() { // // //...; // //} public void clear() { // §§§§§§§§§§§§§§§§§§§§§§§ //...; } public void purge() { displayableList.clear(); // (keep model) } //public String toString() { public void computeDisplayableList() { int t; int n; //System.out.println("computeDisplayableList()"); ElementInfoToBeCompared elementInfoToBeCompared = new ElementInfoToBeCompared(model); // collect necessary info in an ElementInfoToBeCompared object for (t=0; t ret = new ArrayList(); public ElementInfoToBeCompared(AlignmentModel model) { this.model = model; for (int t=0; t(); } } public void add(ElementInfo elementInfo, int t) { //££££SKAL DENNE METODEN SØRGE FOR AT INFORMASJONEN OM ORD-POSISJON BLIR "GLOBAL", //OG IKKE LOKAL FOR HVERT ELEMENT? //nei, det går vel ikke, for denne klassen eier ikke elementene //System.out.println("&&& enter ElementInfoToBeCompared.add(...)"); info[t].add(elementInfo); //System.out.println("t=" + t + ", after add: info[t].size()=" + info[t].size()); } public boolean empty() { //System.out.println("&&& enter ElementInfoToBeCompared.empty()"); for (int t=0; t[] hits = new ArrayList[Alignment.NUM_FILES]; for (t=0; t(); //System.out.println("før anchor words sin 'it = info[t].iterator();'"); it = info[t].iterator(); //£££ER DETTE LØKKE OVER ELEMENTER? //System.out.println("etter anchor words sin 'it = info[t].iterator();'"); ///////////////@@@@@@@@@@int offset = 0; // 2006-04-05 while (it.hasNext()) { ElementInfo info1 = (ElementInfo)it.next(); it2 = info1.anchorWordHits.hits.iterator(); while (it2.hasNext()) { hit = (AnchorWordHit)it2.next(); //£££DA HAR hit HER EN NUMMERERING AV ORDPOSISJON SOM ER LOKAL FOR HVERT ELEMENT. //System.out.println("adder " + hit + " for tekst nr " + t); // change word position from local within each element // to global within all the elements under consideration for text t. // ####alternativ: operere med to-nivå nummerering i hit-ene etc: // 1 elementnummer, 2 lokalt ordnummer //System.out.println("hit.getPos() før = " + hit.getPos()); ///////////////@@@@@@@@@@if (offset != 0) { hit.setPos(hit.getPos() + offset); } // 2006-04-05 //System.out.println("hit.getPos() etter = " + hit.getPos()); hits[t].add(hit); } ///////////////@@@@@@@@@@offset += info1.words.length; // 2006-04-05 } //System.out.println("hits[" + t + "] = " + hits[t]); } // see if any hits match up, // i.e, if any occurring anchor words in different texts // share the same anchor word list entry // sort these lists of hits on // (1) index (anchor word list entry number) and // (2) word for (t=0; t high) { high = count; } //if (count < low) { low = count; } } /*if (presentInAllTexts) { if (model.getClusterScoreMethod() == 3) { //str.append(" (" + high + " points)"); retLine += " (" + high + " points)"; } if (model.getClusterScoreMethod() == 2) { //str.append(" (" + low + " points)"); retLine += " (" + low + " points)"; } else { // if (model.getClusterScoreMethod() == 1) //str.append(" (" + 1 + " points)"); // ¤¤¤ sløyfe? retLine += " (" + 1 + " points)"; // ¤¤¤ sløyfe? } //str.append("\n"); //System.out.println("add'er retLine = " + retLine); ret.add(retLine); //System.out.println("nå er det " + ret.size() + " linjer i ret"); }*/ // 2006-04-05 } /*// add points for this anchor word list entry if (presentInAllTexts) { highSum += high; lowSum += low; oneSum += 1; }*/ // 2006-04-05 } // ... /*if (model.getClusterScoreMethod() == 3) { anchorWordScore = highSum; } if (model.getClusterScoreMethod() == 2) { anchorWordScore = lowSum; } else { // if (model.getClusterScoreMethod() == 1) anchorWordScore = oneSum; }*/ // 2006-04-05 //System.out.println(">>> anchorWordScore = " + anchorWordScore + "\n"); // ... //if (str.length() > 0) { // str.insert(0, INDENT + "Anchor word score: " + anchorWordScore + "\n"); //} else { // str.insert(0, INDENT + "No anchor word matches. Score: 0\n"); //} //int anchorWordScore = anchorWordClusters.getScore(model.getClusterScoreMethod()); // 2006-04-05 //float anchorWordScore = anchorWordClusters.getScore(model.getClusterScoreMethod()); // 2006-04-07 float anchorWordScore = anchorWordClusters.getScore(model.getLargeClusterScorePercentage()); // next line of info... //if (anchorWordScore > 0) { // ### ryddigere med samme syntaks alltid retLine = INDENT + INDENT + "Anchor word score: " + myFormatter.format(anchorWordScore); // 2006-04-05 //} else { // retLine = INDENT + "No anchor word matches. Score: 0"; //} //// ...is header for anchor info. insert at top //ret.add(0, retLine); ret.add(retLine); // 2006-04-05 indentLevel = 3; // 2006-04-05 includeMatchType = true; // i.e, include anchor word entry number. ### + 1 ### ugly ret.addAll(anchorWordClusters.getDetails(indentLevel, includeMatchType)); // getDetails() does its own indentation and endline. ### ikke helt bra? // 2006-04-05 //// ... //score += anchorWordScore; // 2006-04-05 /////////////////// // proper names, // // dice, // // and numbers // /////////////////// //int properNameScore = 0; // 2006-04-05 //int diceScore = 0; // 2006-04-05 // check all the words in one text against all the words in the other. // collect clusters of proper names. // collect clusters of dice-related words. // collect clusters of numbers. // (usually all the words in a cluster will be related to each other, // but not necessarily.) String word1; String word2; String nextWord1; // 2006-04-07 String nextWord2; // 2006-04-07 //String phrase1; // 2006-04-07. words glued together without space between them //String phrase2; // 2006-04-07. words glued together without space between them String showPhrase1; // 2006-04-18. words with space between them String showPhrase2; // 2006-04-18. words with space between them //Clusters properNameClusters = new Clusters(model.getClusterScoreMethod()); Clusters properNameClusters = new Clusters(); // 2006-04-05 //Clusters diceClusters = new Clusters(model.getClusterScoreMethod()); Clusters diceClusters = new Clusters(); // 2006-04-05 Clusters numberClusters = new Clusters(); // 2006-04-06 //System.out.println("Skipper proper, dice, numbers"); for (t=0; t= model.getDiceMinCountingScore()) { if (SimilarityUtils.diceMatch(word1, word2, model.getDiceMinCountingScore())) { // 2006-08-09 // the words are related. // add to cluster list //System.out.println("\n" + word1 + " and " + word2 + " are dice-related. add to cluster list"); //diceClusters.add(t, tt, x, y, word1, word2); matchType = Match.DICE; // 2006-04-05 //weight = 1.0f; // 2006-04-05 weight = model.getDiceMatchWeight(); // 2006-04-07 //diceClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, word1, word2); // 2006-04-05 diceClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, 1, 1, word1, word2); // 2006-04-07 } } // 2006-04-07 // also try dice on 2 words against 1 word... if (nextWord1 != "") { //phrase1 = word1 + " " + nextWord1; //phrase1 = word1 + nextWord1; showPhrase1 = word1 + " " + nextWord1; // 2006-04-18 // first check if the phrases/words are long enough to be considered //if ((phrase1.length()-1 >= model.getDiceMinWordLength()) && (word2.length() >= model.getDiceMinWordLength())) { //if ((phrase1.length() >= model.getDiceMinWordLength()) && (word2.length() >= model.getDiceMinWordLength())) { if ( (word1.length() >= model.getDiceMinWordLength()) && (nextWord1.length() >= model.getDiceMinWordLength()) && (word2.length() >= model.getDiceMinWordLength())) { // 2006-04-18 //if (SimilarityUtils.dice(phrase1, word2) >= model.getDiceMinCountingScore()) { //if (SimilarityUtils.dice(phrase1, word2, "2-1") >= model.getDiceMinCountingScore()) { // 2006-04-18 if (SimilarityUtils.diceMatch(word1, nextWord1, word2, "2-1", model.getDiceMinCountingScore())) { // 2006-08-09 // the phrases/words are related. // add to cluster list //System.out.println("\n" + phrase1 + " and " + word2 + " are dice-related. add to cluster list"); matchType = Match.DICE; // 2006-04-05 weight = model.getDicePhraseMatchWeight(); // 2006-04-07 //diceClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, 2, 1, phrase1, word2); diceClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, 2, 1, showPhrase1, word2); // 2006-04-18 } } } // ...and 1 word against 2 words if (nextWord2 != "") { //phrase2 = word2 + " " + nextWord2; //phrase2 = word2 + nextWord2; showPhrase2 = word2 + " " + nextWord2; // 2006-04-18 // first check if the phrases/words are long enough to be considered //if ((word1.length() >= model.getDiceMinWordLength()) && (phrase2.length()-1 >= model.getDiceMinWordLength())) { //if ((word1.length() >= model.getDiceMinWordLength()) && (phrase2.length() >= model.getDiceMinWordLength())) { if ( (word1.length() >= model.getDiceMinWordLength()) && (word2.length() >= model.getDiceMinWordLength()) && (nextWord2.length() >= model.getDiceMinWordLength())) { // 2006-04-18 //if (SimilarityUtils.dice(word1, phrase2) >= model.getDiceMinCountingScore()) { //if (SimilarityUtils.dice(word1, phrase2, "1-2") >= model.getDiceMinCountingScore()) { // 2006-04-18 if (SimilarityUtils.diceMatch(word1, word2, nextWord2, "1-2", model.getDiceMinCountingScore())) { // 2006-08-09 // the phrases/words are related. // add to cluster list //System.out.println("\n" + word1 + " and " + phrase2 + " are dice-related. add to cluster list"); matchType = Match.DICE; // 2006-04-05 weight = model.getDicePhraseMatchWeight(); // 2006-04-07 //diceClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, 1, 2, word1, phrase2); diceClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, 1, 2, word1, showPhrase2); // 2006-04-18 } } } // end 2006-04-07 // 2006-04-06 // numbers float num1; float num2; try { num1 = Float.parseFloat(word1); num2 = Float.parseFloat(word2); if (num1 == num2) { // same number // add to cluster list matchType = Match.NUMBER; //weight = 1.0f; weight = model.getNumberMatchWeight(); // 2006-04-07 //numberClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, word1, word2); numberClusters.add(matchType, weight, t, tt, info1.elementNumber, info2.elementNumber, x, y, 1, 1, word1, word2); // 2006-04-07 } } catch (NumberFormatException ne) { } // end 2006-04-06 } } } } } } //System.out.println("%%% properNameClusters ferdig = " + properNameClusters); //int properNameScore = properNameClusters.getScore(model.getClusterScoreMethod()); //float properNameScore = properNameClusters.getScore(model.getClusterScoreMethod()); float properNameScore = properNameClusters.getScore(model.getLargeClusterScorePercentage()); //int diceScore = diceClusters.getScore(model.getClusterScoreMethod()); //float diceScore = diceClusters.getScore(model.getClusterScoreMethod()); float diceScore = diceClusters.getScore(model.getLargeClusterScorePercentage()); //int numberScore = numberClusters.getScore(model.getClusterScoreMethod()); //float numberScore = numberClusters.getScore(model.getClusterScoreMethod()); float numberScore = numberClusters.getScore(model.getLargeClusterScorePercentage()); // ... //str.append(INDENT + "Proper name score: " + properNameScore + "\n"); retLine = INDENT + INDENT + "Proper name score: " + myFormatter.format(properNameScore); // 2006-04-05 ret.add(retLine); //score += properNameScore; // 2006-04-05 //str.append(properNameClusters.getWords()); // getWords() does its own indentation and endline. ### ikke helt bra? //ret.addAll(properNameClusters.getDetails()); // getDetails() does its own indentation and endline. ### ikke helt bra? indentLevel = 3; // 2006-04-05 includeMatchType = false; ret.addAll(properNameClusters.getDetails(indentLevel, includeMatchType)); // getDetails() does its own indentation and endline. ### ikke helt bra? // 2006-04-05 //str.append(INDENT + "Dice score: " + diceScore + "\n"); retLine = INDENT + INDENT + "Dice score: " + myFormatter.format(diceScore); // 2006-04-05 ret.add(retLine); //score += diceScore; // 2006-04-05 //str.append(diceClusters.getWords()); // getWords() does its own indentation and endline. ### ikke helt bra? //ret.addAll(diceClusters.getDetails()); // getDetails() does its own indentation and endline. ### ikke helt bra? indentLevel = 3; // 2006-04-05 includeMatchType = false; ret.addAll(diceClusters.getDetails(indentLevel, includeMatchType)); // getDetails() does its own indentation and endline. ### ikke helt bra? // 2006-04-05 // 2006-04-06 retLine = INDENT + INDENT + "Number score: " + myFormatter.format(numberScore); ret.add(retLine); indentLevel = 3; includeMatchType = false; ret.addAll(numberClusters.getDetails(indentLevel, includeMatchType)); // end 2006-04-06 // 2006-04-05 //////////////////////////////// // common score for anchor words, proper names, dice and numbers Clusters commonClusters = new Clusters(); commonClusters.add(anchorWordClusters); commonClusters.add(properNameClusters); commonClusters.add(diceClusters); commonClusters.add(numberClusters); // 2006-04-06 //int commonScore = commonClusters.getScore(model.getClusterScoreMethod()); //float commonScore = commonClusters.getScore(model.getClusterScoreMethod()); float commonScore = commonClusters.getScore(model.getLargeClusterScorePercentage()); // go back and insert the common score for the word based methods ret.set(wordMethodsScoreLineNumber, (String)ret.get(wordMethodsScoreLineNumber) + myFormatter.format(commonScore)); score += commonScore; // end 2006-04-05 // debugging or testing String tempo = commonClusters.nonTrivialClusters_ToString(); if (tempo != "") { System.out.println(tempo); } //////////////////////////////// // scoring special characters // //////////////////////////////// //int scoringCharacterScore = 0; // 2006-04-05 // check all the ... ... ... String char1; String char2; //Clusters scoringCharacterClusters = new Clusters(model.getClusterScoreMethod()); Clusters scoringCharacterClusters = new Clusters(); // 2006-04-05 for (t=0; t>> score = " + score + "\n"); retLine = "Lengths " + length[0] + " (" + myFormatter.format(length[0]*model.getLengthRatio()) + ") and " + length[1]; if (score > scoreBefore) { //str.append("Lengths " + length[0] + " and " + length[1] + " match well,\n" + INDENT + "increasing score from " + scoreBefore + " to " + score + "\n"); //retLine = "Lengths " + length[0] + " and " + length[1] + " match well,"; retLine += " match well,"; ret.add(retLine); retLine = INDENT + "increasing score from " + myFormatter.format(scoreBefore) + " to " + myFormatter.format(score); ret.add(retLine); } else if (score < scoreBefore) { //str.append("Lengths " + length[0] + " and " + length[1] + " don't match well,\n" + INDENT + "reducing score from " + scoreBefore + " to " + score + "\n"); //retLine = "Lengths " + length[0] + " and " + length[1] + " don't match well,"; retLine += " don't match well,"; ret.add(retLine); retLine = INDENT + "reducing score from " + myFormatter.format(scoreBefore) + " to " + myFormatter.format(score); ret.add(retLine); } else { //str.append("Lengths " + length[0] + " and " + length[1] + " match so-so,\n" + INDENT + "making no change to the score " + score + "\n"); //retLine = "Lengths " + length[0] + " and " + length[1] + " match so-so,"; retLine += " match so-so,"; ret.add(retLine); retLine = INDENT + "making no change to the score " + myFormatter.format(score); ret.add(retLine); } //////////////////////////////////// // micro adjustment to break ties // 2005-11-03 //////////////////////////////////// // when otherwise scoring equal, // paths with 1-1's are to preferred // over paths with other alignments. // add (subtract) micro punishment if step is not 1-1 boolean is11 = true; for (t=0; t elements NodeList[] nodes; // package access. 2004-11-09: flytter denne fra load...thread til hit i model. liste over alle relevante elementer // list of all elements, e.g, also

elements NodeList[] allNodes; // package access. 2005-09-01. trenger denne fordi: søker etter node med bestemt id. noden kan være på høyere nivå, f.eks

i stedet for . og får ikke til å bruke Document.getElementById() // ########## skulle vært Hashtable? // alignable elements and their ancestors### HashMap relevantElementNames = new HashMap(); HashMap relevantAncestorElementNames = new HashMap(); private DocumentBuilder builder; protected File currentOpenDirectory; protected File currentSaveDirectory; protected String[] inputFilepath = new String[Alignment.NUM_FILES]; protected String[] outputFilepath = new String[Alignment.NUM_FILES]; protected String[] inputFilename = new String[Alignment.NUM_FILES]; protected String anchorFilename = ""; protected String settingsFilename = ""; // 2006-09-21 protected Charset[] charset = new Charset[Alignment.NUM_FILES]; // input files character set. output files character set = input files character set protected Aligned aligned; protected ToAlign toAlign; protected Unaligned unaligned; private String specialCharacters = Alignment.DEFAULT__SPECIAL_CHARACTERS; private String scoringCharacters = Alignment.DEFAULT__SCORING_CHARACTERS; private float lengthRatio = Alignment.DEFAULT__LENGTH_RATIO; private int diceMinWordLength = Alignment.DEFAULT__DICE_MIN_WORD_LENGTH; private float diceMinCountingScore = Alignment.DEFAULT__DICE_MIN_COUNTING_SCORE; //private int clusterScoreMethod = Alignment.DEFAULT__CLUSTER_SCORE_METHOD; private int largeClusterScorePercentage = Alignment.DEFAULT__LARGE_CLUSTER_SCORE_PERCENTAGE; private int maxPathLength = Alignment.DEFAULT__MAX_PATH_LENGTH; // private float anchorWordMatchWeight = Alignment.DEFAULT__ANCHORWORD_MATCH_WEIGHT; private float anchorPhraseMatchWeight = Alignment.DEFAULT__ANCHORPHRASE_MATCH_WEIGHT; private float properNameMatchWeight = Alignment.DEFAULT__PROPERNAME_MATCH_WEIGHT; private float diceMatchWeight = Alignment.DEFAULT__DICE_MATCH_WEIGHT; private float dicePhraseMatchWeight = Alignment.DEFAULT__DICEPHRASE_MATCH_WEIGHT; private float numberMatchWeight = Alignment.DEFAULT__NUMBER_MATCH_WEIGHT; private float scoringCharacterMatchWeight = Alignment.DEFAULT__SCORINGCHARACTER_MATCH_WEIGHT; /* private int outputFileNamingMethod = Alignment.DEFAULT__FILE_NAMING_METHOD; private String fileNamingCorrespExtension = Alignment.DEFAULT__CORRESP_EXTENSION; private String fileNamingNewlineExtension = Alignment.DEFAULT__NEWLINE_EXTENSION; private String fileNamingCorrespSuffix = Alignment.DEFAULT__CORRESP_SUFFIX; private String fileNamingNewlineSuffix = Alignment.DEFAULT__NEWLINE_SUFFIX; */ // filter for newline format ancestor info AncestorFilter ancestorFilter = new AncestorFilter(AncestorFilter.MODE_ALLOW, "", ""); // default = allow none = deny all // 2006-02-23 match info log file //protected String logFilename = Alignment.DEFAULT__LOG_FILENAME; protected String logFilename = ""; protected OutputStreamWriter logFileOut; boolean logging = false; // logging on/off (true/false) protected AnchorWordList anchorWordList; protected Compare compare; //protected AnchorWordMatches anchorWordMatches; // ### computed at suggest(), but not at unalign() //protected MatchInfoDisplayable matchInfoDisplayable; // ### computed at suggest(), but not at unalign() protected MatchInfo matchInfo; // ### computed at suggest(), but not at unalign() public AlignmentModel() { // package access ¤¤¤ nei dette er jo public ////System.out.println("går i gang med å lage model"); // ###hvorfor står disse her? skal de ikke opp blant members? setRelevantElementNames(Alignment.DEFAULT__RELEVANT_ELEMENT_NAMES); setRelevantAncestorElementNames(Alignment.DEFAULT__RELEVANT_ANCESTOR_ELEMENT_NAMES); ////System.out.println("skal be om å få laget aligned"); aligned = new Aligned(); ////System.out.println("skal be om å få laget toAlign"); toAlign = new ToAlign(); ////System.out.println("skal be om å få laget unaligned"); unaligned = new Unaligned(); ////System.out.println("har fått laget unaligned"); docs = new Document[Alignment.NUM_FILES]; nodes = new NodeList[Alignment.NUM_FILES]; allNodes = new NodeList[Alignment.NUM_FILES]; // set up the parser here. DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setValidating(false); // #### være et brukervalg??? //factory.setValidating(true); //factory.setNamespaceAware(true); try { builder = factory.newDocumentBuilder(); } catch (ParserConfigurationException pce) { // parser with specified options can't be built pce.printStackTrace(); } compare = new Compare(); //anchorWordList = new AnchorWordList(); anchorWordList = new AnchorWordList(AlignmentModel.this); //anchorWordMatches = new AnchorWordMatches(); //matchInfoDisplayable = new MatchInfoDisplayable(AlignmentModel.this); matchInfo = new MatchInfo(AlignmentModel.this); /* //The plugin that calculates alignment //2004-02-19: When I have other plugins, there must be a mechanism // to choose different plugins. plugin = new ExistingCorrespPlugin(); */ } public void purge(AlignGui gui) { // ###dupl kode. se konstruktor. // men gir det mening å skille dette ut i en metode, // f.eks la konstruktor bruke purge()? // #########ikke dupl likevel... aligned.purge(); toAlign.purge(); unaligned.purge(); for (int t=0; t 0) { text += " - "; } text += Integer.toString(getLowestUnalignedElementNumber(t) + 1) + "/" + nodes[t].getLength(); } gui.statusLine.setText(text); // 2006-10-03. ###disse funker ikke. må jeg yielde på en eller annen måte???? //gui.statusLine.invalidate(); // 2006-08-14 //gui.statusLine.validate(); // 2006-08-14 System.out.println(text); // 2006-10-03 } int getLowestUnalignedElementNumber(int t) { // lowest unaligned or under consideration if (toAlign.elements[t].size() > 0) { return ((AElement)(toAlign.elements[t].get(0))).elementNumber; } else if (unaligned.elements[t].size() > 0) { return ((AElement)(unaligned.elements[t].get(0))).elementNumber; } else { return nodes[t].getLength() - 1; } // ### AlignGui gui, // ### gui.model. } // /** * ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤Loads an xml file. * @return true if loading was successful, false if there was an error. * (most likely a parsing error) */ //void loadFile(AlignGui gui, File f, int t) { // package access //void loadFile(AlignGui gui, File f, int t) throws EmptyElementException { // package access // 2006-09-19 void loadFile(AlignGui gui, File f, int t) throws Exception { // package access // 2006-09-22 //void loadFile(File f, int t) { // package access ////System.out.println("f = " + f); // ... Document result = null; try { // make DOM tree from xml file ////gui.counterDoc.insertString(0, "file -> DOM", null); //gui.counter.setText("file -> DOM"); gui.statusLine.setText("File -> DOM"); result = builder.parse(f); //System.out.println("File " + f.getName() + " loaded as text " + t+1); //System.out.println("File " + f.getName() + " loaded as text " + (t+1)); //¤¤¤2006-02-28. for å kunne lagre utfil med samme encoding som innfil Charset cs = Charset.forName(result.getXmlEncoding()); setCharset(t, cs); //// ### gjør dette kun for å fortelle hvor mange elementer det er? //// ### men det er jo ikke direkte child nodes vi er interessert i. disse kan jo være

f.eks //NodeList childNodes = result.getChildNodes(); //System.out.println("Child node count: " + childNodes.getLength()); //childNodes = null; docs[t] = result; // 2006-09-19 } catch (Exception e) { // ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ ErrorMessage.error("Exception (1) when loading text " + (t+1) + " " + f.getName() + ":\n" + e.toString()); // 2006-08-10 } // end 2006-09-19 // get a list of alignable elements from the DOM tree try { // 2006-09-19 nodes[t] = getElements(t); //} catch (EmptyElementException e) { // 2006-09-19 } catch (Exception e) { // 2006-09-22 throw e; // ### // 2006-09-19 } // 2006-09-19 try { // 2006-09-19 // get a list of all elements allNodes[t] = docs[t].getElementsByTagName("*"); // ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤dette er ikke skikkelig // clear ... //System.out.println("*** do clear stuff here? ***"); // fill the unaligned list boxes with a suitable version of the elements //System.out.println("Element count: " + nodes[t].getLength()); // process element list. // update GUI while processing element list. // do processing in separate thread so GUI elements can be updated. Thread load = new LoadFileThread(gui, nodes, t); load.start(); //// init aligned/total ratio in status line // ### funker visst ikke, men skitt i det //updateAlignedTotalRatio(gui); // ### aha. metoden vil jo ikke funke når bare én fil er lest inn. // ### og når fil nr to er lest inn, vil vi vel at det skal stå "Parsed", // og ikke overskrive dette med "0/9999 - 0/9999" - ? // remember name and full pathname of input file gui.model.inputFilepath[t] = f.getCanonicalPath(); gui.model.inputFilename[t] = f.getName(); } catch (Exception e) { // ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ //System.err.println("Exception when loading " + t + " " + f.getName() + ": "); //System.err.println(e.toString()); //ErrorMessage.error("Exception when loading " + t + " " + f.getName() + ":\n" + e.toString()); // 2006-08-10 ErrorMessage.error("Exception (2) when loading text " + (t+1) + " " + f.getName() + ":\n" + e.toString()); // 2006-08-10 //e.printStackTrace(); //return false; } } /* * Added by boerre * Loads an xml file, without gui */ void loadTobeAlignedFile(File f, int t) throws Exception { Document result = null; try { result = builder.parse(f); Charset cs = Charset.forName(result.getXmlEncoding()); setCharset(t, cs); docs[t] = result; } catch (Exception e) { // ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ ErrorMessage.error("Exception (1) when loading text " + (t+1) + " " + f.getName() + ":\n" + e.toString()); } try { nodes[t] = getElements(t); } catch (Exception e) { throw e; } try { allNodes[t] = docs[t].getElementsByTagName("*"); } catch (Exception e) { // ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ ErrorMessage.error("Exception (2) when loading text " + (t+1) + " " + f.getName() + ":\n" + e.toString()); } } //private NodeList getElements(int t) { //private NodeList getElements(int t) throws EmptyElementException { // 2006-09-19 private NodeList getElements(int t) throws Exception { // 2006-09-22 //return docs[t].getElementsByTagName("s"); //return docs[t].getElementsByTagName("p"); funker // ### klønete String[] relevantElementNamesArray = new String[relevantElementNames.size()]; Iterator it = relevantElementNames.keySet().iterator(); int count = 0; while (it.hasNext()) { String name = (String)it.next(); relevantElementNamesArray[count] = name; count++; } try { // 2006-09-19 //return XmlTools.getElementsByTagNames(docs[t], relevantElementNamesArray); return XmlTools.getElementsByTagNames(docs[t], relevantElementNamesArray, getSpecialCharacters()); // 2006-10-03 //} catch (EmptyElementException e) { // 2006-09-19 } catch (Exception e) { // 2006-09-22 throw e; // ### // 2006-09-19 } // 2006-09-19 } /** * establishes corresp attributes in dom for text t */ void setCorrespAttributes(int t) { Iterator it; Iterator eIt; // clean dom of corresp attributes that may have been in the input file for (int i=0; i<((NodeList)(nodes[t])).getLength(); i++) { Element el = (Element)(((NodeList)(nodes[t])).item(i)); el.removeAttribute("corresp"); } // set new corresp attributes in dom. // loop through all finished alignments String newAttribute; it = aligned.alignments.iterator(); while (it.hasNext()) { // next alignment Link link = (Link)(it.next()); // get the corresp attribute values from all the other texts. // loop through all the other texts newAttribute = ""; for (int tt=0; tt refer to a

. // challenge: find the correct

(or similar) // check with previous siblings - // e.g, previous 's in the same

- // what they refer to in the other text // which element to start with? //System.out.println("which element to start with?"); // there might be more than one element from this text in the alignment. // find the first one int smallestElementNumber = Integer.MAX_VALUE; eIt = link.elementNumbers[t].iterator(); while (eIt.hasNext()) { int elementNumber = ((Integer)(eIt.next())).intValue(); if (elementNumber < smallestElementNumber) { smallestElementNumber = elementNumber; } } Node el = nodes[t].item(smallestElementNumber); //System.out.println("el. name = " + el.getNodeName() + ". type = " + el.getNodeType() + ". id = " + ((Element)el).getAttribute("id")); //System.out.println("look for previous"); Node prevEl = XmlTools.getPreviousRelevantSiblingElement(el, relevantElementNames); String otherId = ""; while (prevEl != null) { //System.out.println("prevEl. name = " + prevEl.getNodeName() + ". type = " + prevEl.getNodeType()); if (((Element)prevEl).getAttribute("corresp") != "") { // found a sibling which refers to #####this other text. // get its last corresp attribute value (if more than one) String[] values = ((Element)prevEl).getAttribute("corresp").split(" "); otherId = values[values.length-1]; //System.out.println("t = " + t + ". tt = " + tt + ". otherId = " + otherId); break; } else { prevEl = XmlTools.getPreviousRelevantSiblingElement(prevEl, relevantElementNames); } } //System.out.println("otherId = " + otherId); if (otherId == "") { // no previous sibling refers to the other text. //System.out.println("no previous sibling refers to the other text"); // must try to consult elements in the previous parent element //System.out.println("must try to consult elements in the previous parent element"); // first up one level //System.out.println("first up one level"); Node parent = XmlTools.getRelevantAncestorElement(el, relevantAncestorElementNames); if (parent == null) { // no higher level. //System.out.println("no higher level"); // no reason to believe there's a higher level in the other text either. // refer to nothing newAttribute = ""; } else { // then to previous sibling (previous parent) //System.out.println("then to previous sibling (previous parent)"); Node prevParent = XmlTools.getPreviousRelevantSiblingElement(parent, relevantAncestorElementNames); if (prevParent == null) { // no sibling. first parent. //System.out.println("no sibling. first parent"); // then it's the first parent in the other text we want. // first get the first element in the other text Node otherElement = nodes[tt].item(0); // then get its parent Node otherParent = XmlTools.getRelevantAncestorElement(otherElement, relevantAncestorElementNames); if (otherParent == null) { // no parent. // refer to nothing newAttribute = ""; } else { // refer to that parent newAttribute = ((Element)otherParent).getAttribute("id"); //System.out.println("refer to that otherParent. t = " + t + ". tt = " + tt + ". newAttribute = " + newAttribute); } } else { // found previous parent. //System.out.println("found previous parent"); // try its children (###which hopefully are on the right level, // and not e.g on a level between

and ). // work backwards from last child //System.out.println("try its children. work backwards from last child"); prevEl = XmlTools.getRelevantLastDescendantElement(prevParent, relevantElementNames); if (prevEl == null) { // no children. //System.out.println("no children"); // could be e.g empty

, // or e.g some irrelevant element between

's. // give up. // refer to nothing newAttribute = ""; } else { // ... //System.out.println("there are children"); otherId = ""; while (prevEl != null) { //System.out.println("look for child with corresp"); if (((Element)prevEl).getAttribute("corresp") != "") { // found a sibling which refers to #####this other text. //System.out.println("found a sibling which refers to #####this other text"); // get its last corresp attribute value (if more than one) //System.out.println("get its last corresp attribute value (if more than one)"); String[] values = ((Element)prevEl).getAttribute("corresp").split(" "); //System.out.println("values.length() = " + values.length() + ", values[0] = " + values[0]); // ### otherId = values[values.length-1]; //System.out.println("t = " + t + ". tt = " + tt + ". otherId = " + otherId); break; } else { prevEl = XmlTools.getPreviousRelevantSiblingElement(prevEl, relevantElementNames); } } if (otherId == "") { // no children of previous parent refer to the other text. // ... // give up. // refer to nothing newAttribute = ""; } else { // found reference to the other text //System.out.println("found reference to the other text"); // get element in the other text // reference to which level? //System.out.println("which level?"); //Node otherEl = docs[tt].getElementById(otherId); // ### funker ikke????!!!! Node otherEl = XmlTools.getElementByIdInNodeList(allNodes[tt], otherId); // ### gjør dette isteden if (relevantElementNames.containsKey(otherEl.getNodeName())) { // the "relevant" level //System.out.println("the 'relevant' level"); // get its parent. //System.out.println("get its parent"); // up one level in the other text Node otherParent = XmlTools.getRelevantAncestorElement(otherEl, relevantAncestorElementNames); //System.out.println("parent has id = " + ((Element)otherParent).getAttribute("id")); if (otherParent == null) { // no higher level. //System.out.println("no higher level"); // give up. // refer to nothing newAttribute = ""; } else { // then to next sibling (next parent) //System.out.println("then to next sibling (next parent)"); Node nextOtherParent = XmlTools.getNextRelevantSiblingElement(otherParent, relevantAncestorElementNames); if (nextOtherParent == null) { // no next sibling (next parent). //System.out.println("no next sibling (next parent)"); // give up // refer to nothing newAttribute = ""; } else { // refer to that next sibling (next parent), // which hopefully is "in synch" // with the current element and its parent newAttribute = ((Element)nextOtherParent).getAttribute("id"); //System.out.println("refer to that next sibling (next parent). t = " + t + ". tt = " + tt + ". newAttribute = " + newAttribute); } } } else { // something else, i.e, parent level. //System.out.println("something else, i.e, parent level"); // refer to that parent newAttribute = otherId; } } } } } } else { // found a previous sibling with a reference to the other text. // reference to which level? //System.out.println("found a previous sibling with a reference to the other text. otherId = " + otherId); //System.out.println("t = " + t); //System.out.println("tt = " + tt); //System.out.println("docs[tt].getXmlVersion() = " + docs[tt].getXmlVersion()); //Node otherEl = docs[tt].getElementById(otherId); // ### funker ikke????!!!! Node otherEl = XmlTools.getElementByIdInNodeList(allNodes[tt], otherId); // ### gjør dette isteden //System.out.println("otherEl = " + otherEl); //System.out.println("otherEl.getNodeName() = " + otherEl.getNodeName()); if (relevantElementNames.containsKey(otherEl.getNodeName())) { // the "relevant"level // get its parent Node otherParent = XmlTools.getRelevantAncestorElement(otherEl, relevantAncestorElementNames); if (otherParent == null) { // has no parent. // refer to nothing newAttribute = ""; } else { // refer to that parent newAttribute = ((Element)otherParent).getAttribute("id"); } } else { // something else, i.e, parent level. // refer to that parent newAttribute = otherId; } } } else { // loop through the elements the alignment has got in this other text eIt = link.elementNumbers[tt].iterator(); while (eIt.hasNext()) { int elementNumber = ((Integer)(eIt.next())).intValue(); String id = ((Element)(((AElement)(aligned.elements[tt].get(elementNumber))).element)).getAttribute("id"); if (newAttribute != "") { newAttribute += " "; } newAttribute += id; } } } } // set the corresp attribute values in all the elements // the alignment has got in the current text eIt = link.elementNumbers[t].iterator(); while (eIt.hasNext()) { int elementNumber = ((Integer)(eIt.next())).intValue(); ((Element)(((AElement)(aligned.elements[t].get(elementNumber))).element)).setAttribute("corresp", newAttribute); } } } /** * Saves an xml file with corresp attributes */ //void saveFile(AlignGui gui, File f, int t) { // package access //void saveFile(File f, int t) { // package access //void saveCorrespFormatFile(File f, int t) { // package access void saveCorrespFormatFile(File f, int t, Charset cs) { // package access // ¤¤¤ burde vært advarsel hvis pending alignments? // hvis unaligned? // komme spørsmål om prog skal sette et merke? //// establish corresp attributes in dom for text t // ### nei, gjør det på forhånd //setCorrespAttributes(t); // write dom to file //XmlOutput.writeXml(docs[t], f); XmlOutput.writeXml(docs[t], f, cs); } /** * Saves file in newline format */ //void saveNewlineFormatFile(File f, int t) { // package access //void saveNewlineFormatFile(File f, int t, Charset cs) { // package access void saveNewlineFormatFile(File f, int t, Charset cs, AncestorFilter filter) { // package access //System.out.println("filter = " + filter); // ¤¤¤ burde vært advarsel hvis pending alignments? // hvis unaligned? // komme spørsmål om prog skal sette et merke? Iterator it; Iterator eIt; // clean dom of corresp attributes for (int i=0; i<((NodeList)(nodes[t])).getLength(); i++) { Element el = (Element)(((NodeList)(nodes[t])).item(i)); el.removeAttribute("corresp"); } // ... //FileWriter out; OutputStreamWriter out; try { //out = new FileWriter(f); //¤¤¤endringer 2006-02-20 for å kunne skrive utf-8, o.a OutputStream fOut= new FileOutputStream(f); OutputStream bOut= new BufferedOutputStream(fOut); out = new OutputStreamWriter(bOut, cs); } catch (IOException e1) { // ### ### ### ### ### ### ### ### ### ### ### ### ### Toolkit.getDefaultToolkit().beep(); System.out.println("Program error? Can't create new FileWriter"); return; } // loop through all finished alignments and write to file it = aligned.alignments.iterator(); while (it.hasNext()) { // next alignment Link link = (Link)(it.next()); // loop through the alignment's elements String line = ""; boolean first = true; eIt = link.elementNumbers[t].iterator(); while (eIt.hasNext()) { int elementNumber = ((Integer)(eIt.next())).intValue(); //Element element = (Element)(((AElement)(aligned.elements[t].get(elementNumber))).element); //String elementText = XmlTools.getText(element); ### heller bruke .getTextContent() AElement aElement = (AElement)(aligned.elements[t].get(elementNumber)); //String elementText = aElement.toString(); // ###toNewString(): some users might like parent info prepended to the elements // in their newline format output files String elementText = aElement.toNewString(filter); if (first) { first = false; } else { line += " "; } line += elementText; } try { out.write(line + "\n"); } catch (IOException e2) { // ### ### ### ### ### ### ### ### ### ### ### ### ### Toolkit.getDefaultToolkit().beep(); System.out.println("Program error? Can't do out.write"); try { out.close(); } catch (IOException e3) { // ### ### ### ### ### ### ### ### ### ### ### ### ### Toolkit.getDefaultToolkit().beep(); System.out.println("Program error? Can't do out.close"); return; } return; } } try { out.close(); } catch (IOException e4) { // ### ### ### ### ### ### ### ### ### ### ### ### ### Toolkit.getDefaultToolkit().beep(); System.out.println("Program error? Can't do out.close"); return; } // what if there are unfinished ones? //... } /** * Saves file in "external" format */ void saveExternalFormatFile(File f) { // ¤¤¤ samme spm som for de andre formatene // establish corresp attributes in dom for all texts. // ### no - not necessary if already saved in "corresp" format // This is necessary so that all data is written to the file. // Otherwise the file gets no references for (int t=0; t\n"; try { out.write(data, 0, data.length()); } catch (IOException e) { JOptionPane.showMessageDialog( null, "Can't write to file " + f.getName(), //"¤¤¤Title", "Error", // 2006-09-21 JOptionPane.ERROR_MESSAGE ); //System.err.println("Exception when writing to " + f.getName() + ": "); //System.err.println(e.toString()); ErrorMessage.error("Exception when writing to " + f.getName() + ":\n" + e.toString()); // 2006-08-10 return; } // loop through all finished alignments and write to file Iterator it = aligned.alignments.iterator(); while (it.hasNext()) { // next alignment. // get all the id's to link Link link = (Link)(it.next()); String xtargetsValue = ""; // loop through the texts for (int t=0; t 0) { int firstElementNumber = ((Integer)(((TreeSet)(link.elementNumbers[tt])).first())).intValue(); // get the corresp attribute //System.out.println((AElement)(aligned.elements[tt].get(firstElementNumber))); ids = ((Element)(((AElement)(aligned.elements[tt].get(firstElementNumber))).element)).getAttribute("corresp"); } else { // the alignment has no element in the other text. // get the id's from the alignment's elements in _this_ text Iterator eIt = link.elementNumbers[t].iterator(); while (eIt.hasNext()) { int elementNumber = ((Integer)(eIt.next())).intValue(); String id = ((Element)(((AElement)(aligned.elements[t].get(elementNumber))).element)).getAttribute("id"); if (ids != "") { ids += " "; } ids += id; } } // ... if (t > 0) { xtargetsValue += ";"; } xtargetsValue += ids; } // create link (alignment) info data = "\n"; // output info try { out.write(data, 0, data.length()); } catch (IOException e) { JOptionPane.showMessageDialog( null, "Can't write to file " + f.getName(), //"¤¤¤Title", "Error", // 2006-09-21 JOptionPane.ERROR_MESSAGE ); //System.err.println("Exception when writing to " + f.getName() + ": "); //System.err.println(e.toString()); ErrorMessage.error("Exception when writing to " + f.getName() + ":\n" + e.toString()); // 2006-08-10 return; } } // create and output root end element data = "\n"; try { out.write(data, 0, data.length()); } catch (IOException e) { JOptionPane.showMessageDialog( null, "Can't write to file " + f.getName(), //"¤¤¤Title", "Error", // 2006-09-21 JOptionPane.ERROR_MESSAGE ); //System.err.println("Exception when writing to " + f.getName() + ": "); //System.err.println(e.toString()); ErrorMessage.error("Exception when writing to " + f.getName() + ":\n" + e.toString()); // 2006-08-10 return; } // close output file try { out.close(); } catch (IOException e) { JOptionPane.showMessageDialog( null, "Can't close file " + f.getName(), //"¤¤¤Title", "Error", // 2006-09-21 JOptionPane.ERROR_MESSAGE ); //System.err.println("Exception when closing " + f.getName() + ": "); //System.err.println(e.toString()); ErrorMessage.error("Exception when closing " + f.getName() + ":\n" + e.toString()); // 2006-08-10 return; } } // compute and display info about the current anchor word matches // and other matches §§§ void computeMatches(AlignGui gui) { // ### compute and display //void computeMatches() { //System.out.println("model sin computeMatches(). gui = " + gui); //gui.setMatchInfoTextArea(matchInfoDisplayable.toString()); matchInfo.computeDisplayableList(); gui.matchInfoList.setVisible(true); } // clear info about the current anchor word matches // and other matches §§§ void clearMatches(AlignGui gui) { //matchInfoDisplayable.clear(); //gui.setMatchInfoTextArea(""); // ### earlier the info box was a JTextArea. // now it is a JList referring to a List. // it feels wrong to null the List. // instead we hide the box gui.matchInfoList.setVisible(false); } // 2006-02-23. log displayed info about the current anchor word matches and other matches §§§ void logMatches(AlignGui gui) { // //System.out.println("Skal jeg skrive alignete elementer og match-info til loggfil?"); if (gui.model.getLogging()) { // 2006-04-18 //System.out.println("Ja, jeg skal det."); try { // ###logMatches() er misvisende navn hvis også skal logge selve elementene String text = ""; for (int t=0; t 1) { //logMatchesHeader("*** More than one alignment - info below is misleading ***"); // boerre: ikke viktig logHeader(gui, "*** More than one alignment - info below is misleading ***"); // 2006-04-18 } else { //logMatchesHeader("*** Next alignment ***"); // boerre: ikke viktig logHeader(gui, "*** Next alignment ***"); // 2006-04-18 } // Heller ikke viktig logMatches(gui); aligned.pickUp(gui, toAlign.flush(), scroll); computeMatches(gui); // ### compute and display //ShowCompare.clear(gui); gui.compareInfoPanel.off(); gui.compareInfoPanel.repaint(); // garbage collect. // for each text find number of first element not yet aligned. // (if all are aligned the number will be one larger than the highest element number.) // (¤¤¤perhaps the code here should check the element numbers themselves and not just rely on size()) int[] ix = new int[Alignment.NUM_FILES]; for (int t=0; t, (C) 2006 * * Copyright: See COPYING file that comes with this distribution * */ void suggest2() { String report = ""; boolean outOfMemory = false; int runLimit = 0; int mode = Alignment.MODE_AUTO; if (!toAlign.empty()) { System.out.println("!toAlign.empty()"); Toolkit.getDefaultToolkit().beep(); } else { System.out.println("else !toAlign.empty()"); mode = Alignment.MODE_AUTO; runLimit = 200; } int runCount = 0; boolean doneAligning = false; while (!doneAligning) { System.out.println("while !doneAligning"); AlignmentModel.this.compare.resetBestPathScores(); int[] position = new int[Alignment.NUM_FILES]; for (int t=0; t 0) { position[t] = ((AElement)(((DefaultListModel)(unaligned.elements[t])).get(0))).elementNumber - 1; // ############# } else { position[t] = AlignmentModel.this.nodes[t].getLength() - 1; } } QueueList queueList = new QueueList(AlignmentModel.this, position); QueueList nextQueueList; PathStep step; int stepCount = 0; boolean doneLengthening = false; do { System.out.println("inne i do"); Iterator qIt = queueList.entry.iterator(); nextQueueList = new QueueList(); while (qIt.hasNext()) { System.out.println("while (qIt.hasNext())"); Object temp = qIt.next(); QueueEntry queueEntry = (QueueEntry)(temp); if (!queueEntry.removed) { if (queueEntry.end) { System.out.println("if (!queueEntry.removed)"); QueueEntry newQueueEntry = (QueueEntry)queueEntry.clone(); } else { Iterator iIt = AlignmentModel.this.compare.stepList.iterator(); while (iIt.hasNext()) { System.out.println("while (iIt.hasNext())"); System.out.println(runCount); step = (PathStep)iIt.next(); try { QueueEntry newQueueEntry = queueEntry.makeLongerPath(AlignmentModel.this, step); if (newQueueEntry.path != null) { int[] pos = newQueueEntry.path.position; nextQueueList.remove(pos); queueList.remove(pos); nextQueueList.add(newQueueEntry); } } catch (EndOfAllTextsException e) { QueueEntry newQueueEntry = (QueueEntry)queueEntry.clone(); newQueueEntry.end = true; if (!nextQueueList.contains(newQueueEntry)) { nextQueueList.add(newQueueEntry); } } catch (EndOfTextException e) { } catch (BlockedException e) { //... } } } } } nextQueueList.removeForReal(); if (nextQueueList.empty()) { doneLengthening = true; } else { queueList = nextQueueList; stepCount++; doneLengthening = (stepCount >= AlignmentModel.this.getMaxPathLength()); } } while (!doneLengthening); if ((queueList.entry.size() == 0) || ((queueList.entry.size() == 1) && (((QueueEntry)(queueList.entry.get(0))).path.steps.size() == 0))) { doneAligning = true; } else { System.out.println("4060 "); Iterator qIt2 = queueList.entry.iterator(); float normalizedBestScore = AlignmentModel.BEST_PATH_SCORE_NOT_CALCULATED; Path bestPath = null; while (qIt2.hasNext()) { System.out.println("while (qIt2.hasNext())"); QueueEntry candidate = ((QueueEntry)qIt2.next()); float normalizedCandidateScore = candidate.score / candidate.path.getLengthInSentences(); report += "normalized score: " + normalizedCandidateScore + "\n"; if (normalizedCandidateScore > normalizedBestScore) { normalizedBestScore = normalizedCandidateScore; bestPath = candidate.path; } } // System.out.println(">>>=================>>> bestScore = " + bestScore); System.out.println(">>>=================>>> best path = " + bestPath); if (bestPath.steps.size() > 0) { PathStep stepSuggestion = (PathStep)bestPath.steps.get(0); System.out.println(">>>=================>>> suggested step = " + stepSuggestion); for (int t=0; t 0"); position[t] = ((AElement)(((DefaultListModel)(unaligned.elements[t])).get(0))).elementNumber - 1; // ############# } else { // no more unaligned elements in text t. // ### er dette suspekt? kan det hende at ikke alle elementene fra DOM er med??? //System.out.println("no more unaligned elements in text t"); position[t] = AlignmentModel.this.nodes[t].getLength() - 1; } } /* System.out.println("!!! skal lage ny QueueList. position=" + position[0] + "," + position[1] + ". altså - dette er den siste cellen som vi har alignet, ikke den første vi skal aligne"); will investigate "all" possible paths with a certain number of steps. will loop once per step, each time building "all" paths that are one step longer than in the previous loop. collect the paths in the queue list. init queue list (queueList) */ QueueList queueList = new QueueList(AlignmentModel.this, position); // the paths that are one step longer will, while they are being created, // reside in nextQueueList QueueList nextQueueList; // variable for each of all the possible steps to try when lengthening a path: 0-0, 0-1, etc PathStep step; // init counter for the lengthening loop int stepCount = 0; // the lengthening loop boolean doneLengthening = false; //System.out.println("before do ... while (!doneLengthening)"); do { /* debugCount++; if (debugCount <= 3) { System.out.println("\n>>>>>>>>>>>>>> queueList =\n" + queueList + "\n"); } */ //System.out.println("\nstepCount=" + stepCount + "\n"); //System.out.println("\nqueueList before lengthening = " + queueList + "\n"); Iterator qIt = queueList.entry.iterator(); nextQueueList = new QueueList(); // loop over each entry in the queue list. each entry is a path //System.out.println("before while (qIt.hasNext())"); while (qIt.hasNext()) { //System.out.println("in while (qIt.hasNext())"); //System.out.println("inner while"); Object temp = qIt.next(); ////System.out.println("crocodile"); QueueEntry queueEntry = (QueueEntry)(temp); //System.out.println("before if (!queueEntry.removed)"); if (!queueEntry.removed) { // ### 2005-11-02. hmmm. denne var det ikke så mye vits så lenge jeg ikke merket for fjerning i queueList, bare i nextQueueList if (queueEntry.end) { // path goes to the end of all texts. // use as it is QueueEntry newQueueEntry = (QueueEntry)queueEntry.clone(); // denne har allerede newQueueEntry.end = true; } else { //System.out.println("in if (!queueEntry.removed)"); //System.out.println("queueEntry = " + queueEntry); ////System.out.println("AlignmentModel.this.compare.incrementsList.size() = " + AlignmentModel.this.compare.incrementsList.size()); // loop through all the possible steps to lengthen the current path with. // note. some or all of these steps will not be possible after all // at the end of the texts Iterator iIt = AlignmentModel.this.compare.stepList.iterator(); //System.out.println("before while (iIt.hasNext())"); while (iIt.hasNext()) { //System.out.println("in while (iIt.hasNext())"); step = (PathStep)iIt.next(); //System.out.println("*** neste steg å prøve å forlenge med er " + step); //nextQueueList.entry.add(new QueueEntry(AlignmentModel.this, queueEntry, step)); //QueueEntry newQueueEntry = new QueueEntry(AlignmentModel.this, queueEntry, step); try { //System.out.println("before makeLongerPath(..."); QueueEntry newQueueEntry = queueEntry.makeLongerPath(AlignmentModel.this, step); //System.out.println("after makeLongerPath(..."); //System.out.println("after makeLongerPath(... newQueueEntry = " + newQueueEntry); if (newQueueEntry.path != null) { // ¤¤¤ .path = null er min krøkkete måte å fortelle at det nye forslaget til path ikke er bedre enn andre paths til samme position, og at forslaget skal kastes //System.out.println("forlenget path beste hittil"); // this new path might be a better (better-scoring) path than // some other paths in the new list. remove those other paths, if any int[] pos = newQueueEntry.path.position; //if ((pos[0] == 2) && (pos[1] == 3)) { // debugCount++; // if (debugCount == 2) { // System.out.println("\n>> >> >> 1 queueList = " + queueList + "\n"); // } //} nextQueueList.remove(pos); // doesn't remove them for real. just marks them for removal later //if ((pos[0] == 2) && (pos[1] == 3)) { // if (debugCount == 2) { // System.out.println("\n>> >> >> 2 queueList = " + queueList + "\n"); // } //} queueList.remove(pos); // must do the same thing in the source. (###dodgy?) see comments for the QueueList remove() method //if ((pos[0] == 2) && (pos[1] == 3)) { // if (debugCount == 2) { // System.out.println("\n>> >> >> 3 queueList = " + queueList + "\n"); // } //} // insert new path in the new list nextQueueList.add(newQueueEntry); } else { //System.out.println("forlenget path skåret ikke høyt nok"); } } catch (EndOfAllTextsException e) { //System.out.println("suggest() catches EndOfAllTextsException"); // end of all texts. // use path as it is, but mark it properly QueueEntry newQueueEntry = (QueueEntry)queueEntry.clone(); newQueueEntry.end = true; //System.out.println("suggest() made newQueueEntry = " + newQueueEntry); // insert new path in the new list unless already there. if (!nextQueueList.contains(newQueueEntry)) { nextQueueList.add(newQueueEntry); } } catch (EndOfTextException e) { //System.out.println("suggest() catches EndOfTextException"); // end of at least one text but not all of them. // forget //System.out.println("EndOfTextException"); } catch (BlockedException e) { //... } } } } } //System.out.println("\n>>>>>>>>>>>nextQueueList før removeForReal = " + nextQueueList); nextQueueList.removeForReal(); // remove for real. see above //System.out.println("\n>>>>>>>>>>>nextQueueList etter removeForReal = " + nextQueueList); if (nextQueueList.empty()) { // not possible to lengthen path. must have reached the end of all the texts doneLengthening = true; } else { queueList = nextQueueList; //System.out.println("queueList after lengthening = " + queueList); stepCount++; doneLengthening = (stepCount >= AlignmentModel.this.getMaxPathLength()); } } while (!doneLengthening); //System.out.println("!!! har laget ny QueueList med alle stier som har <= " + stepCount + " steg. queueList = " + queueList + "\n"); //System.out.println("!!! Skal finne den beste stien av disse"); // ... if ( (queueList.entry.size() == 0) // ### will not happen? || ( (queueList.entry.size() == 1) && (((QueueEntry)(queueList.entry.get(0))).path.steps.size() == 0) ) ) { // must be end of all texts doneAligning = true; } else { Iterator qIt2 = queueList.entry.iterator(); //float bestScore = -1.f; // ### // normalized = diveded by number of sentences. // done because: the paths compared may well have the same number of steps, // but they often have a different number of sentences. // if not normalized a path with e.g 2-1 + 1-2 can win over a correct 1-1 + 1-1 + 1-1 // because it gains extra points from the extra sentences the former path has at its end //float normalizedBestScore = -1.f; // ### float normalizedBestScore = AlignmentModel.BEST_PATH_SCORE_NOT_CALCULATED; // 2006-09-20 Path bestPath = null; //String report = ""; //%%% while (qIt2.hasNext()) { QueueEntry candidate = ((QueueEntry)qIt2.next()); //System.out.println("!!! candidate.score = " + candidate.score); //report += "---------------------" + "\n"; //%%% //report += "path: " + candidate.path + "\n"; //%%% //report += "score: " + candidate.score + "\n"; //%%% //report += "length in sentences: " + candidate.path.getLengthInSentences() + "\n"; //%%% float normalizedCandidateScore = candidate.score / candidate.path.getLengthInSentences(); //report += "normalized score: " + normalizedCandidateScore + "\n"; //%%% //if (candidate.score > bestScore) { if (normalizedCandidateScore > normalizedBestScore) { //System.out.println("!!! bedre enn bestScore = " + bestScore); //bestScore = candidate.score; normalizedBestScore = normalizedCandidateScore; bestPath = candidate.path; } } //System.out.println(">>>=================>>> bestScore = " + bestScore); //System.out.println(">>>=================>>> best path = " + bestPath); // ... if (bestPath.steps.size() > 0) { //System.out.print("A "); //MemTest.print("Tenured Gen", ""); PathStep stepSuggestion = (PathStep)bestPath.steps.get(0); //System.out.println(">>>=================>>> suggested step = " + stepSuggestion); //System.out.print("B "); //MemTest.print("Tenured Gen", ""); // ... for (int t=0; t 1) { System.out.println("more than one id. this element can't be a loner."); // more than one id. this element can't be a loner. // loners refer to one element only, on a "parent" level finishedLonersInThisText = true; } else { System.out.println("one id. check alignable elements in the other text to see if the id belongs to one of them"); // one id. check alignable elements in the other text // to see if the id belongs to one of them if (XmlTools.getElementByIdInNodeList(nodes[tt], correspValue) != null) { System.out.println("belongs to alignable element in other text. => not loner"); // belongs to alignable element in other text. // => not loner // check further if (XmlTools.getElementByIdInDefaultListModel(unaligned.elements[tt], correspValue) != null) { // the element in the other text is an unaligned element // ok finishedLonersInThisText = true; } else { System.out.println("error in corresp. treat as loner"); // the element in the other text is not an unaligned element // error in corresp //############### // treat as loner } } else if(XmlTools.getElementByIdInNodeList(allNodes[tt], correspValue) != null) { System.out.println("belongs to other element in other text, presumably one on a 'parent' level. => loner"); // belongs to other element in other text, // presumably one on a "parent" level. // (¤¤¤but we don't check that element further,. // neither its "level" nor its location) // => loner } else { System.out.println("error in file. treat as loner"); // error in file. //############### // treat as loner } } // ... System.out.println("xxx"); if (!finishedLonersInThisText) { System.out.println("found loner. pop it from unaligned and make an alignment out of it"); // found loner. //// pop it from unaligned and make an alignment out of it // make an alignment out of it //AElement aEl = (AElement)(AlignmentModel.this.unaligned.pop(t)); System.out.println("1.5 get next available element in text " + t); // get next available element in text t ######### //AElement aEl = (AElement)(unaligned.elements[t].get(0)); aEl = Skip.getNextAvailableUnalignedElement(unaligned, someAligned, t); link = new Link(); link.alignmentNumber = alignmentNumber; aEl.alignmentNumber = link.alignmentNumber; alignmentNumber++; link.elementNumbers[t] = new TreeSet(); link.elementNumbers[t].add(aEl.elementNumber); link.elementNumbers[tt] = new TreeSet(); // add it to our collection of ... alignments (the someAligned thing) someAligned.add(link); //someAligned.print(); //if (someAligned.alignments.size() > 3) { // System.out.println("kill this process"); stop[0] = true; stop[1] = true; finishedLonersInThisText = true; //} // ###also the element. kunne ikke Link også holdt rede på disse? //someAligned.add(t, element); someAligned.add(t, aEl); //someAligned.print(); if (!someAligned.hasHoles()) { // got one or more alignments, with no holes. // pop the relevant elements out of unaligned. // we don't need their content. // we got all the data we need already. // just throw them away System.out.println("pop and throw them away 1"); for (t2=0; t2(); link.elementNumbers[t].add(aEl.elementNumber); link.elementNumbers[tt] = new TreeSet(); // ###also ... //someAligned.add(t, element); aEl.alignmentNumber = link.alignmentNumber; someAligned.add(t, aEl); //someAligned.print(); System.out.println("get all the corresponding elements in the other text."); // get all the corresponding elements in the other text. String[] correspIds = correspValue.split(" "); AElement otherAEl = null; for (int i = 0; i < correspIds.length; i++) { System.out.println("get Node otherEl"); Node otherEl = XmlTools.getElementByIdInNodeList(nodes[tt], correspIds[i]); if (otherEl != null) { System.out.println("the corresp id makes sense insofar as it is an id for an alignable element in the other text."); // the corresp id makes sense insofar as // it is an id for an alignable element in the other text. // check further otherAEl = AlignmentModel.this.unaligned.get(tt, otherEl); if (otherAEl != null) { System.out.println("it's an unaligned element all right"); // it's an unaligned element all right link.elementNumbers[tt].add(otherAEl.elementNumber); // ###also ... //someAligned.add(tt, (Element)otherAEl.element); otherAEl.alignmentNumber = link.alignmentNumber; someAligned.add(tt, otherAEl); //someAligned.print(); } else { System.out.println("error"); // error // ######### trenger feilmelding - ikke kun et pip? Toolkit.getDefaultToolkit().beep(); System.out.println("Hit a case the program can't handle"); // ####grisete stop[0] = true; stop[1] = true; break; } } } if (!(stop[0] && stop[1])) { // ### System.out.println("take one of these elements in the other text and get all the corresponding elements in the first text"); System.out.println("otherAEl = " + otherAEl); // take one of these elements in the other text and get // all the corresponding elements in the first text String backCorrespValue = otherAEl.element.getAttributes().getNamedItem("corresp").getNodeValue(); System.out.println("backCorrespValue = " + backCorrespValue); String[] backCorrespIds = backCorrespValue.split(" "); for (int i = 0; i < backCorrespIds.length; i++) { System.out.println("get Node backEl"); Node backEl = XmlTools.getElementByIdInNodeList(nodes[t], backCorrespIds[i]); if ( backEl != null) { System.out.println("the ... id makes sense insofar as it is an id for an alignable element in the first text."); // the .. id makes sense insofar as // it is an id for an alignable element in this text. // check further AElement backAEl = AlignmentModel.this.unaligned.get(t, backEl); if (backAEl != null) { System.out.println("it's an unaligned element all right"); // it's an unaligned element all right link.elementNumbers[t].add(backAEl.elementNumber); // ###also ... //someAligned.add(t, (Element)backAEl.element); backAEl.alignmentNumber = link.alignmentNumber; someAligned.add(t, backAEl); //someAligned.print(); } else { System.out.println("error"); // error // ######### trenger feilmelding - ikke kun et pip? Toolkit.getDefaultToolkit().beep(); System.out.println("Hit a case the program can't handle"); // ####grisete stop[0] = true; stop[1] = true; break; } } } } if (!(stop[0] && stop[1])) { // ### // ??? stop text t if no element or no corresp // check both sides to see if all the corresp''s agree //if link.consistentCorresp() { ##################################i.g.n.m. I.G.N.M. // we have made Link out of them. // put it in a new someAligned someAligned.add(link); //someAligned.print(); //if (someAligned.alignments.size() > 3) { // System.out.println("kill this process"); stop[0] = true; stop[1] = true; //} if (!someAligned.hasHoles()) { // got one or more alignments, with no holes. // pop the relevant elements out of unaligned. // we don't need their content. // we got all the data we need already. // just throw them away System.out.println("pop and throw them away 2"); for (t2=0; t2 10) { // we have done ... alignments, and someAligned is still holey. // we suspect something is wrong System.out.println("kill this process"); stop[0] = true; stop[1] = true; } System.out.println("før while. stop[0]=" + stop[0] +", stop[1]=" + stop[1]); } while(!(stop[0] && stop[1])); System.out.println("G"); //MemTest.print("Tenured Gen", ""); if (!someAligned.empty()) { // error // ######### trenger feilmelding - ikke kun et pip? //dette blir ikke bra hvis someAligned har hull!!! ######################## Toolkit.getDefaultToolkit().beep(); System.out.println("!someAligned.empty()"); System.out.println("Dodgy case??????????????"); toAlign.catch_(someAligned); someAligned = new AlignmentsEtc(); } } // scroll aligned. (waited until now because of a memory leak - ?) for (int t=0; t? // if not, it must be the element in a 1-0 or 0-1 alignment, // with a reference to an ancestor element, e.g, a

if (XmlTools.getElementByIdInNodeList(nodes[tt], ref) { // "relevant" ... wanted.add(tt, ref); } else { // ancestor ...make sure to keep the exact id?... } } // remove the picked ones from the wanted list wanted.remove(picked); // update pointer to next element to pick next[t]++; } else { // error. no more elements in this text ... } } } // ... ... } while (!(wanted.empty() || picked.empty())); if (...) { // found a set of corresponding elements. // ### or perhaps several ones // make an alignment out of them? // or 'more' them up from unaligned to to-align? // ### and do something extra if there are crossed relations? ... if (...) { // something wrong with corresp attrs ... done = true; // ### or throw exception? } // then 'align' them ... } else { done = true; } } while(...); } } getIndexOfElementByIdInNodeList(nodes[...], id); class Link { int alignmentNumber; Set[] elementNumbers; */ void less(AlignGui gui, int t) { // package access ////System.out.println("at model.less()"); //unaligned.catch_(t, toAlign.drop(t)); unaligned.catch_(t, toAlign.drop(gui, t)); // ### 2006-03-30 computeMatches(gui); // ### compute and display //ShowCompare.clear(gui); gui.compareInfoPanel.off(); gui.compareInfoPanel.repaint(); // 2006-10-03 // update aligned/total ratio in status line gui.model.setMemoryUsage(gui); // 2006-10-03 gui.model.updateAlignedTotalRatio(gui); } void more(AlignGui gui, int t) { // package access //System.out.println("model sin more(). gui = " + gui); //System.out.println("\nmodel sin more(). t = " + t); //////////MemTest.print("Heap memory", ""); //MemTest.print("Tenured Gen", ""); //toAlign.pickUp(gui, t, unaligned.pop(t)); toAlign.pickUp(t, unaligned.pop(t)); computeMatches(gui); // ### compute and display); //ShowCompare.clear(gui); gui.compareInfoPanel.off(); gui.compareInfoPanel.repaint(); // 2006-10-03 // update aligned/total ratio in status line gui.model.setMemoryUsage(gui); // 2006-10-03 gui.model.updateAlignedTotalRatio(gui); } } /* gui.statusLine.setText(""); int percentDone = 0; gui.statusLine.setProgress(percentDone); gui.statusLine.repaint_(); int numElements = nodes[t].getLength(); for (int i=0; i