/* * Compare.java * * ... * ... * @author Oystein Reigem */ package aksis.alignment; import java.awt.Color; import java.util.*; import java.io.*; import java.util.regex.*; import javax.swing.*; import java.awt.event.MouseEvent; import java.lang.reflect.*; import java.awt.Toolkit; // beep //java.util.regex.Pattern ///////////////////////////////////////////// // the world of the alignment algorithm(s) // ///////////////////////////////////////////// /** * information about an alignable element. * this is information that is used when comparing elements from the texts. ¤¤¤ */ class ElementInfo { // ###### skulle den hatt en referanse til selve elementet? // length of text content in characters int length = 0; // number of words int numWords = 0; // array of all the words String[] words; // list of anchor word hits, // a hit being a 2 element list, consisting of // - a reference to an entry (line) in an anchor word file, // - a copy of the matching word. // the anchor word entry reference is an Integer - a line number, starting with 0 (?) AnchorWordHits anchorWordHits = new AnchorWordHits(); // list of proper names ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ hva hvis samme navn flere ganger - m i den ene teksten og n i den andre? kan gi for mange poeng - m x n. bør vel heller gi max(m, n) poeng. List properNames = new ArrayList(); // a string of all the "scoring special characters" in the element, as many times as the occur, in original order String scoringCharacters = ""; int elementNumber; // ###nyttig eller unødvendig? 2006-04-05 /** * */ public ElementInfo() { // } /** * */ //public ElementInfo(AlignmentModel model, String text, int t) { public ElementInfo(AlignmentModel model, String text, int t, int elementNumber) { // 2006-04-05 // this.elementNumber = elementNumber; // ###nyttig eller unødvendig? 2006-04-05 length = text.length(); //System.out.println("ElementInfo constructor"); //System.out.println("text = " + text); //System.out.println("length = " + length); // ¤¤¤ foreløpig. 
// deler ved whitespace, og skreller noen spesialtegn av ordene // ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ blir det tomme ord også? ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ SJEKK! // words have space between them, and may be flanked by special characters //String specialCharacters = ".,;:()'" + '"'; //String specialCharactersPattern = Pattern.quote(specialCharacters); // Pattern.quote is JDK 1.5 //System.out.println("specialCharacters=" + specialCharacters); //System.out.println("specialCharactersPattern=" + specialCharactersPattern); //specialCharactersPattern = "[" + "\\s" + ".,;:()'" + '"' + "]"; // §§§§§§§§§§§§§§§§§§midlertidig //words = text.split("[\\s.,;:()]*\\s[\\s.,;:()]*"); //String specialCharacters = ".,;:?!&^(){}[]'" + '"'; String specialCharacters = model.getSpecialCharacters(); //String[] difficultCharacters = { "]", "\\", "^", "-" }; // build pattern. first a 'character class' ('grouping', 'set', i.e, []-bracketed thingie) // with all special, characters and whitespace String specialCharactersClass = "[\\s"; for (int i=0; i < specialCharacters.length(); i++) { //specialCharactersClass += "\\" + specialCharacters.substring(i, i); // ??? får ikke tak i verdi fra specialCharacters.substring(i, i). nullstreng specialCharactersClass += "\\" + String.valueOf(specialCharacters.charAt(i)); // escape all of them, to be certain that the difficult ones are escaped: [\^- } specialCharactersClass += "]"; // then a pattern to split the string into words. // it is assumed words are separated by whitespace, // possibly with special characters sticking to the ends. // the splitting will split into words // and remove the leading/trailing special characters from the words String specialCharactersPattern = specialCharactersClass + "*\\s" + specialCharactersClass + "*"; //System.out.println("specialCharactersPattern = " + specialCharactersPattern); //for (int i=0; i < specialCharacters.length(); i++) { // if (difficultCharacters. 
//// force metacharacters to be treated as ordinary characters by enclosing them within \Q and \E //String specialCharactersPattern = "[" + "\\s" + "\\Q" + specialCharacters + "\\E" + "]*"; //// when splitting surround the text with spaces. //// this will cause the splitting pattern to do its special character stripping work //// not just between the words, but also before and after the first and last word //String[] tempWords = (" " + text + " ").split(specialCharactersPattern + "\\s" + specialCharactersPattern); //String[] tempWords = (" " + text + " ").split("[\\s\\Q.,;:()'\\E]*\\s[\\s\\Q.,;:()'\\E]*"); // får ikke \Q...\E til å funke //String[] tempWords = (" " + text + " ").split("[\\s\\.\\,\\;\\:\\(\\)\\'\\""]*\\s[\\s\\.\\,\\;\\:\\(\\)\\'\\"'+"]*"); // dette funker String[] tempWords = (" " + text + " ").split(specialCharactersPattern); // ¤¤¤ Note that the only special characters or metacharacters inside a character class are the closing bracket (]), the backslash (\), the caret (^) and the hyphen (-). // ¤¤¤ hva med &? java docs sier && har spesiell betydning, men det er kanskje etter ]? // remove that extra element that was produced because of the leading space numWords = tempWords.length - 1; words = new String[numWords]; System.arraycopy(tempWords, 1, words, 0, numWords); tempWords = null; //System.out.print("# = " + numWords + ":"); for (int w=0; w>> ElementInfo constructor. anchorWordHits = " + anchorWordHits); // ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ getProperNames() hører vel ikke hjemme i AnchorWordList, men hvor skal jeg ha den? properNames = model.anchorWordList.getProperNames(words); // ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ tilsvarende scoringCharacters = model.anchorWordList.getScoringCharacters(text); //System.out.println("ElementInfo constructor. 
scoringCharacters = " + scoringCharacters); } // ### for debuggingsformål public String toString() { StringBuffer ret = new StringBuffer(); ret.append("# chars = " + length); ret.append("; "); ret.append("# words = " + numWords); ret.append("; "); ret.append("words = {"); for (int i=0; i < words.length; i++) { if (i > 0) { ret.append(", "); } ret.append(words[i]); } ret.append("}; "); ret.append("anchor word hits = " + anchorWordHits); ret.append("; "); ret.append("proper names = " + properNames); return new String(ret); } } /** * a list with information about alignable elements in one text. * doesn't span the whole text, * just a suitable range, * starting with the first element not yet aligned. * belongs to a Compare object */ class ElementsInfo { // which interval is stored // - first and last elementNumber, ikke sant?### int first = 0; int last = -1; // list of ElementInfo objects private List elementInfo = new ArrayList(); public ElementsInfo() { // } public void purge() { first = 0; last = -1; elementInfo.clear(); } //public ElementInfo getElementInfo(AlignmentModel model, int elementNumber, int t) { public ElementInfo getElementInfo(AlignmentModel model, int elementNumber, int t) throws EndOfTextException { //System.out.println("\ngetElementInfo. t = " + t + ", elementNumber = " + elementNumber); //System.out.println("getElementInfo. first = " + first + ", last = " + last); if (elementNumber < first) { // wanted element is outside range // expand range. //System.out.println("wanted element is outside range. expand range"); setFirst(model, elementNumber, t); ////setFirst(model, elementNumber, t, elementNumber); // 2006-04-05 } else if (elementNumber > last) { // wanted element is outside range - too high. // expand range. //System.out.println("wanted element is outside range - too high. 
expand range"); try { setLast(model, elementNumber, t); ////setLast(model, elementNumber, t, elementNumber); // 2006-04-05 } catch (EndOfTextException e) { throw e; } } //System.out.println("first = " + first + ", last = " + last); // debug ElementInfo temp = (ElementInfo)elementInfo.get(elementNumber - first); //System.out.println("getElementInfo. " + temp + "\n"); // end debug return (ElementInfo)elementInfo.get(elementNumber - first); } /* public setElementInfo(int elementNumber, int t) { ... element = ...[t]...; String text = XmlTools.getText(element); ### heller bruke .getTextContent() elementInfo.set... } */ //public int getFirst(AlignmentModel model, int t) { public int getFirst() { return first; } /** * change range - set a new start point * update content accordingly. */ public void setFirst(AlignmentModel model, int newFirst, int t) { ////public void setFirst(AlignmentModel model, int newFirst, int t, int elementNumber) { // 2006-04-05 //System.out.println("enter setFirst(). t = " + t + ", first = " + first + ", last = " + last + ", newFirst = " + newFirst); if (newFirst < first) { //System.out.println("setFirst(). tilfelle 1"); List more = new ArrayList(); for (int count = 0; count < first - newFirst; count++) { /* //Object element = model.unaligned.elements[t].get(newFirst + count); // feil x 2 ###################### // newFirst + count is absolute index. // calculate index relative unaligned. // ###### griseri. trenger metoder aligned.size(t) og toAlign.size(t) int globalIndex = newFirst + count; int numAligned = model.aligned.elements[t].size(); int numToAlign = model.toAlign.elements[t].getSize(); if (globalIndex < numAligned) { // get from aligned int index = globalIndex; Object element = ... 
} else if (globalIndex < numAligned + numToAlign) { // get from to-align int index = globalIndex - numAligned; Object element = ((AElement)(model.toAligned.elements[t].get(index))).element; } else { // get from unaligned int index = globalIndex - (numAligned + numToAlign); Object element = ((AElement)(model.unaligned.elements[t].get(index))).element; } // ### nei, f#¤%& - kan den tas fra dom? §§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§ */ //String text = XmlTools.getText((Node)element); ### heller bruke .getTextContent() int index = newFirst + count; //String text = XmlTools.getText(model.nodes[t].item(index)); String text = model.nodes[t].item(index).getTextContent(); /* skal dette aktiveres her også? // 2006-9-15 // ###replace all " by ' because of a bug in LineBreakMeasurer System.out.println("2"); Pattern pattern = Pattern.compile("\""); Matcher matcher = pattern.matcher(text); text = matcher.replaceAll("'"); // end 2006-9-15 */ //more.add(new ElementInfo(model, text, t)); more.add(new ElementInfo(model, text, t, index)); // index = elementNumber. ???? 20056-04-05 } elementInfo.addAll(0, more); first = newFirst; } else if (newFirst > last) { //System.out.println("setFirst(). tilfelle 2"); elementInfo.clear(); first = newFirst; int husk = last; last = first - 1; //System.out.println("setFirst endret last fra " + husk + " til " + last); } else { //System.out.println("setFirst(). tilfelle 3"); for (int count = 0; count < newFirst - first; count++) { //elementInfo.remove(first); ### ugh elementInfo.remove(0); } first = newFirst; } //System.out.println("end setFirst(). t = " + t + ", first = " + first + ", last = " + last + ", newFirst = " + newFirst); //System.out.println("end setFirst(). ElementsInfo = " + ElementsInfo.this); } /** * change range - set a new end point * update content accordingly. 
*/ //public void setLast(AlignmentModel model, int newLast, int t) { public void setLast(AlignmentModel model, int newLast, int t) throws EndOfTextException { ////public void setLast(AlignmentModel model, int newLast, int t, int elementNumber) throws EndOfTextException { // 2006-04-05 //System.out.println("enter setLast(). t = " + t + ", first = " + first + ", last = " + last + ", newLast = " + newLast); if (newLast > last) { //System.out.println("setLast(). tilfelle 1"); for (int count = 0; count < newLast - last; count++) { /* //Object element = ((AElement)(model.unaligned.elements[t].get(last + 1 + count))).element; // ###################### // last + 1 + count is absolute index. // calculate index relative unaligned. // ###### griseri. trenger metoder aligned.size(t) og toAlign.size(t) System.out.println("# aligned = " + model.aligned.elements[t].size()); System.out.println("# to align = " + model.toAlign.elements[t].getSize()); System.out.println("vil ha el nr " + (last + 1 + count) + " globalt"); int index = last + 1 + count - (model.aligned.elements[t].size() + model.toAlign.elements[t].getSize()); System.out.println("vil ha el nr " + index + " i unaligned"); Object element = ((AElement)(model.unaligned.elements[t].get(index))).element; */ //String text = XmlTools.getText((Node)element); ### heller bruke .getTextContent() int index = last + 1 + count; if (index >= model.nodes[t].getLength()) { last = index - 1 - count; //System.out.println("setter last = " + last + " (sjekk at verdien er riktig!), og throw'er en EndOfTextException"); throw new EndOfTextException(); } //String text = XmlTools.getText(model.nodes[t].item(index)); String text = model.nodes[t].item(index).getTextContent(); /* skal dette aktiveres her også? 
// 2006-9-15 // ###replace all " by ' because of a bug in LineBreakMeasurer System.out.println("1"); Pattern pattern = Pattern.compile("\""); Matcher matcher = pattern.matcher(text); text = matcher.replaceAll("'"); // end 2006-9-15 */ //elementInfo.add(new ElementInfo(model, text, t)); elementInfo.add(new ElementInfo(model, text, t, index)); // index = elementNumber. ???? 2006-04-05 } last = newLast; } else if (newLast < first) { //System.out.println("setLast(). tilfelle 2"); elementInfo.clear(); last = first - 1; } else { //System.out.println("setLast(). tilfelle 3"); for (int count = 0; count < last - newLast; count++) { //elementInfo.remove(last - count); ### ugh elementInfo.remove(last - first - count); } last = newLast; } //System.out.println("end setLast(). t = " + t + ", first = " + first + ", last = " + last + ", newLast = " + newLast); //System.out.println("end setLast(). ElementsInfo = " + ElementsInfo.this); } // ### for debuggingsformål public String toString() { StringBuffer ret = new StringBuffer(); ret.append("[\n"); Iterator it = elementInfo.iterator(); while (it.hasNext()) { ElementInfo e = (ElementInfo)it.next(); ret.append("" + e + "\n"); } ret.append("]\n"); return new String(ret); } } /** * scores from comparing a pair ¤¤¤ of elements from different texts. 
*/ /* class CompareCell { //// score from comparison of length of text content measured in characters //float lengthScore = 0; //float oppositeLengthScore = 0; // debug //// score from comparison of length of text content measured in number of words //int numWordsScore = 0; // length of text content in characters int[] length = new int[Alignment.NUM_FILES]; // number of words int[] numWords = new int[Alignment.NUM_FILES]; // score from comparison of anchor words int anchorWordScore = 0; // score from comparison of proper names int properNameScore = 0; // dice equality score int diceScore = 0; // best score so far for path leading to this position in the compare matrix float bestPathScore = (float)(Integer.MIN_VALUE); // ##### public CompareCell() { } public CompareCell(AlignmentModel model, ElementInfo info1, ElementInfo info2) { // Iterator it1; Iterator it2; int i1; int i2; // compare lengths measured in characters // ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ diktet opp selv //System.out.println("compare lengths measured in characters"); //System.out.println("info1.length = " + info1.length + " info2.length = " + info2.length); //lengthScore = (float)(1.0 / (((double)Math.abs(info1.length - info2.length) / (double)Math.min(info1.length, info2.length)) + 1.0)); //System.out.println("lengthScore = " + lengthScore); //oppositeLengthScore = (float)(1.0 / (((double)Math.abs(info2.length - info1.length) / (double)Math.min(info2.length, info1.length)) + 1.0)); //System.out.println("oppositeLengthScore = " + oppositeLengthScore); // compare lengths measured in number of words //System.out.println("compare lengths measured in number of words - dummy version"); //numWordsScore = 0; // ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ foreløpig???? // set ... 
length[0] = info1.length; length[1] = info2.length; numWords[0] = info1.numWords; numWords[1] = info2.numWords; // compare anchor words §§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§ //System.out.println("compare anchor words"); anchorWordScore = 0; it1 = info1.anchorWordHits.hits.iterator(); while (it1.hasNext()) { Integer anchorWordHit1 = ((AnchorWordHit)it1.next()).getIndex(); // ### dårlig navn. kanskje elementene i ...hits skal kalles ...hit. og ...hit være kort liste bestående av referanse (Integer) til ankerordslisten, samt kopi av ordet (String) it2 = info2.anchorWordHits.hits.iterator(); while (it2.hasNext()) { Integer anchorWordHit2 = ((AnchorWordHit)it2.next()).getIndex(); //if (anchorWordHit1 == anchorWordHit2) { if (anchorWordHit1.equals(anchorWordHit2)) { anchorWordScore++; //System.out.println( // "match " // + ((AnchorWordList)(model.anchorWordList)).getEntry(anchorWordHit1.intValue()) // + " <-> " // + ((AnchorWordList)(model.anchorWordList)).getEntry(anchorWordHit2.intValue()) //); //System.out.println("> > > > anchorWordScore øker til " + anchorWordScore); } } } // compare proper names //System.out.println("compare proper names"); properNameScore = 0; it1 = info1.properNames.iterator(); while (it1.hasNext()) { Object properName1 = it1.next(); it2 = info2.properNames.iterator(); while (it2.hasNext()) { Object properName2 = it2.next(); //if (properName1 == properName2) { if (properName1.equals(properName2)) { properNameScore++; // ### feil å bare summere? } } } // compute dice equality ### // ### dette er sikkert feil. skal vi telle opp de ord-par som er tilstrekkelig like? // // ... // // // DICE: Double the number of shared character bigrams // and divide by total number of bigrams in each string // Example: reagir and repair have bigram sets // {re,ea,ag,gi,ir} and {re,ep,pa,ai,ir}, respectively, // and shared bigrams are {re,ir}. 
// Similarity score = (2 * 2)/(5+5) = 2/5 = 0.40 // //System.out.println("compute dice equality"); float diceScore1; // dice score for one single word pair. member diceScore is total score. each word pair that scores at least 0.7 contributes with 1 to the total for (i1 = 0; i1 < info1.words.length; i1++) { String word1 = info1.words[i1]; // ### ha valgmulighet her? if (true) { word1.toLowerCase(); } for (i2 = 0; i2 < info2.words.length; i2++) { String word2 = info2.words[i2]; // ### ha valgmulighet her? if (true) { word2.toLowerCase(); } // diceScore1 = SimilarityUtils.dice(word1, word2); //if (word1.equals(word2)) { // System.out.println("diceScore1 av " + word1 + " og " + word2 + " = " + diceScore1); //} if (diceScore1 >= 0.7) { diceScore += 1; } } } //if (word1.substring(i1, i1+2).equalsIgnoreCase(word2.substring(i2, i2+2)) { // diceScore++; //} } */ /** * scores from comparing elements from different texts, * either ... * or ... . * ### CompareCells dårlig navn. cell dårlig navn. eller?? * belongs to a CompareMatrix object */ class CompareCells { /* // length of text content in characters int[] length = new int[Alignment.NUM_FILES]; // number of words int[] numWords = new int[Alignment.NUM_FILES]; // score from comparison of anchor words int anchorWordScore = 0; // score from comparison of proper names int properNameScore = 0; // dice equality score int diceScore = 0; */ ElementInfoToBeCompared elementInfoToBeCompared; // textual representation of match info String matchInfoDisplayable; //// score - see elementInfoToBeCompared //int score; //float bestPathScore = (float)(Integer.MIN_VALUE); // ##### // best score so far for path leading to this position in the compare matrix // (i.e, the last position of the cell; the cell may span more than one element in each direction) // resides in a diffent map. 
//// this is the key to the other map //String bestPathScoreKey; // this is a reference to an element in the other map BestPathScore bestPathScore; /* public CompareCells() { } */ /* public CompareCell(AlignmentModel model, ElementInfo info1, ElementInfo info2) { // Iterator it1; Iterator it2; int i1; int i2; // compare lengths measured in characters // ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ diktet opp selv //System.out.println("compare lengths measured in characters"); //System.out.println("info1.length = " + info1.length + " info2.length = " + info2.length); //lengthScore = (float)(1.0 / (((double)Math.abs(info1.length - info2.length) / (double)Math.min(info1.length, info2.length)) + 1.0)); //System.out.println("lengthScore = " + lengthScore); //oppositeLengthScore = (float)(1.0 / (((double)Math.abs(info2.length - info1.length) / (double)Math.min(info2.length, info1.length)) + 1.0)); //System.out.println("oppositeLengthScore = " + oppositeLengthScore); // compare lengths measured in number of words //System.out.println("compare lengths measured in number of words - dummy version"); //numWordsScore = 0; // ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ foreløpig???? // set ... length[0] = info1.length; length[1] = info2.length; numWords[0] = info1.numWords; numWords[1] = info2.numWords; // compare anchor words §§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§ //System.out.println("compare anchor words"); anchorWordScore = 0; it1 = info1.anchorWordHits.hits.iterator(); while (it1.hasNext()) { Integer anchorWordHit1 = ((AnchorWordHit)it1.next()).getIndex(); // ### dårlig navn. kanskje elementene i ...hits skal kalles ...hit. 
og ...hit være kort liste bestående av referanse (Integer) til ankerordslisten, samt kopi av ordet (String) it2 = info2.anchorWordHits.hits.iterator(); while (it2.hasNext()) { Integer anchorWordHit2 = ((AnchorWordHit)it2.next()).getIndex(); //if (anchorWordHit1 == anchorWordHit2) { if (anchorWordHit1.equals(anchorWordHit2)) { anchorWordScore++; //System.out.println( // "match " // + ((AnchorWordList)(model.anchorWordList)).getEntry(anchorWordHit1.intValue()) // + " <-> " // + ((AnchorWordList)(model.anchorWordList)).getEntry(anchorWordHit2.intValue()) //); //System.out.println("> > > > anchorWordScore øker til " + anchorWordScore); } } } // compare proper names //System.out.println("compare proper names"); properNameScore = 0; it1 = info1.properNames.iterator(); while (it1.hasNext()) { Object properName1 = it1.next(); it2 = info2.properNames.iterator(); while (it2.hasNext()) { Object properName2 = it2.next(); //if (properName1 == properName2) { if (properName1.equals(properName2)) { properNameScore++; // ### feil å bare summere? } } } // compute dice equality ### // ### dette er sikkert feil. skal vi telle opp de ord-par som er tilstrekkelig like? // // ... // // // DICE: Double the number of shared character bigrams // and divide by total number of bigrams in each string // Example: reagir and repair have bigram sets // {re,ea,ag,gi,ir} and {re,ep,pa,ai,ir}, respectively, // and shared bigrams are {re,ir}. // Similarity score = (2 * 2)/(5+5) = 2/5 = 0.40 // //System.out.println("compute dice equality"); float diceScore1; // dice score for one single word pair. member diceScore is total score. each word pair that scores at least 0.7 contributes with 1 to the total for (i1 = 0; i1 < info1.words.length; i1++) { String word1 = info1.words[i1]; // ### ha valgmulighet her? if (true) { word1.toLowerCase(); } for (i2 = 0; i2 < info2.words.length; i2++) { String word2 = info2.words[i2]; // ### ha valgmulighet her? 
if (true) { word2.toLowerCase(); } // diceScore1 = SimilarityUtils.dice(word1, word2); //if (word1.equals(word2)) { // System.out.println("diceScore1 av " + word1 + " og " + word2 + " = " + diceScore1); //} if (diceScore1 >= 0.7) { diceScore += 1; } } } //if (word1.substring(i1, i1+2).equalsIgnoreCase(word2.substring(i2, i2+2)) { // diceScore++; //} } */ // ### new version 2005-06-30 for comparison of ###steps and not just single cells. // ### CompareCell not a good name // ### CompareCells better? //public CompareCells(AlignmentModel model, int[] position, PathStep step) { public CompareCells(AlignmentModel model, int[] position, PathStep step) throws EndOfAllTextsException, EndOfTextException { //System.out.println("CompareCells constructor. position=" + position[0] + "," + position[1] + " , step=" + step); //bestPathScore = new BestPathScore(); elementInfoToBeCompared = new ElementInfoToBeCompared(model); // loop through all texts and collect info for comparison int textEndCount = 0; for (int t=0; t 0) { // System.out.println("CompareCells constructor. textEndCount = " + textEndCount); //} if (textEndCount >= Alignment.NUM_FILES) { //System.out.println("CompareCells constructor throws EndOfAllTextsException"); throw new EndOfAllTextsException(); } else if (textEndCount > 0) { //System.out.println("CompareCells constructor throws EndOfTextException"); throw new EndOfTextException(); } // actual comparison done later // ### no - done now //System.out.println("CompareCells constructor. på slutten. kaller elementInfoToBeCompared.getScore()"); elementInfoToBeCompared.getScore(); //System.out.println("CompareCells constructor. på slutten. etter kall av elementInfoToBeCompared.getScore()"); } //public int getScore() { public float getScore() { return elementInfoToBeCompared.getScore(); } // §§§ for debugging public String toString() { // //System.out.println("CompareCells' toString"); ////return "CompareCells' toString. 
score=" + score + ", best path score=" + bestPathScore.getScore(); return "CompareCells' toString. score=" + elementInfoToBeCompared.getScore() + ", best path score=" + bestPathScore.getScore(); } } class BestPathScore { private float score; public BestPathScore() { //score = Float.MIN_VALUE; //score = -1.0f; // 2005-08-23. varianten over var sikkert også ok, men hadde stått utkommentert lenge. uten disse fikk jeg initiell verdi 0.0, som nye stier med skåre 0.0 tapte mot score = AlignmentModel.BEST_PATH_SCORE_NOT_CALCULATED; // 2006-09-20 } public BestPathScore(float score) { //score = Float.MIN_VALUE; this.score = score; } public float getScore() { return score; } public String toString() { return "" + score; } } /** * a matrix of CompareCells cells. * a cell represents the comparison of elements from the various texts. * there can be more then one element for each text, e.g, a 2-1 comparison. * so the matrix is not just a 2-dimensional array. * implemented as a Map, * with cells not calculated until they are needed, * and with garbage collection. * belongs to a Compare object */ class CompareMatrix { //... Map cells = new HashMap(); // map of CompareCells objects //Map cells = Collections.synchronizedMap(new HashMap()); Map bestPathScores = new HashMap(); // map of BestPathScore objects public CompareMatrix() { } public void purge() { cells.clear(); bestPathScores.clear(); } void garbageCollect(int[] ix) { //System.out.println("CompareMatrix sin garbageCollect()"); int currIx; Iterator it; String[] temp; String key; List keysToRemove; // ... it = cells.keySet().iterator(); keysToRemove = new ArrayList(); while (it.hasNext()) { //System.out.println("CompareMatrix sin garbageCollect(). neste cells"); key = (String)it.next(); //System.out.println("CompareMatrix sin garbageCollect(). key = " + key); temp = key.split(","); for (int t=0; t(); while (it.hasNext()) { //System.out.println("CompareMatrix sin garbageCollect(). 
neste bestPathScores"); key = (String)it.next(); temp = key.split(","); for (int t=0; t0) { key11 += ","; } key11 += Integer.toString(position[t]); } key11 = key11 + "," + key11; //System.out.println("CompareMatrix sin getScore(). key11=" + key11); */ boolean outside = false; for (int t=0; t0) { bestPathScoreKey += ","; } bestPathScoreKey += position[t]; } //System.out.println("getScore() kalt for key=" + bestPathScoreKey); // if (bestPathScores.get(bestPathScoreKey) == null) { /* // ###cell does not exist. %¤#%¤#%¤#%¤#create cell //System.out.println("cell does not exist. create cell. key=" + key); CompareCells compareCells = model.compare.getCellValues(model, position[0], position[1]); return compareCells.bestPathScore; */ //throw new MyException("Noe gikk feil!!!!"); // §§§§§§§§§§§§§§§§§§§§§§§ Toolkit.getDefaultToolkit().beep(); // §§§§§§§§§§§§§§§§§§§§§§§ Toolkit.getDefaultToolkit().beep(); // §§§§§§§§§§§§§§§§§§§§§§§ Toolkit.getDefaultToolkit().beep(); // §§§§§§§§§§§§§§§§§§§§§§§ Toolkit.getDefaultToolkit().beep(); // §§§§§§§§§§§§§§§§§§§§§§§ Toolkit.getDefaultToolkit().beep(); // §§§§§§§§§§§§§§§§§§§§§§§ //System.out.println("cell doesn't exist. BEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEP CompareMatrix getScore"); System.out.println("Program error? Cell doesn't exist. Position = " + position[0] + "," + position[1]); //return 0; // §§§§§§§§§§§§§§§§§§§§§§§ //return -1.0f; // a value less than real scores. smallest possible real score is 0. 2005-08-22 return AlignmentModel.BEST_PATH_SCORE_BAD; // 2006-09-20 } else { //System.out.println("cell exists. 
bestPathScoreKey=" + bestPathScoreKey); //System.out.println("skåre = " + ((BestPathScore)bestPathScores.get(bestPathScoreKey)).getScore()); return ((BestPathScore)bestPathScores.get(bestPathScoreKey)).getScore(); } } } void setScore(int[] position, float score) { String bestPathScoreKey = ""; for (int t=0; t0) { bestPathScoreKey += ","; } bestPathScoreKey += Integer.toString(position[t]); } //System.out.println("setScore() setter best path score=" + score + " for key=" + bestPathScoreKey); bestPathScores.put(bestPathScoreKey, new BestPathScore(score)); /* // must set score in all cells ending in this position. ### neida. alle relevante celler peker jo hit med sin bestPathScore // §§§§§§§§§§§§§§§§§§§§§§§§§§§ dårlig programmering String keyEnd = "," + bestPathScoreKey; Iterator it = cells.keySet().iterator(); while (it.hasNext()) { String key = (String)it.next(); if (key.substring(key.length() - keyEnd.length(), key.length()).equals(keyEnd)) { // match ((CompareCells)(cells.get(key))).bestPathScoreKey = bestPathScoreKey; } } */ //System.out.println("setScore() setter member bestPathScore. score=" + score + ", key=" + key); return; } void resetBestPathScores() { Iterator it = bestPathScores.keySet().iterator(); while (it.hasNext()) { //bestPathScores.put(it.next(), new BestPathScore(-1.0f)); // ##### burde hatt konstanter for disse to stedene hvor jeg bruker (float)-1? bruker det både for cells-greie og best path bestPathScores.put(it.next(), new BestPathScore(AlignmentModel.BEST_PATH_SCORE_NOT_CALCULATED)); // 2006-09-20 } } // §§§ for debugging public String toString() { String ret = ""; String key; Iterator it1 = cells.keySet().iterator(); while (it1.hasNext()) { key = (String)it1.next(); //System.out.println("cells. key=" + key); ret += "(" + key + " : "; ret += (CompareCells)cells.get(key); ret += ")\n"; } Iterator it2 = bestPathScores.keySet().iterator(); while (it2.hasNext()) { key = (String)it2.next(); //System.out.println("bestPathScores. 
key=" + key); ret += "(" + key + " : "; ret += (BestPathScore)bestPathScores.get(key); ret += ")\n"; } return ret; } } /** * ... */ class Compare { // info about elements in each text ElementsInfo[] elementsInfo = new ElementsInfo[Alignment.NUM_FILES]; // info about how elements combine CompareMatrix matrix = new CompareMatrix(); // list of the various possible steps (e.g, 1-1, 0-1, 1-0, 1-2, etc) List stepList = new ArrayList(); Compare() { //System.out.println("Compare constructor"); for (int t=0; t>>>>x = " + x + "y = " + y + "key = " + key); if (!matrix.cells.containsKey(key)) { //System.out.println("matrix doesn't contain key"); // ¤¤¤ heller skjule bruken av first? en get(x)-metode //¤¤¤matrix.cells.put(key, new CompareCell(model, elementsInfo[0].elementInfo.get(x - elementsInfo[0].first).getElementInfo(model, x, 0), elementsInfo[1].elementInfo.get(y - elementsInfo[1].first).getElementInfo(model, y, 1))); //System.out.println("gjør test 1"); //Object test1 = elementsInfo[0]; //System.out.println("elementsInfo[0] = " + elementsInfo[0]); //System.out.println("gjør test m"); //Object testm = model; //System.out.println("gjør test 2"); //Object test2 = elementsInfo[0].getElementInfo(model, x, 0); //System.out.println("gjør test 3"); //Object test3 = elementsInfo[1]; //System.out.println("gjør test 4"); //Object test4 = elementsInfo[1].getElementInfo(model, y, 1); //System.out.println("gjør put"); if (x < 0 || y < 0) { //System.out.println("(1) make matrix cell for element " + x + " against element " + y); matrix.cells.put(key, new CompareCell()); } else { System.out.println("(2) make matrix cell for element " + x + " against element " + y); matrix.cells.put(key, new CompareCell(model, elementsInfo[0].getElementInfo(model, x, 0), elementsInfo[1].getElementInfo(model, y, 1))); } } return (CompareCell)matrix.cells.get(key); } */ // ### new version 2005-06-30 for comparison of ###steps and not just single cells // ### CompareCells getCellValues not good names 
// NOTE(review): the text below is damaged. The original line breaks are gone, and text lying
// between a '<' and the next '>' has been removed, apparently by a markup stripper. Because
// of the collapsed lines, everything after the first "//" on a physical line is lexically a
// comment. Restore from version control instead of re-deriving the missing parts.
//
// Damaged spots in the text below:
//   - getCellValues(): both "for (int t=0; t0) {" loops have lost their condition and
//     increment and the start of what was presumably an "if (t>0)" separator check. The loop
//     bound is not recoverable from here (position.length or Alignment.NUM_FILES? -- TODO confirm).
//   - createStepList(): "for (int j=0; j" and "for (int t=0; t" are truncated. Everything from
//     the second truncation to the middle of a commented-out HTML table dump is missing,
//     including the rest of the method.
//   - The physical line breaks inside the print("...") remnants are artifacts, presumably
//     stripped line-break tags. They are not real statement boundaries.
//
// getCellValues(model, position, step), as far as the surviving code shows:
//   - position is treated as the last position of the path so far (per the original remark),
//     so for each text t the new cell starts at position[t] + 1 and ends at
//     position[t] + 1 + step.increment[t] - 1.
//   - key is the start coordinates, then ",", then the end coordinates. bestPathScoreKey is
//     the end coordinates only.
//   - If matrix.cells has no entry for key, a new CompareCells(model, position, step) is
//     stored under it. EndOfAllTextsException and EndOfTextException from that constructor are
//     caught and rethrown unchanged.
//   - The new cell's bestPathScore is the BestPathScore already held in matrix.bestPathScores
//     for bestPathScoreKey, or a fresh BestPathScore() when there is none. That object is then
//     put into matrix.bestPathScores under bestPathScoreKey, so cells with the same end
//     position refer to the same score object.
//   - Returns the CompareCells stored in matrix.cells under key.
//
// createStepList(), surviving part only:
//   - range = Alignment.MAX_NUM_TRY - Alignment.MIN_NUM_TRY + 1, and limit starts at 1.
//   - combString is characters 1..Alignment.NUM_FILES of Integer.toString(limit + i, range),
//     i.e. of the radix-`range` representation of limit + i.
//   - minimum, maximum and total are initialised to MAX_NUM_TRY + 1, MIN_NUM_TRY - 1 and 0.
//     How they are used is not visible in this copy.
//
// toString() (at the end of the text below): debugging aid. Returns a fixed banner string
// followed by this.matrix.toString() and a newline.
//public CompareCells getCellValues(AlignmentModel model, int[] position, PathStep step) { public CompareCells getCellValues(AlignmentModel model, int[] position, PathStep step) throws EndOfAllTextsException, EndOfTextException { // ### position er vel siste pos i sti så langt (XXX) // da starter den nye cellen etter denne pos: // // XXX // her String key = ""; String bestPathScoreKey = ""; // cell start position for (int t=0; t0) { key += ","; } key += Integer.toString(position[t] + 1); } // ... key += ","; // cell end position for (int t=0; t0) { key += ","; bestPathScoreKey += ","; } key += Integer.toString(position[t] + 1 + step.increment[t] - 1); bestPathScoreKey += Integer.toString(position[t] + 1 + step.increment[t] - 1); } //int x, y, xInc, yInc; //x = position[0]; //System.out.println("getCellValues method new version"); //// key format: start x , start y , end x , end y //String key = Integer.toString(x) + "," + Integer.toString(y) + "," + Integer.toString(x+xInc-1) + "," + Integer.toString(y+yInc-1); //System.out.println("getCellValues. key = " + key); if (!matrix.cells.containsKey(key)) { //System.out.println("!matrix.cells.containsKey(key)"); //if (position[0] < 0 || position[1] < 0) { // System.out.println("(1) make matrix cell for elements " + key); // matrix.cells.put(key, new CompareCells()); //} else { //¤//System.out.println("(2) make matrix cell for elements " + key); // make a new cells thing ### // ### with a bestPathScore reference not yet set try { matrix.cells.put(key, new CompareCells(model, position, step)); } catch (EndOfAllTextsException e) { //System.out.println("getCellValues() throws EndOfAllTextsException"); throw e; // ¤¤¤ er dette måten ...? } catch (EndOfTextException e) { //System.out.println("getCellValues() throws EndOfTextException"); throw e; // ¤¤¤ er dette måten ...? } if (matrix.bestPathScores.containsKey(bestPathScoreKey)) { // there is a best score for this end position. 
// use it //((CompareCells)matrix.cells.get(key)).bestPathScore = (BestPathScore)matrix.bestPathScores.get(bestPathScoreKey); // (((this version just to see where the ClassCastException occurs))) BestPathScore temp = (BestPathScore)matrix.bestPathScores.get(bestPathScoreKey); ((CompareCells)matrix.cells.get(key)).bestPathScore = temp; } else { // there is no best score for this end position. // set very low value ### no not that ### uh ((CompareCells)matrix.cells.get(key)).bestPathScore = new BestPathScore(); } // put the best score object in the best score map matrix.bestPathScores.put(bestPathScoreKey, ((CompareCells)matrix.cells.get(key)).bestPathScore); // #### //System.out.println("satt inn ny celle. matrix er nå\n" + matrix); //} } return (CompareCells)matrix.cells.get(key); } /* public CompareCell getCellValues(AlignmentModel model, int[] ix) { String key = ""; for (int t=0; t0) { key += ","; } key += Integer.toString(ix[t]); } ... } */ void createStepList() { // //System.out.println("createStepList"); int range = Alignment.MAX_NUM_TRY - Alignment.MIN_NUM_TRY + 1; int limit = 1; for (int j=0; j" + Integer.toString(limit + i, range)); String combString = Integer.toString(limit + i, range).substring(1,Alignment.NUM_FILES+1); int minimum = Alignment.MAX_NUM_TRY + 1; int maximum = Alignment.MIN_NUM_TRY - 1; int total = 0; for (int t=0; t"); //System.out.println(""); //System.out.println(""); for (int x=0; x<10; x++) { //System.out.println("" + x + ""); } //System.out.println(""); for (int y=0; y<10; y++) { //System.out.println(""); //System.out.print("" + y + ""); for (int x=0; x<10; x++) { CompareCell compareCell = model.compare.getCellValues(model, y, x); //System.out.print(""); //System.out.print("a=" + compareCell.anchorWordScore); //System.out.print("
"); //System.out.print("p=" + compareCell.properNameScore); //System.out.print("
"); ////System.out.print("n=" + compareCell.numWordsScore); ////System.out.print("
"); ////System.out.print("l=" + (Float.toString(compareCell.lengthScore)).substring(0,3)); ////System.out.print("
"); //System.out.print("l1=" + compareCell.length[0]); //System.out.print("
"); //System.out.print("l2=" + compareCell.length[1]); //System.out.print("
"); //System.out.print("d=" + compareCell.diceScore); //System.out.print(""); } //System.out.println(""); } //System.out.println(""); */ /* int a = 15; int b = 3; float test1 = (float)(1.0 / (((double)Math.abs(a - b) / (double)Math.min(a, b)) + 1.0)); System.out.println("test1 = " + test1); float test2 = (float)(1.0 / (((double)Math.abs(b - a) / (double)Math.min(b, a)) + 1.0)); System.out.println("test2 = " + test2); */ /*}*/ // §§§ for debugging public String toString() { return "\n*************\nCompare sin matrix:\n" + this.matrix.toString() + "\n"; } } /////////////////////////////////////////////