int clusterScoreMethod; Clusters(int clusterScoreMethod) { clusters = new ArrayList(); this.clusterScoreMethod = clusterScoreMethod; } // a cluster contains references to words in several text (in practice 2). // some texts may have more references than others, // and this method finds the largest number of references int getScore() { if (clusterScoreMethod == 1) { // each cluster scores exactly 1 return 1; } int high = 0; int low = Integer.MAX_VALUE; Iterator rIt; Ref ref; for (int t=0; t prevT) { if (first) { numSlashes = ref.t - prevT -1; first = false; } else { numSlashes = ref.t - prevT; } for (int i=0; i prevT) { if (first) { numSlashes = ref.t - prevT -1; first = false; } else { numSlashes = ref.t - prevT; } for (int i=0; i= hits[t].size()) { done2 = true; } } // ... current[t] += count; } } } } // ... anchorWordScore = anchorWordClusters.getScore(); // do reporting### // next line of info... //if (anchorWordScore > 0) { // ### ryddigere med samme syntaks alltid retLine = INDENT + INDENT + "Anchor word score: " + anchorWordScore; // 2006-04-05 //} else { // retLine = INDENT + "No anchor word matches. Score: 0"; //} ret.add(retLine); indentLevel = 3; ret.addAll(anchorWordClusters.getDetails(indentLevel)); // getDetails() does its own indentation and endline. ### ikke helt bra? ////////////////// // proper names // // and // // dice // ////////////////// // check all the words in one text against all the words in the other. // collect clusters of proper names. // collect clusters of dice-related words. // (usually all the words in a cluster will be related to each other, // but not necessarily.) 
String word1; String word2; Clusters properNameClusters = new Clusters(); Clusters diceClusters = new Clusters(); for (t=0; t= model.getDiceMinWordLength()) && (word2.length() >= model.getDiceMinWordLength())) { //System.out.println("\nskal dice-sammenlikne " + word1 + " med " + word2); if (SimilarityUtils.dice(word1, word2) >= model.getDiceMinCountingScore()) { // the words are related. // add to cluster list matchType = Match.DICE; £££ weight = 1.0f; £££ diceClusters.add(matchType, weight, t, tt, x, y, word1, word2); } } } } } } } } properNameScore = properNameClusters.getScore(); diceScore = diceClusters.getScore(); // ... retLine = INDENT + INDENT + "Proper name score: " + properNameScore; ret.add(retLine); indentLevel = 3; ret.addAll(properNameClusters.getDetails(indentLevel)); // getDetails() does its own indentation and endline. ### ikke helt bra? retLine = INDENT + INDENT + "Dice score: " + diceScore; ret.add(retLine); indentLevel = 3; ret.addAll(diceClusters.getDetails(indentLevel)); // getDetails() does its own indentation and endline. ### ikke helt bra? //////////////////////////////// // common score for anchor words, proper names and dice Clusters commonClusters = new Clusters(); commonClusters.add(anchorWordClusters); commonClusters.add(properNameClusters); commonClusters.add(diceClusters); commonScore = commonClusters.getScore(); // go back and insert the common score for the word based methods ret[wordMethodsScoreLineNumber] = ret.get(wordMethodsScoreLineNumber) + commonScore; score += commonScore; //////////////////////////////// // scoring special characters // //////////////////////////////// int scoringCharacterScore = 0; // check all the ... ... ... 
String char1; String char2; Clusters scoringCharacterClusters = new Clusters(); for (t=0; t scoreBefore) { retLine += " match well,"; ret.add(retLine); retLine = INDENT + "increasing score from " + scoreBefore + " to " + score; ret.add(retLine); } else if (score < scoreBefore) { retLine += " don't match well,"; ret.add(retLine); retLine = INDENT + "reducing score from " + scoreBefore + " to " + score; ret.add(retLine); } else { retLine += " match so-so,"; ret.add(retLine); retLine = INDENT + "making no change to the score " + score; ret.add(retLine); } //////////////////////////////////// // micro adjustment to break ties // 2005-11-03 //////////////////////////////////// // when otherwise scoring equal, // paths with 1-1's are to preferred // over paths with other alignments. // add (subtract) micro punishment if step is not 1-1 boolean is11 = true; for (t=0; t Alignment.NUM_FILES) { throw new Exception("Too many slashes: " + anchorWordListEntryText); // §§§ } for (int t=0; t 0) { //System.out.println("legger til frase i data-liste"); language[t].add(phrase); //System.out.println("har lagt til frase i data-liste"); } //System.out.println("ferdig med en frase"); } //System.out.println("ferdig med data for et språk"); } //System.out.println("ferdig med hele entry"); } } } /** * anchor word list. * list, with each element a AnchorWordListEntry. 
*/
class AnchorWordList {

    // Entries of the anchor word list; load() adds AnchorWordListEntry objects here.
    // The type name is fully qualified to avoid ambiguity
    // (presumably with another List type imported by this file -- TODO confirm).
    java.util.List entries = new ArrayList();

    // The alignment model handed to the constructor.
    AlignmentModel model;

    // Earlier no-argument constructor with a temporarily hard-coded list, kept for reference:
    //AnchorWordList() {
    //    // hard-coded for now
    //    entries.add(new AnchorWordListEntry("paragraph/avsnittet"));
    //    entries.add(new AnchorWordListEntry("inflation/inflasjon,inflasjonen"));
    //    entries.add(new AnchorWordListEntry("policy/pengepolitikken"));
    //    entries.add(new AnchorWordListEntry("krone/krone,kronen,kronens,kroner"));
    //    entries.add(new AnchorWordListEntry("euro/euro"));
    //}

    /**
     * Creates an empty anchor word list.
     *
     * @param model the alignment model to store in this list
     */
    AnchorWordList(AlignmentModel model) {
        this.model = model;
    }

    /**
     * Replaces the contents of this list with entries built from the given lines.
     * Each element is cast to String and trimmed; lines that are empty after
     * trimming are skipped. Every remaining line is passed to the
     * AnchorWordListEntry constructor. If that constructor throws, the error is
     * written to System.err and shown in an error dialog, loading stops at that
     * line, and the list is cleared again, so a failed load leaves the list empty.
     *
     * @param lines the raw lines of the anchor word list; elements are cast to String
     */
    public void load(ArrayList lines) {

        //System.out.println("AnchorWordList's load()");
        //System.out.println("lines.size() = " + lines.size());

        // clear list.
        // note: an earlier version created a new list here instead:
        //java.util.List entries = new ArrayList();
        // that caused the model's anchor word list not to be set after all
        // (the original author had not worked out why; the line declares a local
        // variable named entries, so the field would have been left untouched).
        entries.clear();

        // load list
        boolean ok = true; // becomes false as soon as one line fails to parse
        Iterator it = lines.iterator();
        while (it.hasNext()) {
            String line = ((String)(it.next())).trim();
            if (line.length() > 0) {
                //System.out.println("line='"+line+"'");
                try {
                    entries.add(new AnchorWordListEntry(line));
                } catch (Exception e) {
                    // report the problem both on stderr and in a dialog, then stop loading
                    System.err.println("Error in anchor word entry: " + e.getMessage());
                    JOptionPane.showMessageDialog(
                        null,
                        "Error in anchor word entry: " + e.getMessage(),
                        "Error in anchor word entry",
                        JOptionPane.ERROR_MESSAGE
                    );
                    ok = false;
                    break;
                }
            }
            //System.out.println("entries.size() = " + entries.size());
        }

        if (!ok) {
            // error occurred. clear list again, discarding the entries added before the failure
            System.out.println("Error occurred. clear list again");
            //entries = new ArrayList();
            entries.clear(); // clearing in place instead of replacing the list -- isn't this better?
} } public void display(JTextArea content) { if (entries != null) { Iterator eIt = entries.iterator(); while (eIt.hasNext()) { //System.out.println("neste entry"); StringBuffer anchorWordListEntryText = new StringBuffer(""); AnchorWordListEntry anchorWordListEntry = (AnchorWordListEntry)eIt.next(); for (int t=0; t>>>>>>>>>>getAnchorWordHits<<<<<<<<<<<<"); AnchorWordHits ret = new AnchorWordHits(); // Iterator aIt = this.entries.iterator(); int anchorWordEntryCount = 0; while (aIt.hasNext()) { //System.out.println("anchorWordEntryCount = " + anchorWordEntryCount); //java.util.List synonyms = ((AnchorWordListEntry)(aIt.next())).language[t]; AnchorWordListEntry entry = (AnchorWordListEntry)(aIt.next()); java.util.List synonyms = (java.util.List)(entry.language[t]); Iterator sIt = synonyms.iterator(); while (sIt.hasNext()) { java.util.List anchorPhrase = (java.util.List)sIt.next(); //System.out.println("anchorPhrase = " + anchorPhrase); for (int w=0; w