#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#   This program tests the max and min values of tca2 applied to the tmx
#   goldstandard files
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
#   Copyright 2011 Børre Gaup
#
#   test tca2 parameters min and max values

from __future__ import print_function

import fileinput
import os
import shutil
import subprocess
import sys

# parallelize lives in the langTools directory, which is not on the default
# module search path, so the path has to be extended before the import.
sys.path.append(os.environ['GTHOME'] + '/gt/script/langTools')
import parallelize  # noqa: E402

# Directory in which ant is run to build tca2.
TCA2_DIR = os.path.join(os.environ['GTHOME'], 'tools/alignment-tools/tca2')

# The java source file holding the constants that are varied by this script.
ALIGNMENT_JAVA = os.path.join(
    os.environ['GTHOME'],
    'tools/alignment-tools/tca2/aksis/alignment/Alignment.java')

# Pristine copy of ALIGNMENT_JAVA, made by main() before any constant is
# changed and copied back after every test run.
ALIGNMENT_BACKUP = ALIGNMENT_JAVA + '.tcatest'


def main():
    """
    Run the default test, search for better constants and test the winners.

    Returns 0 when all the steps have been run.
    """
    # Set the name of the file to write the test to
    paragstestfile = os.path.join(
        os.environ['GTHOME'],
        'techdoc/tools/tca2_testruns.paragstesting.xml')

    # First a run with the default values
    compile_tca()
    tester = _run_goldstandard_test(paragstestfile, '_Default_values')
    minDiffLines = tester.getNumberOfDiffLines()

    # copy the original file to a backup file
    shutil.copy(ALIGNMENT_JAVA, ALIGNMENT_BACKUP)

    winners = findWinners(paragstestfile, minDiffLines)
    testWinners(paragstestfile, winners)

    return 0


def _run_goldstandard_test(paragstestfile, label):
    """
    Create a TmxGoldstandardTester for paragstestfile and label, run it and
    return it so that the caller can ask it for the number of diff lines.
    """
    # Initialize an instance of a tmx test data writer
    tester = parallelize.TmxGoldstandardTester(paragstestfile, label)
    # run the test
    tester.runTest()
    return tester


def _restore_alignment_source():
    """
    Copy the backup made by main() over the (possibly modified) Alignment.java
    """
    shutil.copy(ALIGNMENT_BACKUP, ALIGNMENT_JAVA)


def testWinners(paragstestfile, winners):
    """
    Do a test run with the winners

    paragstestfile is the file the test is written to, winners is a
    dictionary mapping constant names to the values they should be given.
    """
    try:
        # Set the constants found in winners
        for constant, value in winners.items():
            set_value(constant, value)

        compile_tca()
        _run_goldstandard_test(paragstestfile, '_Winner_values')
    finally:
        # reset to the orig file, also when compiling or testing failed, so
        # that a modified Alignment.java is never left behind
        _restore_alignment_source()


def findWinners(paragstestfile, defaultWinner):
    """
    Find those combinations of constant, value that gives better results
    than the default setup. Collect them in a winners dictionary, and return
    that

    defaultWinner is the number of diff lines produced by the default
    constants; a combination wins when it produces fewer diff lines.
    """
    winners = {}

    # A dictionary of constants and the values to try for each of them
    default = {
        'DEFAULT__ANCHORWORD_MATCH_WEIGHT': ['0.5', '3.0'],
        'DEFAULT__ANCHORPHRASE_MATCH_WEIGHT': ['0.5', '3.0'],
        'DEFAULT__PROPERNAME_MATCH_WEIGHT': ['0.5', '3.0'],
        'DEFAULT__DICE_MATCH_WEIGHT': ['0.5', '3.0'],
        'DEFAULT__DICEPHRASE_MATCH_WEIGHT': ['0.5', '3.0'],
        'DEFAULT__NUMBER_MATCH_WEIGHT': ['0.5', '3.0'],
        'DEFAULT__SCORINGCHARACTER_MATCH_WEIGHT': ['0.5', '3.0'],
    }

    # Then for each constant, change them
    for constant, values in default.items():
        # The score to beat is reset for every constant, so each constant is
        # compared against the default setup and its own earlier values only
        winner = defaultWinner
        print("testing", constant)
        for value in values:
            print(value)
            try:
                set_value(constant, value)
                compile_tca()
                tester = _run_goldstandard_test(
                    paragstestfile, '_' + constant + '_' + value)

                # Find out if this combination of constant and value gives a
                # better result than the default setting. If it is, add it
                # to the winners dictionary
                diff_lines = tester.getNumberOfDiffLines()
                if winner > diff_lines:
                    winner = diff_lines
                    winners[constant] = value
            finally:
                # reset to the orig file after each run, also when the run
                # was aborted
                _restore_alignment_source()

    return winners


def set_value(constant, value):
    """
    Replace the line containing constant with value

    Everything following the name of the constant on such a line is replaced
    by " = <value>f;". All other lines are written back unchanged.
    """
    # With inplace set, fileinput redirects sys.stdout into the file that is
    # being read, so what is written below ends up in Alignment.java.
    java_source = fileinput.FileInput(ALIGNMENT_JAVA, inplace=1)
    try:
        for line in java_source:
            position = line.find(constant)
            # find() returns -1 when constant is absent; 0 is a valid hit
            if position >= 0:
                line = line[:position + len(constant)] + " = " + value + "f;\n"
            # Write the line as it is, a final line lacking a newline must
            # not lose its last character
            sys.stdout.write(line)
    finally:
        # Closing restores sys.stdout even if the rewrite was interrupted
        java_source.close()


def compile_tca():
    """
    Compile tca2

    Runs ant in the tca2 directory. If ant fails, its output is written to
    stderr and the program exits with status 1.
    """
    # NOTE(review): the working directory is changed for the whole process,
    # as it always has been. Later steps may depend on that, which can not
    # be verified from this file, so it is not replaced by Popen(cwd=...).
    os.chdir(TCA2_DIR)
    subp = subprocess.Popen(['ant'],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    (output, error) = subp.communicate()

    if subp.returncode != 0:
        print('Could not compile tca2', file=sys.stderr)
        print(output, file=sys.stderr)
        print(error, file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    sys.exit(main())