#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This program should be called like this:
./lene.py vocabulary_file1 vocabulary_file2 final_file

It results in a final_file which has this format:

dahje jus háliidat vuodjit shell fiillaid, de daga dáná:
bash mu_fiila.sh

perl fiillaid vuoját dáná:
perl perl_fiila.pl

python fiillaid vuoját dáná:
python python_fiila.py

de ii leat dárbu chmod:et

files: .actual.ti.final and the two .vcb
"""
# NOTE: the module docstring above doubles as the help text printed by
# main(), so its wording is kept unchanged.  The non-English part appears to
# be Northern Sami and roughly says: "or if you want to run shell files, do
# this: bash my_file.sh; perl files are run like this: perl perl_file.pl;
# python files are run like this: python python_file.py; then there is no
# need to chmod" -- translation to be confirmed by a speaker.
#
# All print calls below take exactly one argument so that they behave the
# same under the Python 2 print statement and the Python 3 print function.

import sys
import getopt


def file_to_dic(voc_file):
    """Read a vocabulary file into a dictionary.

    Every non-blank line is split on single spaces; the second field becomes
    the key and the third field the value (both kept as strings).
    Presumably the input is an ``id word count`` vocabulary (.vcb) file, so
    the result maps word -> count -- confirm against the real data.

    Args:
        voc_file: path of the vocabulary file to read.

    Returns:
        dict mapping field 2 to field 3 of each line.

    Raises:
        IndexError: if a non-blank line has fewer than three fields.
    """
    voc_dic = {}
    # The context manager closes the file even if a line is malformed.
    with open(voc_file) as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no entry.
                continue
            fields = stripped.split(" ")
            voc_dic[fields[1]] = fields[2]
    return voc_dic


def final_to_lene(voc_dic1, voc_dic2, finalfile):
    """Print the lines of *finalfile* annotated with vocabulary values.

    Each non-blank line is split on single spaces and reversed, so the last
    field comes first and is converted to float.  For a three-field line
    ``a b x`` the printed row is ``x b voc_dic2[b] a voc_dic1[a]``; a key
    missing from a dictionary is reported as ``-1``.  Rows are printed to
    standard output in ascending order (numeric field first, remaining
    fields as tie-breakers).

    Args:
        voc_dic1: lookup table for the first field of each line.
        voc_dic2: lookup table for the second field of each line.
        finalfile: path of the file to annotate.

    Raises:
        ValueError: if the last field of a line is not a number.
        IndexError: if a non-blank line has fewer than three fields.
    """
    rows = []
    with open(finalfile) as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split(" ")
            fields.reverse()
            # Store the leading value as a float so rows sort numerically.
            fields[0] = float(fields[0])
            fields.insert(2, voc_dic2.get(fields[1], '-1'))
            # After the insert above, index 3 holds the line's first field.
            fields.append(voc_dic1.get(fields[3], '-1'))
            rows.append(fields)

    rows.sort()
    for row in rows:
        row[0] = str(row[0])
        print(' '.join(row))


def main():
    """Command-line entry point.

    Prints the module docstring and exits with status 0 when called without
    arguments or with ``-h``/``--help``.  Exits with status 2 on an unknown
    option or when fewer than three file arguments are given.  Otherwise
    loads the two vocabulary files and prints the annotated final file.
    """
    if len(sys.argv) == 1:
        print(__doc__)
        sys.exit(0)

    # parse command line options
    try:
        opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
    except getopt.error as msg:
        print(msg)
        print("for help use --help")
        sys.exit(2)

    # process options
    for option, _value in opts:
        if option in ("-h", "--help"):
            print(__doc__)
            sys.exit(0)

    # Use the positional arguments getopt returned (options already removed)
    # and fail with a readable message instead of an IndexError traceback.
    if len(args) < 3:
        print("expected 3 arguments: vocabulary_file1 vocabulary_file2 final_file")
        print("for help use --help")
        sys.exit(2)

    voc_dic1 = file_to_dic(args[0])
    voc_dic2 = file_to_dic(args[1])
    final_to_lene(voc_dic1, voc_dic2, args[2])


if __name__ == "__main__":
    main()