#!/usr/bin/env python # -*- coding: utf-8 -*- # # Script that prints out lemmas in # $GTHOME/langs/sma/src/morphology/stems/sm[x]-propernouns.lexc that are found # in both $GTHOME/langs/sma/src/morphology/stems/sm[x]-propernouns.lexc and # $GTHOME//gtcore/templates/smi/src/morphology/stems/smi-propernouns.lexc # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this file. If not, see . # # Copyright 2013 Børre Gaup # import os import re import fileinput import sys smiwords = [] possiblesmxduplicates = [] myre = re.compile(r" .*;") def addSmxLines(line): if not (line.startswith("!") or line.startswith("\n") or line.startswith(" ")): line = line.strip() if ":" in line and line[:line.find(":")] in smiwords: possiblesmxduplicates.append(line) elif myre.search(line) and line[:myre.search(line).start()] in smiwords: possiblesmxduplicates.append(line) def readSmi(): with open(os.getenv("GTHOME") + "/gtcore/templates/smi/src/morphology/stems/smi-propernouns.lexc", 'r') as content_file: for line in content_file: if line.startswith("!"): pass elif line.startswith("\n"): pass elif ":" in line: smiwords.append(line[:line.find(":")]) elif myre.search(line): smiwords.append(line[:myre.search(line).start()]) def readSmx(lang): smxname = os.getenv("GTHOME") + "/langs/" + lang + "/src/morphology/stems/" + lang + "-propernouns.lexc" for line in fileinput.FileInput(smxname): addSmxLines(line) def printConclusion(): for line in possiblesmxduplicates: print line print "There are", len(possiblesmxduplicates), "possible duplexes printed above" def main(): if len(sys.argv) == 2: readSmi() readSmx(sys.argv[1]) printConclusion() else: print "State the name of the language that you would like to check, e.g:" print "smx-smi.py sme" sys.exit(1) if __name__ == "__main__": main()