# -*- coding:utf-8 -*-
#
# This file contains routines to change names of corpus files
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this file. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 2013 Børre Gaup
#
import os
import sys
import unittest
import unicodedata
import subprocess
import inspect
import lxml.etree as etree
def lineno():
    """Return the line number of the statement that called this function."""
    # currentframe() is this function's own frame; f_back is the caller's.
    caller_frame = inspect.currentframe().f_back
    return caller_frame.f_lineno
class TestNameChanger(unittest.TestCase):
    """Check the ``newname`` that NameChanger computes for various inputs."""

    def _checkNewname(self, expected, *names):
        """Build a NameChanger from ``names`` and compare its newname."""
        changer = NameChanger(*names)
        self.assertEqual(changer.newname, expected)

    def testNoneAsciiLower(self):
        """Lowercase non-ASCII letters are replaced by ASCII ones."""
        self._checkNewname('astndzcaoaoai_', u'ášŧŋđžčåøæöäï+')

    def testNoneAsciiUpper(self):
        """Uppercase non-ASCII letters are downcased and replaced."""
        self._checkNewname('astndzcaoaoai_', u'ÁŠŦŊĐŽČÅØÆÖÄÏ+')

    def testNoneAsciiBlabla(self):
        """Mixed-case non-ASCII letters are downcased and replaced."""
        self._checkNewname('astndzcaoaoai_', u'ášŧŋđŽČÅØÆÖÄï+')

    def testOwnNameWithOnlyAscii(self):
        """An explicitly given ASCII name is downcased."""
        self._checkNewname('youllneverwalkalone',
                           'haha', 'YoullNeverWalkAlone')

    def testOwnNameWithOnlyAsciiAndSpace(self):
        """Spaces in an explicitly given name become underscores."""
        self._checkNewname('youll_never_walk_alone',
                           'haha', 'Youll Never Walk Alone')

    def testOwnNameWithAsciiAndSpaceAndApostrophe(self):
        """Apostrophes in an explicitly given name become underscores."""
        self._checkNewname('you_ll_never_walk_alone',
                           'haha', "You'll Never Walk Alone")

    def testOwnNameWithNonAscii(self):
        """UTF-8 encoded names are decoded before being converted."""
        given = u'Šaddágo beaivi vai idja'
        encoded_given = given.encode('utf8')
        self._checkNewname('saddago_beaivi_vai_idja',
                           'haha'.encode('utf8'), encoded_given)
class NameChanger:
    """Class to change names of corpus files.

    The new name is a lowercase version of the old one with a fixed set of
    non-ASCII and special characters replaced. Renaming is done with
    ``svn mv`` and also covers the metadata (.xsl) file, the references in
    the metadata of parallel files and the derived prestable files.

    The code is written to run on both Python 2.6+ and Python 3.
    """

    # Characters (in lowercase) that are replaced in a new name.
    _REPLACEMENTS = {
        u'á': u'a', u'š': u's', u'ŧ': u't', u'ŋ': u'n', u'đ': u'd',
        u'ž': u'z', u'č': u'c', u'å': u'a', u'ø': u'o', u'æ': u'a',
        u'ö': u'o', u'ä': u'a', u'ï': u'i', u'+': u'_', u' ': u'_',
        u'(': u'_', u')': u'_', u"'": u'_', u'–': u'-', u'?': u'_',
    }

    # Language-pair directories searched below prestable/toktmx and
    # prestable/tmx.
    _LANGUAGE_PAIRS = ('sme2nob', 'nob2sme')

    def __init__(self, oldname, newname=None):
        """Split oldname into directory and basename, compute the new name.

        oldname: path of the file to rename, as text or UTF-8 bytes.
        newname: optional name (text or UTF-8 bytes) to base the new name
            on; when None the basename of oldname is used.

        self.dirname is the directory part of oldname.
        self.oldname is the basename of oldname.
        self.newname is the chosen name, in lowercase and with some
        characters replaced.
        """
        # Decode first, so that path handling and character replacement
        # never mix byte strings and text.
        oldname = self._toUnicode(oldname)
        self.dirname = os.path.dirname(oldname)
        self.oldname = os.path.basename(oldname)

        if newname is None:
            newname = self.oldname
        self.newname = self.changeToAscii(newname)

    def _toUnicode(self, name):
        """Return name as text, decoding it as UTF-8 if it is bytes."""
        if isinstance(name, bytes):
            return name.decode('utf8')
        return name

    def _asNativeStr(self, value):
        """Return value in a form that can be written to a text stream.

        Python 3 bytes are decoded; everything else is returned unchanged.
        """
        if isinstance(value, str):
            return value
        if isinstance(value, bytes):
            return value.decode('utf8', 'replace')
        return value

    def changeToAscii(self, oldname):
        """Downcase all chars in oldname, replace some chars.

        oldname may be text or UTF-8 bytes. Both the composed (NFC) and
        the decomposed (NFD) form of every replaceable character are
        handled, since file names may use either form.

        Returns the converted name as text.
        """
        newname = self._toUnicode(oldname).lower()
        for char, replacement in self._REPLACEMENTS.items():
            for form in ('NFD', 'NFC'):
                newname = newname.replace(
                    unicodedata.normalize(form, char), replacement)
        return newname

    def moveFile(self, fromname, toname):
        """Change name of file from fromname to toname using ``svn mv``.

        Does nothing if fromname does not exist. A dot is written to
        stdout on success; on failure the output of svn is written to
        stderr.
        """
        if not os.path.exists(fromname):
            return

        subp = subprocess.Popen(['svn', 'mv', fromname, toname],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        (output, error) = subp.communicate()

        if subp.returncode != 0:
            sys.stderr.write(
                'Could not move %s to %s\n' % (fromname, toname))
            for message in (output, error):
                sys.stderr.write(self._asNativeStr(message))
                sys.stderr.write('\n')
        else:
            sys.stdout.write('.')

    def changeName(self):
        """Change the name of the original file and its metadata file.

        Update the name in parallel files.
        Also move the other files that are connected to the original file.
        Nothing is done when the new name equals the old one.
        """
        if self.oldname == self.newname:
            return

        self.moveOrigfile()
        # The xsl file must be moved before the parallel files are
        # updated, since the update reads the xsl file under its new name.
        self.moveXslfile()
        self.updateNameInParallelFiles()
        self.movePrestableConverted()
        self.movePrestableToktmx()
        self.movePrestableTmx()

    def _moveWithSuffix(self, dirname, suffix):
        """Move oldname + suffix to newname + suffix inside dirname."""
        fromname = os.path.join(dirname, self.oldname + suffix)
        toname = os.path.join(dirname, self.newname + suffix)
        self.moveFile(fromname, toname)

    def moveOrigfile(self):
        """Change the name of the original file
        using the routines of a given repository tool
        """
        self._moveWithSuffix(self.dirname, '')

    def moveXslfile(self):
        """Change the name of an xsl file using the
        routines of a given repository tool
        """
        self._moveWithSuffix(self.dirname, '.xsl')

    def openXslfile(self, xslfile):
        """Open xslfile, return the tree.

        Prints a message and exits the program with status 254 if the
        file can not be parsed.
        """
        try:
            tree = etree.parse(xslfile)
        except Exception as inst:
            print("Unexpected error opening %s: %s" % (xslfile, inst))
            sys.exit(254)

        return tree

    def setNewname(self, mainlang, paralang, paraname):
        """Write the new name into the metadata of one parallel file.

        mainlang: language of the file being renamed.
        paralang: language of the parallel file.
        paraname: basename of the parallel file.

        The xsl file of the parallel file is looked for in the directory
        found by replacing mainlang with paralang in self.dirname. Its
        para_<mainlang> variable is set to the new name. Nothing is done
        if the xsl file does not exist.
        """
        paradir = self.dirname.replace(mainlang, paralang)
        parafile = os.path.join(paradir, paraname + '.xsl')
        if not os.path.exists(parafile):
            return

        paratree = self.openXslfile(parafile)
        variable = paratree.getroot().find(
            ".//*[@name='para_" + mainlang + "']")
        if variable is None:
            # Nothing to update; leave the parallel metadata untouched.
            sys.stderr.write(
                'No para_%s variable found in %s\n' % (mainlang, parafile))
            return

        variable.set('select', "'" + self.newname + "'")
        paratree.write(parafile, encoding='utf8', xml_declaration=True)

    def updateNameInParallelFiles(self):
        """Update the references to this file in its parallel files.

        Open the .xsl file belonging to the file we are changing names of
        (under its new name) and look for parallel files. Open the xsl
        files of these parallel files and change the name of this
        parallel from the old to the new one.
        """
        xslfile = os.path.join(self.dirname, self.newname + '.xsl')
        if not os.path.exists(xslfile):
            return

        xslroot = self.openXslfile(xslfile).getroot()
        mainlang_element = xslroot.find(".//*[@name='mainlang']")
        if mainlang_element is None:
            return

        # The select attributes hold XSLT string literals, e.g. 'sme'.
        mainlang = mainlang_element.get('select', '').replace("'", "")
        if mainlang == "":
            return

        for element in xslroot.iter():
            name = element.attrib.get('name')
            select = element.attrib.get('select')
            if not name or 'para_' not in name:
                continue
            # "''" means that no parallel file is set for this language.
            if select is None or select == "''":
                continue

            paralang = name.replace('para_', '')
            paraname = select.replace("'", "")
            self.setNewname(mainlang, paralang, paraname)

    def movePrestableConverted(self):
        """Move the file in prestable/converted from the old to the new name
        """
        dirname = self.dirname.replace('/orig/', '/prestable/converted/')
        self._moveWithSuffix(dirname, '.xml')

    def _movePrestableParallel(self, kind):
        """Move the .<kind> file of every language pair in prestable/<kind>.
        """
        for pair in self._LANGUAGE_PAIRS:
            suggestion = '/prestable/' + kind + '/' + pair + '/'
            dirname = self.dirname.replace('/orig/', suggestion)
            self._moveWithSuffix(dirname, '.' + kind)

    def movePrestableToktmx(self):
        """Move the file in prestable/toktmx from the old to the new name
        """
        self._movePrestableParallel('toktmx')

    def movePrestableTmx(self):
        """Move the file in prestable/tmx from the old to the new name
        """
        self._movePrestableParallel('tmx')
if __name__ == "__main__":
    # Report what the new name of the given file would be.
    # Only the computed names are printed here; no file is moved.
    if len(sys.argv) < 2:
        sys.stderr.write('Usage: %s FILE\n' % sys.argv[0])
        sys.exit(1)

    path = os.path.abspath(sys.argv[1])
    # Command line arguments are byte strings in Python 2, text in Python 3.
    if isinstance(path, bytes):
        path = path.decode('utf8')

    nc = NameChanger(path)
    print(nc.dirname)
    print(nc.oldname)
    print(nc.newname)