#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file contains tests for the class that analyses text in giellatekno xml format
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this file. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 2013-2014 Børre Gaup
#
import unittest
import doctest
from lxml import etree
from lxml import doctestcompare
import os
import analyser
class TestAnalyser(unittest.TestCase):
    u"""Tests for analyser.Analyser, run against the fixture smefile.xml

    Every test compares the output of one analysis step with a hard coded
    expected value.
    """

    # NOTE(review): several expected literals below contain empty word form
    # cohorts (`""`) next to intact ones such as `"<,>"`, a bare `@7` tag,
    # and an expected xml document without any element markup. That looks
    # like angle-bracket markup was stripped from this file at some point --
    # confirm against the fixtures before trusting these values.

    def setUp(self):
        u"""Create the Analyser under test and hand it its input files
        """
        sme_analyser = analyser.Analyser(u'sme')
        sme_analyser.xmlFile = u'smefile.xml'
        sme_analyser.setAnalysisFiles(
            abbrFile='abbr.txt',
            fstFile='analyser.xfst',
            disambiguationAnalysisFile='disambiguation.cg3',
            functionAnalysisFile='functions.cg3',
            dependencyAnalysisFile='dependency.cg3')
        sme_analyser.setCorrFile(corrFile='corr.txt')

        self.a = sme_analyser

    def assertXmlEqual(self, got, want):
        u"""Raise AssertionError unless two stringified xml snippets match

        got is the xml that was produced, want is the xml that was expected.
        The comparison is delegated to lxml's doctest output checker, and
        the message of the raised error is the utf-8 encoded difference
        reported by that checker.
        """
        xml_checker = doctestcompare.LXMLOutputChecker()
        if xml_checker.check_output(want, got, 0):
            return

        expected_example = doctest.Example(u"", want)
        report = xml_checker.output_difference(expected_example, got, 0)
        raise AssertionError(report.encode(u'utf-8'))

    def testSmeCcatOutput(self):
        u"""Check that ccat() returns the expected utf-8 encoded text
        """
        expected = u'''Muhto gaskkohagaid, ja erenoamážit dalle go lei buolaš, de aggregáhta billánii. ¶\n'''
        actual = self.a.ccat()

        self.assertEqual(actual, expected.encode(u'utf8'))

    def testSmePreprocessOutput(self):
        u"""Check that preprocess() returns the expected utf-8 encoded text
        """
        expected = u'''Muhto\ngaskkohagaid\n,\nja\nerenoamážit\ndalle go\nlei\nbuolaš\n,\nde\naggregáhta\nbillánii\n.\n¶\n'''
        actual = self.a.preprocess()

        self.assertEqual(actual, expected.encode(u'utf8'))

    def testSmeDisambiguationOutput(self):
        u"""Check that the disambiguation analysis gives the expected output
        """
        expected = u'""\n\t"muhto" CC @CVP \n""\n\t"gaskkohagaid" Adv \n"<,>"\n\t"," CLB \n""\n\t"ja" CC @CNP \n""\n\t"erenoamážit" Adv \n""\n\t"dalle_go" MWE CS @CVP \n""\n\t"leat" V IV Ind Prt Sg3 @+FMAINV \n""\n\t"buolaš" Sem/Wthr N Sg Nom \n"<,>"\n\t"," CLB \n""\n\t"de" Adv \n""\n\t"aggregáhta" N Sg Nom \n""\n\t"billánit" V IV Ind Prt Sg3 @+FMAINV \n"<.>"\n\t"." CLB \n\n"<¶>"\n\t"¶" CLB \n\n'

        # The analysis has to be run before its result can be fetched.
        self.a.disambiguationAnalysis()
        actual = self.a.getDisambiguation()

        self.assertEqual(actual, expected.encode(u'utf8'))

    def testSmeDependencyOutput(self):
        u"""Check that the dependency analysis gives the expected output
        """
        expected = u'""\n\t"muhto" CC @CVP #1->1 \n""\n\t"gaskkohagaid" Adv @ADVL> #2->12 \n"<,>"\n\t"," CLB #3->4 \n""\n\t"ja" CC @CNP #4->2 \n""\n\t"erenoamážit" Adv @ADVL> #5->12 \n""\n\t"dalle_go" CS @CVP #6->7 \n""\n\t"leat" V IV Ind Prt Sg3 @FS-ADVL> #7->12 \n""\n\t"buolaš" N Sg Nom @7 \n"<,>"\n\t"," CLB #9->6 \n""\n\t"de" Adv @ADVL> #10->12 \n""\n\t"aggregáhta" N Sg Nom @SUBJ> #11->12 \n""\n\t"billánit" V IV Ind Prt Sg3 @FS-ADVL> #12->0 \n"<.>"\n\t"." CLB #13->12 \n\n"<¶>"\n\t"¶" CLB #1->1 \n\n'

        # The analysis has to be run before its result can be fetched.
        self.a.dependencyAnalysis()
        actual = self.a.getDependency()

        self.assertEqual(actual, expected.encode(u'utf8'))

    def testAnalysisXml(self):
        u"""Check that getAnalysisXml() serialises to the expected document
        """
        expected = u'''
Internáhtta sosiálalaš giliguovddážin
2005
Almmuheaddji OS
10
aarseth_s.htm
XSLtemplate 1.9 ; file-specific xsl $Revision: 1.3 $; common.xsl $Revision$;
"<Muhto>"\n\t"muhto" CC <sme> @CVP \n"<gaskkohagaid>"\n\t"gaskkohagaid" Adv <sme> \n"<,>"\n\t"," CLB \n"<ja>"\n\t"ja" CC <sme> @CNP \n"<erenoamážit>"\n\t"erenoamážit" Adv <sme> \n"<dalle_go>"\n\t"dalle_go" MWE CS <sme> @CVP \n"<lei>"\n\t"leat" V <sme> IV Ind Prt Sg3 @+FMAINV \n"<buolaš>"\n\t"buolaš" Sem/Wthr N <sme> Sg Nom \n"<,>"\n\t"," CLB \n"<de>"\n\t"de" Adv <sme> \n"<aggregáhta>"\n\t"aggregáhta" N <sme> Sg Nom \n"<billánii>"\n\t"billánit" V <sme> IV Ind Prt Sg3 @+FMAINV \n"<.>"\n\t"." CLB \n\n"<¶>"\n\t"¶" CLB \n\n"<Muhto>"\n\t"muhto" CC @CVP #1->1 \n"<gaskkohagaid>"\n\t"gaskkohagaid" Adv @ADVL> #2->12 \n"<,>"\n\t"," CLB #3->4 \n"<ja>"\n\t"ja" CC @CNP #4->2 \n"<erenoamážit>"\n\t"erenoamážit" Adv @ADVL> #5->12 \n"<dalle_go>"\n\t"dalle_go" CS @CVP #6->7 \n"<lei>"\n\t"leat" V IV Ind Prt Sg3 @FS-ADVL> #7->12 \n"<buolaš>"\n\t"buolaš" N Sg Nom @<SPRED #8->7 \n"<,>"\n\t"," CLB #9->6 \n"<de>"\n\t"de" Adv @ADVL> #10->12 \n"<aggregáhta>"\n\t"aggregáhta" N Sg Nom @SUBJ> #11->12 \n"<billánii>"\n\t"billánit" V IV Ind Prt Sg3 @FS-ADVL> #12->0 \n"<.>"\n\t"." CLB #13->12 \n\n"<¶>"\n\t"¶" CLB #1->1 \n\n'''

        # The analyser works on a parsed tree of its own xml file here.
        self.a.eTree = etree.parse(self.a.xmlFile)
        self.a.dependencyAnalysis()
        analysis_xml = self.a.getAnalysisXml()

        # Lift unittest's size limit on diffs so a mismatch in this long
        # document is shown in full.
        self.maxDiff = None
        self.assertEqual(
            etree.tostring(analysis_xml, encoding=u'unicode'), expected)
def main():
    u"""Hand control over to the command line test runner of unittest
    """
    unittest.main()
# Only start the test run when this file is executed as a script, not when
# it is imported as a module.
if __name__ == u'__main__':
    main()