#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#   This file contains a class to analyse text in giellatekno xml format
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this file. If not, see <http://www.gnu.org/licenses/>.
#
#   Copyright 2013-2014 Børre Gaup
#

import doctest
import os
import unittest

from lxml import doctestcompare
from lxml import etree

import analyser


class TestAnalyser(unittest.TestCase):
    u"""Exercise analyser.Analyser on the u'sme' fixture file smefile.xml."""

    def setUp(self):
        u"""Build the Analyser under test and register its input files."""
        self.a = sme_analyser = analyser.Analyser(u'sme')
        sme_analyser.xmlFile = u'smefile.xml'
        sme_analyser.setAnalysisFiles(
            abbrFile='abbr.txt',
            fstFile='analyser.xfst',
            disambiguationAnalysisFile='disambiguation.cg3',
            functionAnalysisFile='functions.cg3',
            dependencyAnalysisFile='dependency.cg3')
        sme_analyser.setCorrFile(corrFile='corr.txt')

    def assertXmlEqual(self, got, want):
        u"""Fail unless two stringified xml snippets compare equal.

        The comparison is delegated to lxml's LXMLOutputChecker. On a
        mismatch an AssertionError is raised whose message is the
        checker's difference report, encoded as utf-8.
        """
        checker = doctestcompare.LXMLOutputChecker()
        if checker.check_output(want, got, 0):
            return

        example = doctest.Example(u"", want)
        difference = checker.output_difference(example, got, 0)
        raise AssertionError(difference.encode(u'utf-8'))

    def testSmeCcatOutput(self):
        u"""Test if the ccat output is what we expect it to be"""
        actual = self.a.ccat()
        # Written as two adjacent literals so the trailing space before the
        # line break stays visible.
        expected = (
            u'Muhto gaskkohagaid, ja erenoamážit dalle go lei buolaš, '
            u'de aggregáhta billánii. \n'
            u'¶\n')

        self.assertEqual(actual, expected.encode(u'utf8'))

    def testSmePreprocessOutput(self):
        u"""Test if the preprocess output is what we expect it to be"""
        actual = self.a.preprocess()
        # One token per output line; every token is newline terminated.
        tokens = [
            u'Muhto',
            u'gaskkohagaid',
            u',',
            u'ja',
            u'erenoamážit',
            u'dalle go',
            u'lei',
            u'buolaš',
            u',',
            u'de',
            u'aggregáhta',
            u'billánii',
            u'.',
            u'¶',
        ]
        expected = u''.join(token + u'\n' for token in tokens)

        self.assertEqual(actual, expected.encode(u'utf8'))

    def testSmeDisambiguationOutput(self):
        u"""Check if disambiguation analysis gives the expected output"""
        self.a.disambiguationAnalysis()
        actual = self.a.getDisambiguation()
        # One literal per cohort (word form line followed by its reading).
        # NOTE(review): most word form lines are the empty u'""' here, while
        # the fixture in testAnalysisXml has u'"<Muhto>"' etc. -- confirm
        # these strings against the real tool output.
        expected = (
            u'""\n\t"muhto" CC @CVP \n'
            u'""\n\t"gaskkohagaid" Adv \n'
            u'"<,>"\n\t"," CLB \n'
            u'""\n\t"ja" CC @CNP \n'
            u'""\n\t"erenoamážit" Adv \n'
            u'""\n\t"dalle_go" MWE CS @CVP \n'
            u'""\n\t"leat" V IV Ind Prt Sg3 @+FMAINV \n'
            u'""\n\t"buolaš" Sem/Wthr N Sg Nom \n'
            u'"<,>"\n\t"," CLB \n'
            u'""\n\t"de" Adv \n'
            u'""\n\t"aggregáhta" N Sg Nom \n'
            u'""\n\t"billánit" V IV Ind Prt Sg3 @+FMAINV \n'
            u'"<.>"\n\t"." CLB \n\n'
            u'"<¶>"\n\t"¶" CLB \n\n')

        self.assertEqual(actual, expected.encode(u'utf8'))

    def testSmeDependencyOutput(self):
        u"""Check if dependency analysis gives the expected output"""
        self.a.dependencyAnalysis()
        actual = self.a.getDependency()
        # One literal per cohort (word form line followed by its reading).
        # NOTE(review): most word form lines are the empty u'""' here, and
        # the u'buolaš' reading ends in u'@7' where the fixture in
        # testAnalysisXml has u'@<SPRED #8->7' -- confirm these strings
        # against the real tool output.
        expected = (
            u'""\n\t"muhto" CC @CVP #1->1 \n'
            u'""\n\t"gaskkohagaid" Adv @ADVL> #2->12 \n'
            u'"<,>"\n\t"," CLB #3->4 \n'
            u'""\n\t"ja" CC @CNP #4->2 \n'
            u'""\n\t"erenoamážit" Adv @ADVL> #5->12 \n'
            u'""\n\t"dalle_go" CS @CVP #6->7 \n'
            u'""\n\t"leat" V IV Ind Prt Sg3 @FS-ADVL> #7->12 \n'
            u'""\n\t"buolaš" N Sg Nom @7 \n'
            u'"<,>"\n\t"," CLB #9->6 \n'
            u'""\n\t"de" Adv @ADVL> #10->12 \n'
            u'""\n\t"aggregáhta" N Sg Nom @SUBJ> #11->12 \n'
            u'""\n\t"billánit" V IV Ind Prt Sg3 @FS-ADVL> #12->0 \n'
            u'"<.>"\n\t"." CLB #13->12 \n\n'
            u'"<¶>"\n\t"¶" CLB #1->1 \n\n')

        self.assertEqual(actual, expected.encode(u'utf8'))

    def testAnalysisXml(self):
        u"""Check if the xml is what it is supposed to be"""
        self.a.eTree = etree.parse(self.a.xmlFile)
        self.a.dependencyAnalysis()
        analysis_xml = self.a.getAnalysisXml()
        # NOTE(review): the expected document below contains no xml markup
        # although it is compared with etree.tostring() output -- confirm
        # the fixture against the real analysis xml.
        expected = u'''
Internáhtta sosiálalaš giliguovddážin 2005 Almmuheaddji OS 10 aarseth_s.htm XSLtemplate 1.9 ; file-specific xsl $Revision: 1.3 $; common.xsl $Revision$;
"<Muhto>"\n\t"muhto" CC <sme> @CVP \n"<gaskkohagaid>"\n\t"gaskkohagaid" Adv <sme> \n"<,>"\n\t"," CLB \n"<ja>"\n\t"ja" CC <sme> @CNP \n"<erenoamážit>"\n\t"erenoamážit" Adv <sme> \n"<dalle_go>"\n\t"dalle_go" MWE CS <sme> @CVP \n"<lei>"\n\t"leat" V <sme> IV Ind Prt Sg3 @+FMAINV \n"<buolaš>"\n\t"buolaš" Sem/Wthr N <sme> Sg Nom \n"<,>"\n\t"," CLB \n"<de>"\n\t"de" Adv <sme> \n"<aggregáhta>"\n\t"aggregáhta" N <sme> Sg Nom \n"<billánii>"\n\t"billánit" V <sme> IV Ind Prt Sg3 @+FMAINV \n"<.>"\n\t"." CLB \n\n"<¶>"\n\t"¶" CLB \n\n"<Muhto>"\n\t"muhto" CC @CVP #1->1 \n"<gaskkohagaid>"\n\t"gaskkohagaid" Adv @ADVL> #2->12 \n"<,>"\n\t"," CLB #3->4 \n"<ja>"\n\t"ja" CC @CNP #4->2 \n"<erenoamážit>"\n\t"erenoamážit" Adv @ADVL> #5->12 \n"<dalle_go>"\n\t"dalle_go" CS @CVP #6->7 \n"<lei>"\n\t"leat" V IV Ind Prt Sg3 @FS-ADVL> #7->12 \n"<buolaš>"\n\t"buolaš" N Sg Nom @<SPRED #8->7 \n"<,>"\n\t"," CLB #9->6 \n"<de>"\n\t"de" Adv @ADVL> #10->12 \n"<aggregáhta>"\n\t"aggregáhta" N Sg Nom @SUBJ> #11->12 \n"<billánii>"\n\t"billánit" V IV Ind Prt Sg3 @FS-ADVL> #12->0 \n"<.>"\n\t"." CLB #13->12 \n\n"<¶>"\n\t"¶" CLB #1->1 \n\n
'''
        # Show the complete diff on failure; the compared strings are long.
        self.maxDiff = None
        serialised = etree.tostring(analysis_xml, encoding=u'unicode')
        self.assertEqual(serialised, expected)


def main():
    u"""Run every test in this module through the unittest runner."""
    unittest.main()


if __name__ == u'__main__':
    main()