/* Copyright (C) 2013 Børre Gaup This file is part of the program wordlist2hunspell. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include #include #include // dirty trick to make private functions available for testing :D #define private public // place includes for classes that should be test below this line #include "../wordlistparserall.h" extern void debugoutputSetSetMap(std::map, std::set > uffsetsetmap); extern void debugoutputStringIntMap(std::map uffstringintmap); extern std::istream* setupinputfile(); extern std::string wp2wordlist(std::map stems, std::map suffixes, std::map< std::set, std::set > ss); TEST(TestAddSuffix) { WordlistParserAll wp; wp.addSuffix("abcd\tGpBO"); wp.addSuffix("def\tGpBO"); wp.addSuffix("abcd\tGpBO"); SuffixMap got = wp.getSuffixMap(); SuffixMap want; want["abcd\tGpBO"] = 1; want["def\tGpBO"] = 2; CHECK_EQUAL(want.size(), got.size()); CHECK(equal(want.begin(), want.end(), got.begin())); } std::map setupstemmap() { std::map stems; int i = 1; stems.insert(std::pair("100000-geardásačča", i)); ++i; stems.insert(std::pair("100000-geardásaš", i)); ++i; stems.insert(std::pair("10000-geardásačča", i)); ++i; stems.insert(std::pair("10000-geardásaš", i)); ++i; stems.insert(std::pair("1000-geardásačča", i)); ++i; stems.insert(std::pair("1000-geardásaš", i)); ++i; stems.insert(std::pair("10-Đ", i)); ++i; stems.insert(std::pair("10-Đ-", i)); ++i; stems.insert(std::pair("1-D", i)); ++i; stems.insert(std::pair("1-D-", i)); ++i; stems.insert(std::pair("2-C", i)); ++i; stems.insert(std::pair("2-C-", i)); ++i; stems.insert(std::pair("3-juvllatsihkkel", i)); ++i; stems.insert(std::pair("7. juni-plassen", i)); ++i; stems.insert(std::pair("Aabakken", i)); ++i; stems.insert(std::pair("Aabel", i)); ++i; stems.insert(std::pair("abandonere", i)); ++i; stems.insert(std::pair("abess-", i)); ++i; stems.insert(std::pair("abess.-", i)); ++i; stems.insert(std::pair("abl-", i)); ++i; stems.insert(std::pair("aborigiinnalačča", i)); ++i; stems.insert(std::pair("absoluhta", i)); ++i; stems.insert(std::pair("absoluhtalačča", i)); ++i; stems.insert(std::pair("addalanvuoigatvuođa", i)); ++i; stems.insert(std::pair("addámuša", i)); ++i; stems.insert(std::pair("addinbeai", i)); ++i; stems.insert(std::pair("addinvejolašvuođa", i)); ++i; stems.insert(std::pair("advokáhta gukto", i)); ++i; stems.insert(std::pair("agibeai", i)); ++i; stems.insert(std::pair("a.", i)); ++i; stems.insert(std::pair("almmustusbeai", i)); ++i; stems.insert(std::pair("a-", i)); ++i; stems.insert(std::pair("as", i)); ++i; stems.insert(std::pair("á-", i)); ++i; stems.insert(std::pair("c-", i)); ++i; stems.insert(std::pair("čuohpan", i)); ++i; stems.insert(std::pair("dáppe", i)); ++i; stems.insert(std::pair("dieppe", i)); ++i; stems.insert(std::pair("doppe", i)); ++i; stems.insert(std::pair("NSR", i)); ++i; return stems; } TEST(TestProcessFileStems) { std::istream* inputfile = setupinputfile(); WordlistParserAll wp; wp.processFile(inputfile); std::map got = wp.getStems(); std::map want = setupstemmap(); CHECK_EQUAL(want.size(), got.size()); CHECK(equal(want.begin(), want.end(), got.begin())); } std::map setupsuffixmap() { std::map s; int i = 1; s.insert(std::pair("0\tGaBO", i)); ++i; s.insert(std::pair("ide\tNAIE", i)); ++i; s.insert(std::pair("ide\tNePAE", i)); ++i; s.insert(std::pair("id\tGpBO", i)); ++i; s.insert(std::pair("id\tNABO", i)); ++i; s.insert(std::pair("id\tNePIOE", i)); ++i; s.insert(std::pair("id\tNAIE", i)); ++i; s.insert(std::pair("id\tNePAE", i)); ++i; s.insert(std::pair("id\tNePAOE", i)); ++i; s.insert(std::pair("id-\tNEX", i)); ++i; s.insert(std::pair("id-\tWIX", i)); ++i; s.insert(std::pair("iguin\tNAIE", i)); ++i; s.insert(std::pair("iguin\tNePAE", i)); ++i; s.insert(std::pair("0\tNAIE", i)); ++i; s.insert(std::pair("0\tNePOE", i)); ++i; s.insert(std::pair("-\tNEX", i)); ++i; s.insert(std::pair("-\tWIX", i)); ++i; s.insert(std::pair("0\tNABO", i)); ++i; s.insert(std::pair("0\tNAIBOE", i)); ++i; s.insert(std::pair(":ide\tNI", i)); ++i; s.insert(std::pair(":ide\tNePE", i)); ++i; s.insert(std::pair(":id\tNI", i)); ++i; s.insert(std::pair(":id\tNePE", i)); ++i; s.insert(std::pair(":iguin\tNI", i)); ++i; s.insert(std::pair(":iguin\tNePE", i)); ++i; s.insert(std::pair("0\tNePE", i)); ++i; s.insert(std::pair("0\tWI", i)); ++i; s.insert(std::pair("0\tNIX", i)); ++i; s.insert(std::pair("0\tNePABO", i)); ++i; s.insert(std::pair("aččade\tNI", i)); ++i; s.insert(std::pair("aččadet\tNI", i)); ++i; s.insert(std::pair("aččaidanguin\tNI", i)); ++i; s.insert(std::pair("a\tNePIE", i)); ++i; s.insert(std::pair("a\tVaE", i)); ++i; s.insert(std::pair("a-\tJuBO", i)); ++i; s.insert(std::pair("a-\tNePIEX", i)); ++i; s.insert(std::pair("ii\tNePIE", i)); ++i; s.insert(std::pair("iin\tNePIE", i)); ++i; s.insert(std::pair("-\tGaB", i)); ++i; s.insert(std::pair("-\tNB", i)); ++i; s.insert(std::pair("-\tNePABO", i)); ++i; s.insert(std::pair("0\tNePIE", i)); ++i; s.insert(std::pair("0\tVaE", i)); ++i; s.insert(std::pair("-\tJuBO", i)); ++i; s.insert(std::pair("-\tNePIEX", i)); ++i; s.insert(std::pair("ba\tVI", i)); ++i; s.insert(std::pair("beahtti\tVI", i)); ++i; s.insert(std::pair("behtet\tVI", i)); ++i; s.insert(std::pair("dan\tNaAE", i)); ++i; s.insert(std::pair("dan\tNABI", i)); ++i; s.insert(std::pair("dan\tNAE", i)); ++i; s.insert(std::pair("dan\tNaO", i)); ++i; s.insert(std::pair("dan\tNO", i)); ++i; s.insert(std::pair("dan\tNpAE", i)); ++i; s.insert(std::pair("dan\tNpO", i)); ++i; s.insert(std::pair("deaddjái\tNaAE", i)); ++i; s.insert(std::pair("deaddjái\tNAE", i)); ++i; s.insert(std::pair("deaddjái\tNpAE", i)); ++i; s.insert(std::pair("deaddji\tNaAE", i)); ++i; s.insert(std::pair("deaddji\tNAE", i)); ++i; s.insert(std::pair("deaddji\tNaO", i)); ++i; s.insert(std::pair("deaddjin\tNpAE", i)); ++i; s.insert(std::pair("deaddji\tNO", i)); ++i; s.insert(std::pair("deaddji\tNpO", i)); ++i; s.insert(std::pair("deami\tNaO", i)); ++i; s.insert(std::pair("deami\tNO", i)); ++i; s.insert(std::pair("deami\tNpO", i)); ++i; s.insert(std::pair("dišgoahtán\tNABI", i)); ++i; s.insert(std::pair("duvvogoahtán\tNABI", i)); ++i; s.insert(std::pair("0\tNePABX", i)); ++i; s.insert(std::pair("t\tNAI", i)); ++i; s.insert(std::pair("buččat\tNAI", i)); ++i; s.insert(std::pair("0\tNAB", i)); ++i; s.insert(std::pair("-\tNAIX", i)); ++i; s.insert(std::pair("-\tNePABX", i)); ++i; s.insert(std::pair("de\tNIE", i)); ++i; s.insert(std::pair("det\tNIE", i)); ++i; s.insert(std::pair("me\tNIE", i)); ++i; s.insert(std::pair(":in\tWEIX", i)); ++i; s.insert(std::pair(":in-\tNePABX", i)); ++i; s.insert(std::pair(":in-\tWIX", i)); ++i; s.insert(std::pair(":i\tWEIX", i)); ++i; s.insert(std::pair(":i-\tNePABX", i)); ++i; s.insert(std::pair(":i-\tWIX", i)); ++i; s.insert(std::pair(":n\tWEIX", i)); ++i; s.insert(std::pair(":n-\tNePABX", i)); ++i; s.insert(std::pair(":n-\tWIX", i)); ++i; s.insert(std::pair("0\tNBX", i)); ++i; s.insert(std::pair(":ide\tWI", i)); ++i; s.insert(std::pair(":id\tWI", i)); ++i; s.insert(std::pair(":iguin\tWI", i)); ++i; s.insert(std::pair("laččaide\tNAIE", i)); ++i; s.insert(std::pair("laččaid\tNAIE", i)); ++i; s.insert(std::pair("laččaiguin\tNAIE", i)); ++i; s.insert(std::pair("0\tNAPBX", i)); ++i; s.insert(std::pair("0\tNtPAB", i)); ++i; s.insert(std::pair(":i\tNI", i)); ++i; s.insert(std::pair(":i\tNePE", i)); ++i; return s; } TEST(TestProcessFileSuffixes) { std::istream* inputfile = setupinputfile(); WordlistParserAll wp; wp.processFile(inputfile); std::map want = setupsuffixmap(); std::map got = wp.getSuffixMap(); CHECK_EQUAL(want.size(), got.size()); CHECK(equal(got.begin(), got.end(), want.begin())); } TEST(TestProcessFileSuffixStemMap) { std::istream* inputfile = setupinputfile(); WordlistParserAll wp; wp.processFile(inputfile); std::string want("100000-geardásačča\tGaBO\n100000-geardásaččaide\tNAIE\n100000-geardásaččaide\tNePAE\n100000-geardásaččaid\tGpBO\n100000-geardásaččaid\tNABO\n100000-geardásaččaid\tNePIOE\n100000-geardásaččaid\tNAIE\n100000-geardásaččaid\tNePAE\n100000-geardásaččaid\tNePAOE\n100000-geardásaččaid-\tNEX\n100000-geardásaččaid-\tWIX\n100000-geardásaččaiguin\tNAIE\n100000-geardásaččaiguin\tNePAE\n100000-geardásačča\tNAIE\n100000-geardásačča\tNePOE\n100000-geardásačča-\tNEX\n100000-geardásačča-\tWIX\n1000-geardásačča\tGaBO\n1000-geardásaččaid\tGpBO\n1000-geardásaččaid\tNABO\n1000-geardásaččaid\tNePIOE\n1000-geardásaččaid\tNAIE\n1000-geardásaččaid\tNePAOE\n10000-geardásačča\tGaBO\n10000-geardásaččaid\tGpBO\n10000-geardásaččaid\tNABO\n10000-geardásaččaid\tNePIOE\n10000-geardásaččaid\tNAIE\n10000-geardásaččaid\tNePAOE\n10000-geardásačča\tNAIE\n10000-geardásačča\tNePOE\naddalanvuoigatvuođaid\tGpBO\naddámušaid\tGpBO\naddinvejolašvuođaid\tGpBO\n100000-geardásaš\tNAIE\n100000-geardásaš\tNePOE\n100000-geardásaš-\tNEX\n100000-geardásaš-\tWIX\n100000-geardásaš\tNABO\n100000-geardásaš\tNAIBOE\n7. juni-plassen-\tWIX\n7. juni-plassena\tNePIE\n7. juni-plassena\tVaE\n7. juni-plassena-\tJuBO\n7. juni-plassena-\tNePIEX\n7. juni-plassenii\tNePIE\n7. juni-plasseniin\tNePIE\n7. juni-plassen-\tGaB\n7. juni-plassen-\tNB\n7. juni-plassen-\tNePABO\n7. juni-plassen\tNePIE\n7. juni-plassen\tVaE\n7. juni-plassen-\tJuBO\n7. juni-plassen-\tNePIEX\nAabakken-\tWIX\nAabakkena\tNePIE\nAabakkena\tVaE\nAabakkena-\tJuBO\nAabakkena-\tNePIEX\nAabakken-\tGaB\nAabakken-\tNB\nAabakken-\tNePABO\nAabel-\tWIX\nAabel-\tGaB\nAabel-\tNB\nAabel-\tNePABO\n10000-geardásaš\tNAIBOE\n1000-geardásaš\tNAIBOE\n10-Đ:ide\tNI\n10-Đ:ide\tNePE\n10-Đ:id\tNI\n10-Đ:id\tNePE\n10-Đ:iguin\tNI\n10-Đ:iguin\tNePE\n10-Đ\tNePE\n10-Đ\tWI\n1-D\tNePE\n1-D\tWI\n2-C\tNePE\n2-C\tWI\nNSR\tNePE\nNSR\tWI\nNSR:i\tNI\nNSR:i\tNePE\n10-Đ-\tNIX\n10-Đ-\tNePABO\n1-D-\tNIX\n1-D-\tNePABO\n2-C-\tNIX\n2-C-\tNePABO\ndáppe\tNIX\ndáppe\tNAPBX\ndáppe\tNtPAB\ndieppe\tNIX\ndieppe\tNAPBX\ndieppe\tNtPAB\ndoppe\tNIX\ndoppe\tNAPBX\ndoppe\tNtPAB\n3-juvllatsihkkelaččade\tNI\n3-juvllatsihkkelaččadet\tNI\n3-juvllatsihkkelaččaidanguin\tNI\nabandonereba\tVI\nabandonerebeahtti\tVI\nabandonerebehtet\tVI\nabandoneredan\tNaAE\nabandoneredan\tNABI\nabandoneredan\tNAE\nabandoneredan\tNaO\nabandoneredan\tNO\nabandoneredan\tNpAE\nabandoneredan\tNpO\nabandoneredeaddjái\tNaAE\nabandoneredeaddjái\tNAE\nabandoneredeaddjái\tNpAE\nabandoneredeaddji\tNaAE\nabandoneredeaddji\tNAE\nabandoneredeaddji\tNaO\nabandoneredeaddjin\tNpAE\nabandoneredeaddji\tNO\nabandoneredeaddji\tNpO\nabandoneredeami\tNaO\nabandoneredeami\tNO\nabandoneredeami\tNpO\nabandoneredišgoahtán\tNABI\nabandonereduvvogoahtán\tNABI\nabess-\tNePABX\nabess.-\tNePABX\nabl-\tNePABX\naborigiinnalaččat\tNAI\nabsoluhtalaččat\tNAI\nabsoluhtabuččat\tNAI\naddinbeai\tNAB\naddinbeai-\tNAIX\naddinbeai-\tNePABX\nagibeai\tNAB\nagibeai-\tNAIX\nagibeai-\tNePABX\nalmmustusbeai\tNAB\nalmmustusbeai-\tNAIX\nalmmustusbeai-\tNePABX\nadvokáhta guktode\tNIE\nadvokáhta guktodet\tNIE\nadvokáhta guktome\tNIE\na.:in\tWEIX\na.:in-\tNePABX\na.:in-\tWIX\na.:i\tWEIX\na.:i-\tNePABX\na.:i-\tWIX\na.:n\tWEIX\na.:n-\tNePABX\na.:n-\tWIX\na-\tNBX\ná-\tNBX\nc-\tNBX\nas:ide\tWI\nas:id\tWI\nas:iguin\tWI\nčuohpanlaččaide\tNAIE\nčuohpanlaččaid\tNAIE\nčuohpanlaččaiguin\tNAIE\n"); std::map< std::set, std::set > ss = wp.getSuffixSetStemSet(); std::string got = wp2wordlist(wp.getStems(), wp.getSuffixMap(), ss); CHECK_EQUAL(want, got); } std::set setupplxclasses() { std::set plxclasses; plxclasses.insert("GaB"); plxclasses.insert("GaBO"); plxclasses.insert("GpBO"); plxclasses.insert("JuBO"); plxclasses.insert("NAB"); plxclasses.insert("NABI"); plxclasses.insert("NABO"); plxclasses.insert("NAE"); plxclasses.insert("NAI"); plxclasses.insert("NAIBOE"); plxclasses.insert("NAIE"); plxclasses.insert("NAIX"); plxclasses.insert("NAPBX"); plxclasses.insert("NB"); plxclasses.insert("NBX"); plxclasses.insert("NEX"); plxclasses.insert("NI"); plxclasses.insert("NIE"); plxclasses.insert("NIX"); plxclasses.insert("NO"); plxclasses.insert("NaAE"); plxclasses.insert("NaO"); plxclasses.insert("NePABO"); plxclasses.insert("NePABX"); plxclasses.insert("NePAE"); plxclasses.insert("NePAOE"); plxclasses.insert("NePE"); plxclasses.insert("NePIE"); plxclasses.insert("NePIEX"); plxclasses.insert("NePIOE"); plxclasses.insert("NePOE"); plxclasses.insert("NpAE"); plxclasses.insert("NpO"); plxclasses.insert("NtPAB"); plxclasses.insert("VI"); plxclasses.insert("VaE"); plxclasses.insert("WEIX"); plxclasses.insert("WI"); plxclasses.insert("WIX"); return plxclasses; } TEST(TestProcessFilePlx) { std::cerr << "TestProcessFilePlx" << std::endl;; std::istream* inputfile = setupinputfile(); WordlistParserAll wp; wp.processFile(inputfile); std::set want = setupplxclasses(); std::set got = wp.getPlxClasses(); CHECK_EQUAL(want.size(), got.size()); CHECK(equal(got.begin(), got.end(), want.begin())); }