/* Copyright (C) 2012 Børre Gaup This file is part of the program wordlist2hunspell. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include // for printing the affix file #include // for the sort function #include #include #include #include "hunspellprintercompounding.h" HunspellPrinterCompounding::HunspellPrinterCompounding(std::string language): _language(language), _gthome(getenv("GTHOME")), _hunspellMax(65009) { if (_gthome.empty()) { std::cerr << "GTHOME NOT DEFINED" << std::endl; std::cerr << "GTHOME must be defined for this program to be usable" << std::endl; std::cerr << "More info here: https://giellalt.uit.no/tools/docu-svn-user.html#The+first+commands" << std::endl; exit(2); } setConstantsMap(); } void HunspellPrinterCompounding::setConstantsMap() { // hunspellMax is reserved for the clitics suffix _constantsMap["NEEDAFFIX"] = _hunspellMax - 1; // 65008 _constantsMap["COMPOUNDPERMITFLAG"] = _hunspellMax - 2; // 65007 _constantsMap["COMPOUNDBEGIN"] = _hunspellMax - 3; // 65006 _constantsMap["COMPOUNDMIDDLE"] = _hunspellMax - 4; // 65005 _constantsMap["COMPOUNDEND"] = _hunspellMax - 5; // 65004 _constantsMap["COMPOUNDFORBIDFLAG"] = _hunspellMax - 6; // 65003 _constantsMap["ONLYINCOMPOUND"] = _hunspellMax - 7; // 65002 _constantsMap["NOSUGGEST"] = _hunspellMax - 8; // 65001 } std::map HunspellPrinterCompounding::preparePLXClasses(std::set plxClasses) { std::map hmhm; int counter = _afMap.size() + 1; int clitics = _hunspellMax; for (std::set::iterator plxClass = plxClasses.begin(); plxClass != plxClasses.end(); ++plxClass) { hmhm.insert(std::pair(*plxClass, counter)); std::stringstream s; if (plxClass->find("X") != std::string::npos) { s << _constantsMap["NOSUGGEST"] << ","; } if (plxClass->find("B") != std::string::npos || \ plxClass->find("O") != std::string::npos || \ plxClass->find("E") != std::string::npos) { s << _constantsMap["COMPOUNDPERMITFLAG"] << ","; } if (plxClass->find("B") != std::string::npos) { s << _constantsMap["COMPOUNDBEGIN"] << ","; } if (plxClass->find("O") != std::string::npos) { s << _constantsMap["COMPOUNDMIDDLE"] << ","; } if (plxClass->find("E") != std::string::npos) { s << _constantsMap["COMPOUNDEND"] << ","; } if (plxClass->find("E") == std::string::npos && \ plxClass->find("I") == std::string::npos) { s << _constantsMap["ONLYINCOMPOUND"] << ","; } if (plxClass->find("E") != std::string::npos || \ plxClass->find("I") != std::string::npos) { s << clitics << ","; } _afMap.insert(std::pair(counter, s.str().substr(0, s.str().size() - 1))); ++counter; } return hmhm; } void HunspellPrinterCompounding::prepareAfmap(std::map, std::set > suffixSetStemsMap) { int counter = _afMap.size() + 1; for (std::map, std::set >::iterator it1 = suffixSetStemsMap.begin(); it1 != suffixSetStemsMap.end(); ++it1, ++counter) { std::stringstream s; for (std::set::iterator it2 = it1->first.begin(); it2 != it1->first.end(); ++it2) { s << *it2 << ","; } s << _constantsMap["NEEDAFFIX"]; _afMap.insert(std::pair(counter, s.str())); for (std::set::iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2) { _stemMap.insert(std::pair(*it2, counter)); } } } std::string HunspellPrinterCompounding::getTime() { time_t rawtime; struct tm * timeinfo; char buffer [80]; time ( &rawtime ); timeinfo = localtime ( &rawtime ); strftime (buffer,80,"%Y-%m-%d",timeinfo); return std::string(buffer); } void HunspellPrinterCompounding::addSet(std::map< std::set, std::set > inmap, std::set& affSet, int index) { for (std::map< std::set, std::set >::iterator it1 = inmap.begin(); it1 != inmap.end(); it1++) { std::set::iterator stemId = it1->second.find(index); if (stemId != it1->second.end()) { affSet.insert(it1->first.begin(), it1->first.end()); it1->second.erase(stemId); break; } } } std::string HunspellPrinterCompounding::getVersion() { std::string version; std::string versionName; versionName += _gthome; versionName += std::string("/gt/common/hunspell/version.txt"); std::ifstream versionFile; versionFile.exceptions ( std::ifstream::failbit | std::ifstream::badbit ); try { if (_gthome.empty()) { std::cerr << "GTHOME NOT DEFINED" << std::endl; exit(2); } versionFile.open(versionName.c_str()); getline(versionFile, version); } catch (std::ifstream::failure e) { std::cerr << "Exception opening/reading file: "; std::cerr << versionName << std::endl; exit(3); } versionFile.close(); return version; } std::string HunspellPrinterCompounding::makeDicEasterEgg() { std::ostringstream easterEgg; easterEgg << "Divvun" << std::endl; easterEgg << getLangString() << " "; easterEgg << getVersion() << "-"; easterEgg << getTime() << std::endl; return easterEgg.str(); } void HunspellPrinterCompounding::printDic(std::map origStem) { std::string dicFileName = _language + "/hunspell/" + _language + "-comp.dic"; std::ofstream dicFile(dicFileName.c_str()); dicFile << makeDicString(origStem) << std::endl; dicFile.close(); } std::string HunspellPrinterCompounding::makeAffIntroString() { std::stringstream affintro; std::string introName; introName += _gthome; introName += std::string("/gt/common/hunspell/aff_intro"); std::ifstream intro; try { intro.open(introName.c_str()); affintro << "SET UTF-8" << std::endl; affintro << "FLAG num" << std::endl; for (std::map::iterator it = _constantsMap.begin(); it != _constantsMap.end(); ++it) { affintro << it->first << " " << it->second << std::endl; } std::string str; while (getline(intro, str)) { affintro << str << std::endl; } affintro << std::endl; } catch (std::ifstream::failure e) { std::cerr << "Exception opening/reading file "; std::cerr << introName << std::endl; exit(3); } intro.close(); return affintro.str(); } std::string HunspellPrinterCompounding::makeAffRepString() { std::string phonrulesName = _language + "/hunspell/phonrules"; std::ifstream phonrules(phonrulesName.c_str()); std::stringstream introstream; std::stringstream repstream; std::string str; int repCounter = 0; while (getline(phonrules, str)) { if (str.find("REP") != std::string::npos) { repCounter++; repstream << str << std::endl; } else { introstream << str << std::endl; } } repstream << makeAffEasterEgg() << std::endl << std::endl; repCounter += 2; introstream << "REP " << repCounter << std::endl; return introstream.str() + repstream.str(); } std::string HunspellPrinterCompounding::makeAffEasterEgg() { std::string easterEgg; easterEgg = easterEgg + "REP nuvviDspeller Divvun\n"; easterEgg = easterEgg + "REP nuvviDspeller " + getLangString() + "_" + getVersion() + "-" + getTime(); return easterEgg; } std::string HunspellPrinterCompounding::getLangString() { std::string langString; if (_language == "sme") { langString = "Davvisámi"; } else if (_language == "sma") { langString = "Åarjelsáme"; } else if (_language == "smj") { langString = "Julevsáme"; } else { langString = _language; } return langString; } void HunspellPrinterCompounding::printAffFile(std::map< std::string, int > hmhm, std::map origSuffixMap, std::map > newSuffixMap) { std::string affFileName = _language + "/hunspell/" + _language + "-comp.aff"; std::ofstream suffixFile(affFileName.c_str()); suffixFile << makeAffIntroString(); suffixFile << makeAffRepString(); suffixFile << printAfflist(); suffixFile << printSuffixes(hmhm, origSuffixMap, newSuffixMap); } std::string HunspellPrinterCompounding::printSuffixes(std::map hmhm, std::map origSuffixMap, std::map > newSuffixMap) { std::map rSuffixMap; for(std::map::iterator it = origSuffixMap.begin(); it != origSuffixMap.end(); ++it) { rSuffixMap[it->second] = it->first; } std::ostringstream outstream; for(Suffixes::iterator it = newSuffixMap.begin(); it != newSuffixMap.end(); ++it) { outstream << "SFX " << it->first << " Y " << it->second.size() << std::endl; for(std::set::iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2) { std::string::size_type tab = rSuffixMap[*it2].find_first_of("\t"); std::string gluff = rSuffixMap[*it2].substr(tab + 1, rSuffixMap[*it2].size() - tab); outstream << "SFX " << it->first << " 0 "; outstream << rSuffixMap[*it2].substr(0, tab); if (hmhm[gluff] > 0) { outstream << "/" << hmhm[gluff]; } outstream << " . " << gluff << std::endl; } outstream << std::endl; } std::set cliticsSet= readClitics(); outstream << "SFX " << _hunspellMax << " Y " << cliticsSet.size() << std::endl; for (std::set::iterator clitic = cliticsSet.begin(); clitic != cliticsSet.end(); ++clitic) { outstream << "SFX " << _hunspellMax << " 0 " << *clitic << " ." << std::endl; } return outstream.str(); } std::set HunspellPrinterCompounding::readClitics() { std::string cliticsFilename = std::string(_language); cliticsFilename += std::string("/polderland/clitics-"); cliticsFilename += std::string(_language); cliticsFilename += std::string("-plx.txt"); std::istream* cliticsFile = new std::ifstream(cliticsFilename.c_str()); std::set cliticsSet; std::string clitic; while (getline(*cliticsFile, clitic)) { std::string::size_type tab = clitic.find_first_of("\t"); cliticsSet.insert(clitic.substr(0, tab)); } delete cliticsFile; return cliticsSet; } std::string HunspellPrinterCompounding::printAfflist() { std::ostringstream outstream; outstream << "AF " << _afMap.size() << std::endl; for (std::map::iterator it1 = _afMap.begin(); it1 != _afMap.end(); ++it1) { outstream << "AF " << it1->second << " # " << it1->first << std::endl;; } outstream << std::endl; return outstream.str(); } std::string HunspellPrinterCompounding::makeDicString(std::map origStem) { std::map revStem; for (std::map::iterator it = origStem.begin(); it != origStem.end(); ++it) { revStem.insert(std::pair(it->second, it->first)); } std::ostringstream stemStringStream; stemStringStream << _stemMap.size() + 2 << std::endl; for (std::map::iterator it = _stemMap.begin(); it != _stemMap.end(); ++it) { stemStringStream << revStem[it->first] << "/" << it->second << std::endl; } stemStringStream << makeDicEasterEgg(); return stemStringStream.str(); }