/* Copyright (C) 2008-2013 Børre Gaup This file is part of the program wordlist2hunspell. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include // needed for output #include // needed for exit #include // needed for sort #include // needed for converting int to string #include "wordlistparsernocompound.h" /*! * Each unique stem is mapped to a unique number * Each unique suffix is mapped to a unique number * Each unique derivation is mapped to a unique number * * This mapping is done because the input file can be huge, so instead * of storing the strings representing stems, suffixes and derivations, each * string is assigned a number. * * Each stem has a set of suffixes and derivations. Because different stems * may have the same set of suffixes and derivations, this is recorded in * maps. * * The _suffixSetStemSet maps a suffixset to a list of stems * The _derivationSetStemSet maps a derivationset to a list of stems * */ void WordlistParserNoCompound::processFile(std::istream* instream) { std::string str; getline (*instream, str); StringParser strParser(str); setPreviousStem(strParser.getStem()); setFirstChar(); do { StringParser strParser(str); if (strParser.getPlxPart().find("E") != std::string::npos || strParser.getPlxPart().find("I") != std::string::npos) { handleStem(strParser.getStem(), strParser.getSuffix()); handleSuffixes(strParser.getStem(), strParser.getSuffix(), strParser.getPlxPart()); } updateAndPrintLineCount(); } while (getline(*instream, str)); // Add the last stem handleStem(strParser.getStem(), strParser.getSuffix()); handleSuffixes(strParser.getStem(), strParser.getSuffix(), strParser.getPlxPart()); } /** * @brief Add suffix and plxclasses if necessary * * Don't add empty suffixes when stem length is 1. * * @param currentStem stem of the line * @param suffixes suffixes of the line * @return void */ void WordlistParserNoCompound::handleSuffixes(std::string currentStem, std::string suffix, std::string plx) { if (! (currentStem.length() == 1 && suffix == "0")) { std::set plxParts = getPlxParts(plx); for (std::set::iterator plxPart = plxParts.begin(); plxPart != plxParts.end(); ++plxPart) { if (plxPart->find("E") != std::string::npos || plxPart->find("I") != std::string::npos) { if (plxPart->find("X") != std::string::npos) { addSuffix(suffix + "\tX"); addPlxClass("X"); } else { addSuffix(suffix + "\tI"); addPlxClass("I"); } } } } }