/*
    Copyright (C) 2008-2013 Børre Gaup

    This file is part of the program wordlist2hunspell.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
    MA 02110-1301 USA.
*/

/*!
 * \brief The StringParser class parses a line written out by a transducer
 *
 * A string has one or more > marks.
 *
 * An input string is divided into two parts.
 * The first part is the stem
 * The second part is a set of suffix + wordclass
 *
 * Example: the line "abc>de>f\tN" yields the stem "abc", the suffix "def"
 * and the plx part "N".
 */

#include "stringparser.h"
#include <string>

/*!
 * \brief Construct a parser and immediately parse \a inString.
 *
 * The line is first passed through cleanCruft() and the result is handed
 * to parseString(), which fills in the stem, suffix and plx part.
 *
 * \param inString A line from the wordlist
 */
StringParser::StringParser(std::string inString)
    : _stem(), _suffix(), _plxPart()
{
    parseString(cleanCruft(inString));
}

/**
 * @brief Parse the string and assign its parts to \a _stem, \a _suffix
 *        and \a _plxPart.
 *
 * Records the position of the first '>' and of the first tab, then lets
 * setStem(), setSuffix() and setPlxPart() cut the string at those positions.
 *
 * @param inString A line from the wordlist
 */
void StringParser::parseString(std::string inString)
{
    // Both positions must be stored before any of the setters run,
    // because every setter reads them.
    _firstDivisionMark = inString.find_first_of(">");
    _tab = inString.find_first_of("\t");

    setStem(inString);
    setSuffix(inString);
    setPlxPart(inString);
}

/**
 * @brief Extracts the stem from \a inString, assign the result to \a _stem
 *
 * The stem is everything before the first '>' when that mark comes before
 * the tab; otherwise it is everything before the tab.
 *
 * @param inString A line from the wordlist
 */
void StringParser::setStem(std::string inString)
{
    if (_firstDivisionMark < _tab) {
        _stem = rinse(inString.substr(0, _firstDivisionMark));
    } else {
        _stem = rinse(inString.substr(0, _tab));
    }
}

/**
 * @brief Extracts the suffix part from \a inString
 *
 * The suffix is the text from the first '>' up to the tab, with every '>'
 * removed. When there is no '>' before the tab, or the extracted text is
 * empty, the suffix is set to "0".
 *
 * @param inString A line from the wordlist
 */
void StringParser::setSuffix(std::string inString)
{
    std::string result;

    if (_firstDivisionMark < _tab) {
        result = rinse(inString.substr(_firstDivisionMark,
                                       _tab - _firstDivisionMark));
    }

    if (result.empty()) {
        _suffix = "0";
    } else {
        _suffix = result;
    }
}

/**
 * @brief Extracts the plxpart from \a inString
 *
 * The plx part is everything that follows the first tab.
 *
 * NOTE(review): when the line has no tab, \a _tab holds the "not found"
 * result of find_first_of() and `_tab + 1` evaluates to 0 (assuming \a _tab
 * is declared as std::string::size_type -- confirm in stringparser.h), so
 * the whole line becomes the plx part. That behaviour is kept as is.
 *
 * @param inString A line from the wordlist
 */
void StringParser::setPlxPart(std::string inString)
{
    _plxPart = inString.substr(_tab + 1);
}

/*!
 * Return the stem part of the result
 */
std::string StringParser::getStem() const
{
    return _stem;
}

/*!
 * Return the suffix of the result
 */
std::string StringParser::getSuffix() const
{
    return _suffix;
}

/*!
 * Return the plx part of the result
 */
std::string StringParser::getPlxPart() const
{
    return _plxPart;
}

/*!
 * Removes instances of two > chars from the incoming string
 * Also removes > chars at the end or in the start of the string
 *
 * \param dirty The raw line
 * \return \a dirty with every run of '>' collapsed to a single '>' and with
 *         leading and trailing '>' removed. An empty string, or a string
 *         consisting only of '>', yields an empty string.
 */
std::string StringParser::cleanCruft(std::string dirty)
{
    // Collapse runs of '>': drop one char of each ">>" pair and look again
    // from the same position, so ">>>" is reduced step by step to ">".
    std::string::size_type pos = 0;
    while ((pos = dirty.find(">>", pos)) != std::string::npos) {
        dirty.erase(pos, 1);
    }

    // Nothing but '>' (or nothing at all) is left: return early. Comparing
    // a "not found" position with size() - 1 on an empty string would be
    // npos == npos and must not be used as a loop condition here.
    const std::string::size_type last = dirty.find_last_not_of('>');
    if (last == std::string::npos) {
        return std::string();
    }

    // Trim the trailing marks first, then the leading ones.
    dirty.erase(last + 1);
    dirty.erase(0, dirty.find_first_not_of('>'));

    return dirty;
}

/**
 * Remove all > chars from \a inString
 *
 * @param inString The string to clean
 * @return A copy of \a inString without any '>'
 */
std::string StringParser::rinse(std::string inString)
{
    // Resume the search at the position of the last hit; everything before
    // it is already free of '>'.
    std::string::size_type pos = 0;
    while ((pos = inString.find('>', pos)) != std::string::npos) {
        inString.erase(pos, 1);
    }

    return inString;
}