/*
 * NOTE(review): the text of this file appears to be damaged and should be
 * restored from a pristine copy before any code change is attempted.
 *
 * Visible symptoms:
 *  - The whole file is collapsed onto two physical lines. The two line
 *    comments ("//#define DEBUG" and "//remove last not neeadable
 *    characters") are therefore no longer terminated by a newline and
 *    swallow everything that follows them on the same physical line.
 *  - Several spans that started with '<' and ended with '>' seem to be
 *    missing: the header names after the bare #include directives, the
 *    element type of "vector keylist;", the end of
 *    dictionary::add_article fused with the start of
 *    dictionary::get_article, the rest of get_article together with
 *    whatever followed it up to the "data_buffer" member of dictd_parser,
 *    and large parts of dictd_parser::parse.
 *  - The values stored in replace_table look entity-decoded (the last one
 *    is no longer a valid string literal); presumably they were XML
 *    entities originally -- TODO confirm against a pristine copy.
 *
 * What the surviving text shows:
 *  - class dictionary keeps a "keylist" of (val, off, size) records next
 *    to an std::fstream "datafile". add_article() records the current put
 *    position of datafile and data.length() under the given key, then
 *    writes the data and prints a warning to cerr if the write fails.
 *    narticles() returns keylist.size(). get_article() seeks to
 *    keylist[i].off and warns on seek failure; the remainder is not
 *    visible.
 *  - dictd_parser::dictd_parser() sets the parser info "format" to "dictd"
 *    and "version" to "dictd_parser, version 0.1", initialises the options
 *    "encoding", "lang_from" and "lang_to" to empty strings, inserts twelve
 *    character codes (all below 0x20) into not_valid_chars, and fills the
 *    static replace_table only while it is still empty.
 *  - skip(str, skip) steps over the prefix "skip" when str starts with it,
 *    then over any following spaces, tabs, carriage returns and line feeds,
 *    and returns the resulting pointer.
 *  - dictd_parser::parse() starts with res = EXIT_FAILURE and returns it
 *    when the file name does not end in ".index". Further down it scans
 *    data_buffer backwards over trailing CR/LF/space/tab, NUL-terminates
 *    the buffer, converts it with conv when "encoding" is set, validates
 *    the result with g_utf8_validate(), and finally passes a one-element
 *    key list plus the article data to article().
 *  - main() constructs a dictd_parser and returns parser.run(argc, argv).
 *
 * NOTE(review), parse(): the temporary dictionary (filename + ".tmp") is
 * constructed before the file name is validated, and
 * filename.substr(filename.length()-6, 6) is evaluated without checking
 * that the name has at least six characters; for a shorter name the
 * unsigned subtraction wraps and substr() throws std::out_of_range.
 */
/* * This file is part of makedict - convertor from any * dictionary format to any http://xdxf.sourceforge.net * * Copyright (C) Evgeniy Dushistov, 2005 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Library General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include #include #include #include #include #include #include #include "charset_conv.hpp" #include "mapfile.hpp" #include "utils.hpp" #include "parser.hpp" using std::string; using std::vector; using std::cerr; using std::cout; using std::endl; //#define DEBUG class dictionary { public: explicit dictionary(const string& tmpfilename); ~dictionary(); void add_article(const string& keyval, const string& data); size_t narticles() const { return keylist.size(); } void get_article(size_t i, string &key, string& data); private: string datafilename; std::fstream datafile; struct key { string val; gulong off, size; key(const string& v, gulong o, gulong s) : val(v), off(o), size(s) {} }; vector keylist; }; void dictionary::add_article(const string& keyval, const string& data) { keylist.push_back(key(keyval, datafile.tellp(), data.length())); if (!datafile.write(data.c_str(), data.length())) cerr<<_("WARRNING: dictionary::add_article: write failed")<=keylist.size()) return; if (!datafile.seekg(keylist[i].off, std::ios::beg)) { cerr<<_("WARRNING: dictionary::get_article: seek 
failed")< data_buffer; static Str2StrTable replace_table; }; Str2StrTable dictd_parser::replace_table; dictd_parser::dictd_parser() { set_parser_info("format", "dictd"); set_parser_info("version", "dictd_parser, version 0.1"); parser_options["encoding"]=""; parser_options["lang_from"]=""; parser_options["lang_to"]=""; not_valid_chars.insert(0x1A); not_valid_chars.insert(0x12); not_valid_chars.insert(0x15); not_valid_chars.insert(0x07); not_valid_chars.insert(0x0E); not_valid_chars.insert(0x0B); not_valid_chars.insert(0x01); not_valid_chars.insert(0x08); not_valid_chars.insert(0x1F); not_valid_chars.insert(0x04); not_valid_chars.insert(0x0C); not_valid_chars.insert(0x18); if (!replace_table.empty()) return; replace_table["<"]="<"; replace_table[">"]=">"; replace_table["&"]="&"; replace_table["\""]="""; } static inline const char *skip(const char *str, const char *skip) { if (g_str_has_prefix(str, skip)) str+=strlen(skip); while (*str && (*str==' ' || *str=='\t' || *str=='\r' || *str=='\n')) ++str; return str; } int dictd_parser::parse(const string& filename) { int res=EXIT_FAILURE; dictionary dict(filename+".tmp"); if (filename.substr(filename.length()-6, 6)!=".index") return res; if (!is_file_exist(filename)) { cerr<::reverse_iterator ri; //remove last not neeadable characters for (ri=data_buffer.rbegin(); ri!=data_buffer.rend() && (*ri=='\r' || *ri=='\n' || *ri==' ' || *ri=='\t'); ++ri) ; if (ri!=data_buffer.rbegin() && ri!=data_buffer.rend()) *(ri-1)='\0'; else data_buffer.push_back('\0'); if (encoding) conv.convert(&data_buffer[0], conv_str); else conv_str=&data_buffer[0]; if (!g_utf8_validate(conv_str.c_str(), -1, NULL)) { cerr<<_("Not valid utf8 string: ")<<&data_buffer[0]< key_list(1, key); article(key_list, dictdata); } } res=EXIT_SUCCESS; return res; } int main(int argc, char *argv[]) { dictd_parser parser; return parser.run(argc, argv); }