// // C++ Implementation: hunspell_maker // // Description: // // // Author: Børre Gaup , (C) 2008 // // Copyright: See COPYING file that comes with this distribution // // #include "hunspell_maker.h" hunspell_maker::hunspell_maker(string file_prefix) { string dic_file_name = file_prefix + "/hunspell/dics"; string der_file_name = file_prefix + "/hunspell/ders"; string aff_file_name = file_prefix + "/hunspell/affs"; dic_file.open(dic_file_name.c_str(), fstream::out); der_file.open(der_file_name.c_str(), fstream::out); aff_file.open(aff_file_name.c_str(), fstream::out); } string hunspell_maker::clean_cruft(string dirty) { // clean away some cruft string::size_type t; while ((t = dirty.find(">>")) != string::npos) { dirty = dirty.substr(0, t) + dirty.substr(t + 1, dirty.size()); } if ((t = dirty.find(">\t")) != string::npos) { dirty = dirty.substr(0, t) + dirty.substr(t + 1, dirty.size()); } if ((t = dirty.find(">-\t")) != string::npos) { dirty = dirty.substr(0, t) + dirty.substr(t + 1, dirty.size()); } return dirty; } string rinse(string str) { string::size_type t; while ((t = str.find(">")) != string::npos) { str = str.substr(0, t) + str.substr(t + 1, str.size()); } return str; } vector hunspell_maker::parse_string(string str) { // parse the string string::size_type first_token = str.find_first_of(">"); vector strings; if(first_token == string::npos) { first_token = str.find_first_of("\t"); strings.push_back(rinse(str.substr(0, first_token))); strings.push_back(rinse(str.substr(first_token, str.size() - first_token))); } else { string::size_type second_token = str.find_last_of(">"); if(first_token == second_token) { strings.push_back(rinse(str.substr(0, first_token))); strings.push_back(rinse(str.substr(first_token, str.size() - first_token))); } else { strings.push_back(rinse(str.substr(0, first_token))); strings.push_back(rinse(str.substr(first_token, second_token - first_token))); strings.push_back(rinse(str.substr(second_token, str.size() - second_token))); } } return strings; } void hunspell_maker::add_to_dic(vector strings) { if (strings.size() == 2) { int id = affixes.add_int_vec(strings[1]); dicstem_to_affixes.add_index_map(strings[0], id); } else { int id = affixes.add_int_vec(strings[2]); derstem_to_affixes.add_index_map(strings[1], id); id = derstems.add_int_vec(strings[1]); dicstem_to_affixes.add_index_map(strings[0],id); } } void hunspell_maker::flush() { dicstem_to_affixes.print_dic(&dic_file); cout.flush(); cout << "dics ..." << endl; dicstem_to_affixes.clear(); } void hunspell_maker::print_files() { cout << "ders ..." << endl; derstems.print_ders(&der_file, derstem_to_affixes.get_map()); affixes.print_affixes(&aff_file); } hunspell_maker::~hunspell_maker() { dic_file.close(); aff_file.close(); }