1 #ifndef _HFST_TRANSITION_GRAPH_H_
2 #define _HFST_TRANSITION_GRAPH_H_
15 #include "../HfstSymbolDefs.h"
16 #include "../HfstExceptionDefs.h"
17 #include "../HfstDataTypes.h"
18 #include "../HarmonizeUnknownAndIdentitySymbols.h"
19 #include "../HfstFlagDiacritics.h"
20 #include "../HfstEpsilonHandler.h"
23 #include "HfstTropicalTransducerTransitionData.h"
24 #include "HfstFastTransitionData.h"
34 void set_file(FILE * f);
37 void write(
const char * str);
49 namespace implementations {
121 template <
class C>
class HfstTransitionGraph
133 typedef std::pair<HfstSymbol, HfstSymbol>
148 typedef std::vector<HfstTransitions> HfstStates;
150 HfstStates state_vector;
153 static const HfstState INITIAL_STATE = 0;
156 typedef std::map<HfstState,typename C::WeightType> FinalWeightMap;
158 FinalWeightMap final_weight_map;
164 typedef unsigned int HfstNumber;
165 typedef std::vector<HfstNumber> HfstNumberVector;
166 typedef std::pair<HfstNumber, HfstNumber> HfstNumberPair;
167 typedef std::map<HfstNumberPair, HfstNumberPair> HfstNumberPairSubstitutions;
173 typedef typename HfstStates::iterator iterator;
199 initialize_alphabet(alphabet);
201 state_vector.push_back(tr);
205 initialize_alphabet(alphabet);
207 state_vector.push_back(tr);
208 unsigned int linecount=0;
209 this->assign(read_in_att_format(file,
"@0@", linecount));
213 initialize_alphabet(alphabet);
215 state_vector.push_back(tr);
216 unsigned int linecount=0;
217 this->assign(read_in_att_format(file.get_file(),
"@0@", linecount));
226 state_vector = graph.state_vector;
227 final_weight_map = graph.final_weight_map;
228 alphabet = graph.alphabet;
240 state_vector = graph.state_vector;
241 final_weight_map = graph.final_weight_map;
242 alphabet = graph.alphabet;
250 *fsm = ConversionFunctions::
251 hfst_transducer_to_hfst_basic_transducer(transducer);
252 state_vector = fsm->state_vector;
253 final_weight_map = fsm->final_weight_map;
254 alphabet = fsm->alphabet;
267 alpha.insert(C::get_epsilon());
268 alpha.insert(C::get_unknown());
269 alpha.insert(C::get_identity());
274 bool check_alphabet()
276 for (iterator it =
begin(); it !=
end(); it++)
278 for (
typename HfstTransitions::iterator tr_it
280 tr_it != it->end(); tr_it++)
282 C data = tr_it->get_transition_data();
284 if(alphabet.find(data.get_input_symbol())
288 if(alphabet.find(data.get_output_symbol())
299 void print_alphabet()
const
301 for (
typename HfstTransitionGraphAlphabet::const_iterator it
302 = alphabet.begin(); it != alphabet.end(); it++)
304 if (it != alphabet.begin())
308 std::cerr << std::endl;
313 unsigned int get_symbol_number
315 return C::get_number(symbol);
320 void initialize_state_vector
321 (
unsigned int number_of_states)
323 state_vector.reserve(number_of_states);
329 void initialize_transition_vector
330 (
unsigned int state_number,
unsigned int number_of_transitions)
333 state_vector[state_number].reserve(number_of_transitions);
347 alphabet.insert(symbol);
355 alphabet.erase(symbol);
358 void remove_symbols_from_alphabet(
const HfstSymbolSet &symbols) {
359 for (
typename HfstSymbolSet::const_iterator it = symbols.begin();
360 it != symbols.end(); it++)
370 for (
typename HfstSymbolSet::const_iterator it = symbols.begin();
371 it != symbols.end(); it++)
373 alphabet.insert(*it);
379 for (
typename HfstSymbolPairSet::const_iterator it = symbols.begin();
380 it != symbols.end(); it++)
382 alphabet.insert(it->first);
383 alphabet.insert(it->second);
389 void prune_alphabet_after_substitution(
const std::set<unsigned int> &symbols)
391 if (symbols.size() == 0)
394 std::vector<bool> symbols_found;
396 (C::get_max_number()+1,
false);
399 for (iterator it =
begin(); it !=
end(); it++)
401 for (
typename HfstTransitions::iterator tr_it
403 tr_it != it->end(); tr_it++)
405 const C & data = tr_it->get_transition_data();
406 symbols_found.at(data.get_input_number()) =
true;
407 symbols_found.at(data.get_output_number()) =
true;
413 for (std::set<unsigned int>::const_iterator it = symbols.begin();
414 it != symbols.end(); it++)
416 if (! symbols_found.at(*it))
417 alphabet.erase(C::get_symbol(*it));
425 for (iterator it =
begin(); it !=
end(); it++)
427 for (
typename HfstTransitions::iterator tr_it
429 tr_it != it->end(); tr_it++)
431 C data = tr_it->get_transition_data();
433 retval.insert(data.get_input_symbol());
434 retval.insert(data.get_output_symbol());
454 bool unknowns_or_identities_used =
455 ( (symbols_found.find(
"@_UNKNOWN_SYMBOL_@") != symbols_found.end()) ||
456 (symbols_found.find(
"@_IDENTITY_SYMBOL_@") != symbols_found.end()) );
460 if (!force && unknowns_or_identities_used)
464 symbols_found.insert(
"@_EPSILON_SYMBOL_@");
465 symbols_found.insert(
"@_UNKNOWN_SYMBOL_@");
466 symbols_found.insert(
"@_IDENTITY_SYMBOL_@");
472 for (
typename HfstTransitionGraphAlphabet::iterator it
474 it != alphabet.end(); it++)
476 if (symbols_found.find(*it) == symbols_found.end())
477 symbols_not_found.insert(*it);
482 for (
typename HfstTransitionGraphAlphabet::iterator it
483 = symbols_not_found.begin();
484 it != symbols_not_found.end(); it++)
505 for (
typename HfstTransitions::const_iterator tr_it
507 tr_it != it->end(); tr_it++)
509 C data = tr_it->get_transition_data();
511 retval.insert(
StringPair(data.get_input_symbol(),
512 data.get_output_symbol()));
528 state_vector.push_back(tr);
529 return state_vector.size()-1;
539 while(state_vector.size() <= s) {
541 state_vector.push_back(tr);
548 return state_vector.size()-1;
562 alphabet.insert(data.get_input_symbol());
563 alphabet.insert(data.get_output_symbol());
565 state_vector[s].push_back(transition);
575 bool remove_symbols_from_alphabet=
false)
577 if (! (state_vector.size() > s))
586 std::stack<typename HfstTransitions::iterator> elements_to_remove;
589 for (
typename HfstTransitions::iterator it = transitions.begin();
590 it != transitions.end(); it++)
598 elements_to_remove.push(it);
602 while (!elements_to_remove.empty())
604 state_vector[s].erase(elements_to_remove.top());
605 elements_to_remove.pop();
608 if (remove_symbols_from_alphabet)
621 return (final_weight_map.find(s) != final_weight_map.end());
626 if (final_weight_map.find(s) != final_weight_map.end())
627 return final_weight_map.find(s)->second;
636 const typename C::WeightType & weight) {
638 final_weight_map[s] = weight;
645 for (
typename HfstStates::iterator it = state_vector.begin();
646 it != state_vector.end();
650 std::sort<typename HfstTransitions::iterator>
651 (transitions.begin(),transitions.end());
660 iterator
begin() {
return state_vector.begin(); }
668 iterator
end() {
return state_vector.end(); }
682 if (s >= state_vector.size()) {
684 return state_vector[s];
707 state_vector[s1] = state_vector[s2];
708 state_vector[s2] = s1_copy;
711 for (iterator it =
begin(); it !=
end(); it++)
714 for (
unsigned int i=0; i < it->size(); i++)
732 it->operator[](i) = tr;
740 typename FinalWeightMap::iterator s1_it = final_weight_map.find(s1);
741 typename FinalWeightMap::iterator s2_it = final_weight_map.find(s2);
742 typename FinalWeightMap::iterator end_it = final_weight_map.end();
744 if (s1_it != end_it && s2_it != end_it) {
745 typename C::WeightType s1_weight = s1_it->second;
746 final_weight_map[s1] = s2_it->second;
747 final_weight_map[s2] = s1_weight;
749 if (s1_it != end_it) {
750 typename C::WeightType w = s1_it->second;
751 final_weight_map.erase(s1);
752 final_weight_map[s2] = w;
754 if (s2_it != end_it) {
755 typename C::WeightType w = s2_it->second;
756 final_weight_map.erase(s2);
757 final_weight_map[s1] = w;
764 static void write_weight(FILE * file,
float weight)
769 fprintf(file,
"%f", weight);
772 static void write_weight(std::ostream & os,
float weight)
781 static void replace_all(std::string & symbol,
782 const std::string &str1,
783 const std::string &str2)
785 size_t pos = symbol.find(str1);
786 while (pos != std::string::npos)
788 symbol.erase(pos, str1.size());
789 symbol.insert(pos, str2);
791 (str1, pos+str2.size());
796 static void xfstize(std::string & symbol)
798 std::string escaped_symbol;
799 for (
size_t pos = 0; pos < symbol.size(); pos++)
801 if (symbol[pos] ==
'%')
802 escaped_symbol.append(
"\"%\"");
803 else if (symbol[pos] ==
'"')
804 escaped_symbol.append(
"%\"");
805 else if (symbol[pos] ==
'?')
806 escaped_symbol.append(
"\"?\"");
808 escaped_symbol.append(1, symbol[pos]);
810 symbol = escaped_symbol;
813 static void xfstize_symbol(std::string & symbol)
816 replace_all(symbol,
"@_EPSILON_SYMBOL_@",
"0");
817 replace_all(symbol,
"@_UNKNOWN_SYMBOL_@",
"?");
818 replace_all(symbol,
"@_IDENTITY_SYMBOL_@",
"?");
819 replace_all(symbol,
"\t",
"@_TAB_@");
822 void print_xfst_state(std::ostream & os,
HfstState state)
824 if (state == INITIAL_STATE) { os <<
"S"; }
829 void print_xfst_state(FILE * file,
HfstState state)
831 if (state == INITIAL_STATE) { fprintf(file,
"S"); }
833 fprintf(file,
"s%i", state);
836 void print_xfst_arc(std::ostream & os, C data)
839 if (data.get_input_symbol() !=
840 data.get_output_symbol())
844 std::string s = data.get_input_symbol();
847 if (data.get_input_symbol() !=
848 data.get_output_symbol() ||
849 data.get_output_symbol() ==
"@_UNKNOWN_SYMBOL_@")
851 s = data.get_output_symbol();
855 if (data.get_input_symbol() !=
856 data.get_output_symbol())
862 void print_xfst_arc(FILE * file, C data)
864 if (data.get_input_symbol() !=
865 data.get_output_symbol())
870 std::string s = data.get_input_symbol();
872 fprintf(file,
"%s", s.c_str());
874 if (data.get_input_symbol() !=
875 data.get_output_symbol() ||
876 data.get_output_symbol() ==
"@_UNKNOWN_SYMBOL_@")
878 s = data.get_output_symbol();
880 fprintf(file,
":%s", s.c_str());
882 if (data.get_input_symbol() !=
883 data.get_output_symbol())
896 unsigned int source_state=0;
897 for (iterator it =
begin(); it !=
end(); it++)
899 print_xfst_state(os, source_state);
902 if (it->begin() == it->end())
908 for (
typename HfstTransitions::iterator tr_it
910 tr_it != it->end(); tr_it++)
912 if (tr_it != it->begin())
917 print_xfst_arc(os, data);
923 os <<
"." << std::endl;
929 static std::string prologize_symbol(
const std::string & symbol)
935 if (symbol ==
"@_EPSILON_SYMBOL_@")
937 if (symbol ==
"@_UNKNOWN_SYMBOL_@")
939 if(symbol ==
"@_IDENTITY_SYMBOL_@")
942 std::string retval(symbol);
943 replace_all(retval,
"\"",
"\\\"");
948 static std::string deprologize_symbol(
const std::string & symbol)
955 return "@_EPSILON_SYMBOL_@";
957 return "@_UNKNOWN_SYMBOL_@";
959 std::string retval(symbol);
960 replace_all(retval,
"\\\"",
"\"");
964 static void print_prolog_arc_symbols(FILE * file, C data)
966 std::string symbol = prologize_symbol(data.get_input_symbol());
967 fprintf(file,
"\"%s\"", symbol.c_str());
969 if (data.get_input_symbol() !=
970 data.get_output_symbol() ||
971 data.get_input_symbol() ==
"@_UNKNOWN_SYMBOL_@")
973 symbol = prologize_symbol(data.get_output_symbol());
974 fprintf(file,
":\"%s\"", symbol.c_str());
978 static void print_prolog_arc_symbols(std::ostream & os, C data)
980 std::string symbol = prologize_symbol(data.get_input_symbol());
981 os <<
"\"" << symbol <<
"\"";
983 if (data.get_input_symbol() !=
984 data.get_output_symbol() ||
985 data.get_input_symbol() ==
"@_UNKNOWN_SYMBOL_@")
987 symbol = prologize_symbol(data.get_output_symbol());
988 os <<
":\"" << symbol <<
"\"";
995 bool write_weights=
true)
997 unsigned int source_state=0;
998 const char * identifier = name.c_str();
1000 if (name.find(
",") != std::string::npos)
1002 std::string msg(
"no commas allowed in the name of prolog networks");
1005 fprintf(file,
"network(%s).\n", identifier);
1009 initialize_alphabet(symbols_used_);
1010 for (
typename HfstTransitionGraphAlphabet::const_iterator it
1011 = alphabet.begin(); it != alphabet.end(); it++)
1013 if (symbols_used_.find(*it) == symbols_used_.end())
1015 fprintf(file,
"symbol(%s, \"%s\").\n", identifier, it->c_str());
1020 for (iterator it =
begin(); it !=
end(); it++)
1022 for (
typename HfstTransitions::iterator tr_it
1024 tr_it != it->end(); tr_it++)
1026 fprintf(file,
"arc(%s, %i, %i, ",
1029 print_prolog_arc_symbols(file, data);
1030 if (write_weights) {
1031 fprintf(file,
", ");
1032 write_weight(file, data.get_weight());
1034 fprintf(file,
").\n");
1040 for (
typename FinalWeightMap::const_iterator it
1041 = this->final_weight_map.begin();
1042 it != this->final_weight_map.end(); it++)
1044 fprintf(file,
"final(%s, %i", identifier, it->first);
1047 fprintf(file,
", ");
1048 write_weight(file, it->second);
1050 fprintf(file,
").\n");
1057 bool write_weights=
true)
1059 unsigned int source_state=0;
1062 if (name.find(
",") != std::string::npos)
1064 std::string msg(
"no commas allowed in the name of prolog networks");
1067 os <<
"network(" << name <<
")." << std::endl;
1071 initialize_alphabet(symbols_used_);
1072 for (
typename HfstTransitionGraphAlphabet::const_iterator it
1073 = alphabet.begin(); it != alphabet.end(); it++)
1075 if (symbols_used_.find(*it) == symbols_used_.end())
1077 os <<
"symbol(" << name <<
", \"" << *it <<
"\")" << std::endl;
1082 for (iterator it =
begin(); it !=
end(); it++)
1084 for (
typename HfstTransitions::iterator tr_it
1086 tr_it != it->end(); tr_it++)
1088 os <<
"arc(" << name <<
", " << source_state <<
", " << tr_it->
get_target_state() <<
", ";
1090 print_prolog_arc_symbols(os, data);
1091 if (write_weights) {
1093 write_weight(os, data.get_weight());
1095 os <<
")." << std::endl;
1101 for (
typename FinalWeightMap::const_iterator it
1102 = this->final_weight_map.begin();
1103 it != this->final_weight_map.end(); it++)
1105 os <<
"final(" << name <<
", " << it->first;
1106 if (write_weights) {
1108 write_weight(os, it->second);
1110 os <<
")." << std::endl;
1116 static bool strip_quotes_from_both_sides(std::string & str)
1120 if (str[0] !=
'"' || str[str.length()-1] !=
'"')
1123 str.erase(str.length()-1, 1);
1129 static bool strip_ending_parenthesis_and_comma(std::string & str)
1133 if (str[str.length()-2] !=
')' || str[str.length()-1] !=
'.')
1135 str.erase(str.length()-2);
1139 static bool parse_prolog_network_line(
const std::string & line, std::string &
name)
1143 int n = sscanf(line.c_str(),
"network(%s", namearr);
1147 std::string namestr(namearr);
1149 if (!strip_ending_parenthesis_and_comma(namestr))
1158 static std::vector<unsigned int> get_positions_of_unescaped_char
1159 (
const std::string & str,
char c,
char esc)
1161 std::vector<unsigned int> retval;
1162 for (
size_t i=0; i < str.length(); i++)
1167 retval.push_back(i);
1168 else if (str[i-1] == esc)
1171 retval.push_back(i);
1181 static bool get_prolog_arc_symbols
1182 (
const std::string & str, std::string & isymbol, std::string & osymbol)
1185 std::vector<unsigned int> quote_positions
1186 = get_positions_of_unescaped_char(str,
'"',
'\\');
1189 if (quote_positions.size() == 2)
1191 if (quote_positions[0] != 0 ||
1192 quote_positions[1] != str.length()-1)
1196 else if (quote_positions.size() == 4)
1198 if (quote_positions[0] != 0 ||
1199 quote_positions[3] != str.length()-1)
1203 if (quote_positions[2] - quote_positions[1] != 2)
1207 if (str[quote_positions[1] + 1] !=
':')
1219 if (quote_positions.size() == 2)
1222 std::string symbol(str, quote_positions[0]+1, quote_positions[1]-quote_positions[0]-1);
1223 isymbol = deprologize_symbol(symbol);
1224 if (isymbol ==
"@_UNKNOWN_SYMBOL_@")
1225 isymbol =
"@_IDENTITY_SYMBOL_@";
1232 std::string insymbol(str, quote_positions[0]+1, quote_positions[1]-quote_positions[0]-1);
1233 std::string outsymbol(str, quote_positions[2]+1, quote_positions[3]-quote_positions[2]-1);
1234 isymbol = deprologize_symbol(insymbol);
1235 osymbol = deprologize_symbol(outsymbol);
1241 static bool extract_weight(std::string & symbol,
float & weight)
1243 size_t last_double_quote = symbol.find_last_of(
'"');
1244 size_t last_space = symbol.find_last_of(
' ');
1247 if (last_double_quote == std::string::npos)
1250 if (last_space == std::string::npos) {
1253 else if (last_double_quote > last_space) {
1256 else if (last_double_quote + 2 == last_space && last_space < symbol.size()-1)
1258 std::istringstream buffer(symbol.substr(last_space+1));
1262 symbol.resize(last_space-1);
1273 char namestr[100];
char sourcestr[100];
1274 char targetstr[100];
char symbolstr[100];
1276 int n = sscanf(line.c_str(),
"arc(%[^,], %[^,], %[^,], %[^\t\n]",
1277 namestr, sourcestr, targetstr, symbolstr);
1279 std::string symbol(symbolstr);
1282 if (!strip_ending_parenthesis_and_comma(symbol))
1287 if (std::string(namestr) != graph.name)
1290 unsigned int source = atoi(sourcestr);
1291 unsigned int target = atoi(targetstr);
1295 if (! extract_weight(symbol, weight))
1298 std::string isymbol =
"";
1299 std::string osymbol =
"";
1301 if (!get_prolog_arc_symbols(symbol, isymbol, osymbol))
1304 graph.add_transition(source, HfstTransition<C>(target, isymbol, osymbol, weight));
1308 static bool parse_prolog_final_line(
const std::string & line,
HfstTransitionGraph & graph)
1313 char weightstr[100];
1316 unsigned int number_of_commas = 0;
1317 size_t pos = line.find(
',');
1318 while (pos != std::string::npos)
1321 pos = line.find(
',', pos+1);
1324 if (number_of_commas == 1)
1326 int n = sscanf(line.c_str(),
"final(%[^,], %[^)]).", namestr, finalstr);
1330 else if (number_of_commas == 2)
1332 int n = sscanf(line.c_str(),
"final(%[^,], %[^,], %[^)]).", namestr, finalstr, weightstr);
1335 std::istringstream buffer(weightstr);
1345 if (std::string(namestr) != graph.name)
1348 graph.set_final_weight(atoi(finalstr), weight);
1352 static bool parse_prolog_symbol_line(
const std::string & line,
HfstTransitionGraph & graph)
1356 char symbolarr[100];
1357 int n = sscanf(line.c_str(),
"symbol(%[^,], %s", namearr, symbolarr);
1362 std::string namestr(namearr);
1363 std::string symbolstr(symbolarr);
1365 if (namestr != graph.name)
1368 if (! strip_ending_parenthesis_and_comma(symbolstr))
1371 if (! strip_quotes_from_both_sides(symbolstr))
1374 graph.add_symbol_to_alphabet(symbolstr);
1379 static std::string strip_newlines(std::string & str)
1381 for (
signed int i=(
signed int)str.length()-1; i >= 0; --i)
1383 if (str[i] ==
'\n' || str[i] ==
'\r')
1395 static std::string get_stripped_line
1396 (std::istream & is, FILE * file,
unsigned int & linecount)
1401 if (not is.getline(line,255).eof())
1405 if (NULL == fgets(line, 255, file))
1410 std::string linestr(line);
1411 return strip_newlines(linestr);
1423 (std::istream &is, FILE *file,
unsigned int & linecount)
1427 std::string linestr;
1433 linestr = get_stripped_line(is, file, linecount);
1440 if (linestr.length() != 0 && linestr[0] ==
'#')
1451 if (! parse_prolog_network_line(linestr, retval.name))
1453 std::string message(
"first line not valid prolog: ");
1454 message.append(linestr);
1462 linestr = get_stripped_line(is, file, linecount);
1473 if (! (parse_prolog_arc_line(linestr, retval) ||
1474 parse_prolog_final_line(linestr, retval) ||
1475 parse_prolog_symbol_line(linestr, retval)) )
1477 std::string message(
"line not valid prolog: ");
1478 message.append(linestr);
1487 unsigned int & linecount)
1489 return read_in_prolog_format
1496 unsigned int & linecount)
1498 return read_in_prolog_format
1505 unsigned int & linecount)
1507 return read_in_att_format(std::cin , file.get_file(),
1517 (void)write_weights;
1518 unsigned int source_state=0;
1519 for (iterator it =
begin(); it !=
end(); it++)
1521 print_xfst_state(file, source_state);
1522 fprintf(file,
":\t");
1524 if (it->begin() == it->end())
1526 fprintf(file,
"(no arcs)");
1530 for (
typename HfstTransitions::iterator tr_it
1532 tr_it != it->end(); tr_it++)
1534 if (tr_it != it->begin())
1536 fprintf(file,
", ");
1540 print_xfst_arc(file, data);
1542 fprintf(file,
" -> ");
1546 fprintf(file,
".\n");
1558 unsigned int source_state=0;
1559 for (iterator it =
begin(); it !=
end(); it++)
1561 for (
typename HfstTransitions::iterator tr_it
1563 tr_it != it->end(); tr_it++)
1567 std::string isymbol = data.get_input_symbol();
1568 replace_all(isymbol,
" ",
"@_SPACE_@");
1569 replace_all(isymbol,
"@_EPSILON_SYMBOL_@",
"@0@");
1570 replace_all(isymbol,
"\t",
"@_TAB_@");
1572 std::string osymbol = data.get_output_symbol();
1573 replace_all(osymbol,
" ",
"@_SPACE_@");
1574 replace_all(osymbol,
"@_EPSILON_SYMBOL_@",
"@0@");
1575 replace_all(osymbol,
"\t",
"@_TAB_@");
1577 os << source_state <<
"\t"
1582 if (write_weights) {
1584 write_weight(os, data.get_weight());
1591 if (write_weights) {
1605 unsigned int source_state=0;
1606 for (iterator it =
begin(); it !=
end(); it++)
1608 for (
typename HfstTransitions::iterator tr_it
1610 tr_it != it->end(); tr_it++)
1614 std::string isymbol = data.get_input_symbol();
1615 replace_all(isymbol,
" ",
"@_SPACE_@");
1616 replace_all(isymbol,
"@_EPSILON_SYMBOL_@",
"@0@");
1617 replace_all(isymbol,
"\t",
"@_TAB_@");
1619 std::string osymbol = data.get_output_symbol();
1620 replace_all(osymbol,
" ",
"@_SPACE_@");
1621 replace_all(osymbol,
"@_EPSILON_SYMBOL_@",
"@0@");
1622 replace_all(osymbol,
"\t",
"@_TAB_@");
1624 fprintf(file,
"%i\t%i\t%s\t%s",
1630 if (write_weights) {
1631 fprintf(file,
"\t");
1632 write_weight(file, data.get_weight());
1634 fprintf(file,
"\n");
1638 fprintf(file,
"%i", source_state);
1639 if (write_weights) {
1640 fprintf(file,
"\t");
1643 fprintf(file,
"\n");
1651 unsigned int source_state=0;
1654 for (iterator it =
begin(); it !=
end(); it++)
1656 for (
typename HfstTransitions::iterator tr_it
1658 tr_it != it->end(); tr_it++)
1662 std::string isymbol = data.get_input_symbol();
1663 replace_all(isymbol,
" ",
"@_SPACE_@");
1664 replace_all(isymbol,
"@_EPSILON_SYMBOL_@",
"@0@");
1665 replace_all(isymbol,
"\t",
"@_TAB_@");
1667 std::string osymbol = data.get_output_symbol();
1668 replace_all(osymbol,
" ",
"@_SPACE_@");
1669 replace_all(osymbol,
"@_EPSILON_SYMBOL_@",
"@0@");
1670 replace_all(osymbol,
"\t",
"@_TAB_@");
1672 cw = sprintf(ptr + cwt,
"%i\t%i\t%s\t%s",
1681 cw = sprintf(ptr + cwt,
"\t%f",
1684 cw = sprintf(ptr + cwt,
"\n");
1689 cw = sprintf(ptr + cwt,
"%i", source_state);
1692 cw = sprintf(ptr + cwt,
"\t%f",
1695 cw = sprintf(ptr + cwt,
"\n");
1708 unsigned int source_state=0;
1709 for (iterator it =
begin(); it !=
end(); it++)
1711 for (
typename HfstTransitions::iterator tr_it
1713 tr_it != it->end(); tr_it++)
1717 fprintf(file,
"%i\t%i\t%i\t%i",
1720 tr_it->get_input_number(),
1721 tr_it->get_output_number());
1724 fprintf(file,
"\t%f",
1726 fprintf(file,
"\n");
1730 fprintf(file,
"%i", source_state);
1732 fprintf(file,
"\t%f",
1734 fprintf(file,
"\n");
1754 std::string epsilon_symbol,
1755 unsigned int & linecount) {
1773 if (not is.getline(line,255).eof())
1777 if (NULL == fgets(line, 255, file))
1787 (line[0] ==
'\0') ||
1788 (line[0] ==
'\n' && line[1] ==
'\0') ||
1790 (line[0] ==
'\r' && line[1] ==
'\n' && line[2] ==
'\0')
1805 char a1 [100];
char a2 [100];
char a3 [100];
1806 char a4 [100];
char a5 [100];
1808 int n = sscanf(line,
"%s%s%s%s%s", a1, a2, a3, a4, a5);
1817 if (n == 1 || n == 2)
1818 retval.set_final_weight( atoi(a1), weight );
1820 else if (n == 4 || n == 5) {
1821 std::string input_symbol=std::string(a3);
1822 std::string output_symbol=std::string(a4);
1826 replace_all(input_symbol,
"@_SPACE_@",
" ");
1827 replace_all(input_symbol,
"@0@",
"@_EPSILON_SYMBOL_@");
1828 replace_all(input_symbol,
"@_TAB_@",
"\t");
1829 replace_all(input_symbol,
"@_COLON_@",
":");
1831 replace_all(output_symbol,
"@_SPACE_@",
" ");
1832 replace_all(output_symbol,
"@0@",
"@_EPSILON_SYMBOL_@");
1833 replace_all(output_symbol,
"@_TAB_@",
"\t");
1834 replace_all(output_symbol,
"@_COLON_@",
":");
1836 if (epsilon_symbol.compare(input_symbol) == 0)
1837 input_symbol=
"@_EPSILON_SYMBOL_@";
1838 if (epsilon_symbol.compare(output_symbol) == 0)
1839 output_symbol=
"@_EPSILON_SYMBOL_@";
1841 HfstTransition <C> tr( atoi(a2), input_symbol,
1842 output_symbol, weight );
1843 retval.add_transition( atoi(a1), tr );
1847 std::string message(line);
1865 std::string epsilon_symbol,
1866 unsigned int & linecount)
1868 return read_in_att_format
1870 epsilon_symbol, linecount);
1881 std::string epsilon_symbol,
1882 unsigned int & linecount)
1884 return read_in_att_format
1886 file, epsilon_symbol, linecount);
1891 std::string epsilon_symbol,
1892 unsigned int & linecount)
1894 return read_in_att_format(std::cin , file.get_file(),
1895 epsilon_symbol, linecount);
1909 bool input_side=
true,
1910 bool output_side=
true)
1913 for (iterator it =
begin(); it !=
end(); it++)
1916 for (
unsigned int i=0; i < it->size(); i++)
1918 HfstTransition<C> &tr_it = it->operator[](i);
1923 = tr_it.get_input_symbol();
1925 = tr_it.get_output_symbol();
1928 bool substitution_made=
false;
1931 tr_it.get_input_symbol() == old_symbol) {
1932 substituting_input_symbol = new_symbol;
1933 substitution_made=
true;
1936 tr_it.get_output_symbol() == old_symbol) {
1937 substituting_output_symbol = new_symbol;
1938 substitution_made=
true;
1942 if (substitution_made) {
1947 HfstTransition<C> tr
1948 (tr_it.get_target_state(),
1949 substituting_input_symbol,
1950 substituting_output_symbol,
1951 tr_it.get_weight());
1953 it->operator[](i) = tr;
1968 void substitute_(
const HfstNumberVector &substitutions,
1969 unsigned int no_substitution)
1972 for (iterator it =
begin(); it !=
end(); it++)
1975 for (
unsigned int i=0; i < it->size(); i++)
1977 HfstTransition<C> &tr_it = it->operator[](i);
1979 HfstNumber old_inumber = tr_it.get_input_number();
1980 HfstNumber old_onumber = tr_it.get_output_number();
1982 HfstNumber new_inumber = substitutions.at(old_inumber);
1983 HfstNumber new_onumber = substitutions.at(old_onumber);
1986 if (new_inumber != no_substitution ||
1987 new_onumber != no_substitution)
1989 if (new_inumber != no_substitution)
1992 new_inumber = old_inumber;
1994 if (new_onumber != no_substitution)
1997 new_onumber = old_onumber;
2000 HfstTransition<C> tr
2001 (tr_it.get_target_state(),
2004 tr_it.get_weight(),
false);
2006 it->operator[](i) = tr;
2018 void substitute_(
const HfstNumberPairSubstitutions &substitutions)
2021 for (iterator it =
begin(); it !=
end(); it++)
2024 for (
unsigned int i=0; i < it->size(); i++)
2026 HfstTransition<C> &tr_it = it->operator[](i);
2028 HfstNumberPair old_number_pair
2029 ( tr_it.get_input_number(),
2030 tr_it.get_output_number() );
2032 HfstNumberPairSubstitutions::const_iterator subst_it
2033 = substitutions.find(old_number_pair);
2036 if (subst_it != substitutions.end()) {
2038 HfstNumber new_input_number = subst_it->second.first;
2039 HfstNumber new_output_number = subst_it->second.second;
2042 get_symbol(new_input_number));
2044 get_symbol(new_output_number));
2047 HfstTransition<C> tr
2048 (tr_it.get_target_state(),
2051 tr_it.get_weight(),
false);
2053 it->operator[](i) = tr;
2069 unsigned int in_match = C::get_number(sp.first);
2070 unsigned int out_match = C::get_number(sp.second);
2072 bool in_match_used =
false;
2073 bool out_match_used =
false;
2076 for (iterator it =
begin(); it !=
end(); it++)
2079 for (
unsigned int i=0; i < it->size(); i++)
2081 HfstTransition<C> &tr_it = it->operator[](i);
2084 unsigned int in_tr = tr_it.get_input_number();
2085 unsigned int out_tr = tr_it.get_output_number();
2086 if (in_tr == in_match && out_tr == out_match) {
2087 it->erase(it->begin()+i); }
2090 if (in_tr == in_match || out_tr == in_match) {
2091 in_match_used=
true; }
2092 if (in_tr == out_match || out_tr == out_match) {
2093 out_match_used=
true; }
2099 if (!in_match_used) {
2100 alphabet.erase(sp.first); }
2101 if (!out_match_used) {
2102 alphabet.erase(sp.second); }
2111 if (new_sps.empty())
2113 return remove_transitions(old_sp);
2116 unsigned int old_input_number = C::get_number(old_sp.first);
2117 unsigned int old_output_number = C::get_number(old_sp.second);
2120 bool substitution_performed=
false;
2123 for (iterator it =
begin(); it !=
end(); it++)
2129 for (
unsigned int i=0; i < it->size(); i++)
2131 HfstTransition<C> &tr_it = it->operator[](i);
2134 if (tr_it.get_input_number() == old_input_number &&
2135 tr_it.get_output_number() == old_output_number)
2137 substitution_performed=
true;
2141 typename HfstSymbolPairSet::const_iterator IT
2144 HfstTransition<C> tr
2145 (tr_it.get_target_state(),
2146 C::get_number(IT->first),
2147 C::get_number(IT->second),
2151 it->operator[](i) = tr;
2155 while (IT != new_sps.end())
2157 HfstTransition<C> TR
2158 (tr_it.get_target_state(),
2159 C::get_number(IT->first),
2160 C::get_number(IT->second),
2164 new_transitions.push_back(TR);
2175 NIT != new_transitions.end(); NIT++)
2177 it->push_back(*NIT);
2185 if (substitution_performed) {
2191 std::set<unsigned int> syms;
2197 syms.insert(old_input_number);
2198 syms.insert(old_output_number);
2199 prune_alphabet_after_substitution(syms);
2206 void substitute_(
bool (*func)
2210 for (iterator it =
begin(); it !=
end(); it++)
2216 for (
unsigned int i=0; i < it->size(); i++)
2218 HfstTransition<C> &tr_it = it->operator[](i);
2221 (tr_it.get_input_symbol(),
2222 tr_it.get_output_symbol());
2226 bool perform_substitution=
false;
2228 perform_substitution =
2229 (*func)(transition_symbol_pair, substituting_transitions);
2235 if (perform_substitution)
2239 typename HfstSymbolPairSet::const_iterator IT
2240 = substituting_transitions.begin();
2242 if (not C::is_valid_symbol(IT->first) ||
2243 not C::is_valid_symbol(IT->second) )
2245 (EmptyStringException,
2246 "HfstTransitionGraph::substitute");
2248 HfstTransition<C> tr
2249 (tr_it.get_target_state(),
2252 tr_it.get_weight());
2254 it->operator[](i) = tr;
2261 while (IT != substituting_transitions.end())
2264 if (not C::is_valid_symbol(IT->first) ||
2265 not C::is_valid_symbol(IT->second) )
2267 (EmptyStringException,
2268 "HfstTransitionGraph::substitute");
2270 HfstTransition<C> TR
2271 (tr_it.get_target_state(),
2274 tr_it.get_weight());
2276 new_transitions.push_back(TR);
2291 NIT != new_transitions.end(); NIT++)
2293 it->push_back(*NIT);
2313 bool input_side=
true,
2314 bool output_side=
true) {
2316 if (not C::is_valid_symbol(old_symbol) ||
2317 not C::is_valid_symbol(new_symbol) ) {
2319 (EmptyStringException,
2320 "HfstTransitionGraph::substitute"); }
2323 if (old_symbol == new_symbol)
2326 if (alphabet.find(old_symbol) == alphabet.end())
2331 if (input_side && output_side) {
2333 if (not is_epsilon(old_symbol) &&
2334 not is_unknown(old_symbol) &&
2335 not is_identity(old_symbol)) {
2336 alphabet.erase(old_symbol); }
2339 alphabet.insert(new_symbol);
2341 substitute_(old_symbol, new_symbol, input_side, output_side);
2355 for (HfstSymbolSubstitutions::const_iterator it
2356 = substitutions.begin();
2357 it != substitutions.end(); it++)
2359 (void)get_symbol_number(it->first);
2360 (void)get_symbol_number(it->second);
2365 std::vector<unsigned int> substitutions_;
2367 unsigned int no_substitution = C::get_max_number()+substitutions.size()+1;
2368 substitutions_.resize
2369 (C::get_max_number()+1, no_substitution);
2370 for (HfstSymbolSubstitutions::const_iterator it
2371 = substitutions.begin();
2372 it != substitutions.end(); it++)
2374 HfstNumber from_symbol = get_symbol_number(it->first);
2375 HfstNumber to_symbol = get_symbol_number(it->second);
2377 substitutions_.at(from_symbol) = to_symbol;
2380 substitute_(substitutions_, no_substitution);
2399 HfstNumberPairSubstitutions substitutions_;
2400 for (HfstSymbolPairSubstitutions::const_iterator it
2401 = substitutions.begin();
2402 it != substitutions.end(); it++)
2404 HfstNumberPair from_transition
2405 (get_symbol_number(it->first.first),
2406 get_symbol_number(it->first.second));
2407 HfstNumberPair to_transition
2408 (get_symbol_number(it->second.first),
2409 get_symbol_number(it->second.second));
2410 substitutions_[from_transition] = to_transition;
2413 substitute_(substitutions_);
2423 if (not C::is_valid_symbol(sp.first) ||
2424 not C::is_valid_symbol(sp.second) ) {
2426 (EmptyStringException,
2427 "HfstTransitionGraph::substitute"); }
2429 for (
typename HfstSymbolPairSet::const_iterator it = sps.begin();
2430 it != sps.end(); it++)
2432 if (not C::is_valid_symbol(it->first) ||
2433 not C::is_valid_symbol(it->second) ) {
2435 (EmptyStringException,
2436 "HfstTransitionGraph::substitute"); }
2439 substitute_(sp, sps);
2450 if (not C::is_valid_symbol(old_pair.first) ||
2451 not C::is_valid_symbol(new_pair.first) ||
2452 not C::is_valid_symbol(old_pair.second) ||
2453 not C::is_valid_symbol(new_pair.second) ) {
2455 (EmptyStringException,
2456 "HfstTransitionGraph::substitute"); }
2459 new_pair_set.insert(new_pair);
2460 substitute_(old_pair, new_pair_set);
2491 struct substitution_data
2495 typename C::WeightType weight;
2500 typename C::WeightType weight,
2503 origin_state=origin;
2504 target_state=target;
2505 this->weight=weight;
2506 substituting_graph=substituting;
2514 void add_substitution(
const substitution_data &sub) {
2517 HfstTransition <C> epsilon_transition
2518 (s, C::get_epsilon(), C::get_epsilon(),
2523 unsigned int offset = s;
2529 it != graph->end(); it++)
2531 for (
typename HfstTransitions::const_iterator tr_it
2533 tr_it != it->end(); tr_it++)
2535 C data = tr_it->get_transition_data();
2537 HfstTransition <C> transition
2538 (tr_it->get_target_state() + offset,
2539 data.get_input_symbol(),
2540 data.get_output_symbol(),
2549 for (
typename FinalWeightMap::const_iterator it
2550 = graph->final_weight_map.begin();
2551 it != graph->final_weight_map.end(); it++)
2553 HfstTransition <C> epsilon_transition
2554 (sub.target_state, C::get_epsilon(), C::get_epsilon(),
2585 if ( not ( C::is_valid_symbol(sp.first) &&
2586 C::is_valid_symbol(sp.second) ) ) {
2588 (EmptyStringException,
2589 "HfstTransitionGraph::substitute(const HfstSymbolPair&, "
2590 "const HfstTransitionGraph&)");
2596 if (alphabet.find(sp.first) == alphabet.end() &&
2597 alphabet.find(sp.second) == alphabet.end())
2602 std::vector<substitution_data> substitutions;
2606 for (iterator it =
begin(); it !=
end(); it++)
2610 std::vector<typename HfstTransitions::iterator>
2614 for (
typename HfstTransitions::iterator tr_it
2616 tr_it != it->end(); tr_it++)
2618 C data = tr_it->get_transition_data();
2622 if (data.get_input_symbol() == sp.first &&
2623 data.get_output_symbol() == sp.second)
2626 substitutions.push_back(substitution_data
2628 tr_it->get_target_state(),
2632 old_transitions.push_back(tr_it);
2639 for (
typename std::vector<
typename
2640 HfstTransitions::iterator>::iterator IT =
2641 old_transitions.begin();
2642 IT != old_transitions.end(); IT++) {
2651 for (
typename std::vector<substitution_data>::iterator IT
2652 = substitutions.begin();
2653 IT != substitutions.end(); IT++)
2655 add_substitution(*IT);
2673 std::string weight2marker(
float weight)
2675 std::ostringstream o;
2677 return std::string(
"@") + o.str() + std::string(
"@");
2685 for (
HfstState state = 0; state < limit; state++)
2688 std::stack<typename HfstTransitions::iterator>
2691 std::vector<HfstTransition <C> > new_transitions;
2694 for (
typename HfstTransitions::iterator tr_it
2695 = state_vector[state].
begin();
2696 tr_it != state_vector[state].end(); tr_it++)
2698 C data = tr_it->get_transition_data();
2702 if (data.get_weight() != 0 )
2705 new_transitions.push_back
2706 (HfstTransition <C> (tr_it->get_target_state(),
2707 data.get_input_symbol(),
2708 data.get_output_symbol(),
2709 data.get_weight()));
2711 old_transitions.push(tr_it);
2718 while (! old_transitions.empty()) {
2719 state_vector[state].erase(old_transitions.top());
2720 old_transitions.pop();
2724 for (
typename std::vector<HfstTransition <C> >::iterator IT
2725 = new_transitions.begin();
2726 IT != new_transitions.end(); IT++)
2729 std::string marker = weight2marker(IT->get_weight());
2730 std::cerr <<
"got marker '" << marker <<
"'" << std::endl;
2731 HfstTransition <C> marker_transition(IT->get_target_state(),
2735 HfstTransition <C> new_transition(new_state,
2736 IT->get_input_symbol(),
2737 IT->get_output_symbol(),
2747 std::set<HfstState> final_states_to_remove;
2749 for (
typename FinalWeightMap::iterator fin_it = final_weight_map.begin();
2750 fin_it != final_weight_map.end(); fin_it++)
2752 if (fin_it->second != 0)
2756 std::string marker = weight2marker(fin_it->second);
2757 HfstTransition <C> epsilon_transition(new_state,
2762 final_states_to_remove.insert(fin_it->first);
2765 for (std::set<HfstState>::iterator it = final_states_to_remove.begin();
2766 it != final_states_to_remove.end(); it++)
2768 final_weight_map.erase(*it);
2777 typedef std::map<HfstSymbol, HfstTransitionGraph> SubstMap;
2783 bool symbol_found =
false;
2784 for (
typename SubstMap::const_iterator it = substitution_map.begin();
2785 it != substitution_map.end(); it++)
2787 if ( not ( C::is_valid_symbol(it->first) ))
2790 "HfstTransitionGraph::substitute "
2791 "(const std::map<HfstSymbol, HfstTransitionGraph> &)");
2793 if (!symbol_found && alphabet.find(it->first) != alphabet.end())
2795 symbol_found =
true;
2806 std::set<String> substitutions_performed_for_symbols;
2810 std::vector<substitution_data> substitutions;
2814 for (iterator it =
begin(); it !=
end(); it++)
2818 std::stack<typename HfstTransitions::iterator>
2822 for (
typename HfstTransitions::iterator tr_it
2824 tr_it != it->end(); tr_it++)
2826 C data = tr_it->get_transition_data();
2830 String istr = data.get_input_symbol();
2831 String ostr = data.get_output_symbol();
2832 typename SubstMap::iterator map_it_input = substitution_map.find(istr);
2833 typename SubstMap::iterator map_it_output = substitution_map.find(ostr);
2835 if (map_it_input == substitution_map.end() &&
2836 map_it_output == substitution_map.end())
2840 else if (istr != ostr)
2842 std::string msg(
"symbol to be substituted must not occur only on one side of transition");
2848 substitution_data sd
2850 tr_it->get_target_state(),
2852 &(map_it_input->second));
2853 substitutions.push_back(sd);
2855 old_transitions.push(tr_it);
2857 substitutions_performed_for_symbols.insert(istr);
2864 while (!old_transitions.empty())
2866 it->erase(old_transitions.top());
2867 old_transitions.pop();
2875 for (StringSet::const_iterator sym_it = substitutions_performed_for_symbols.begin();
2876 sym_it != substitutions_performed_for_symbols.end(); sym_it++)
2878 if (*sym_it !=
"@_EPSILON_SYMBOL_@" && *sym_it !=
"@_UNKNOWN_SYMBOL_@" && *sym_it !=
"@_IDENTITY_SYMBOL_@")
2885 for (StringSet::iterator sym_it = substitutions_performed_for_symbols.begin();
2886 sym_it != substitutions_performed_for_symbols.end(); sym_it++)
2888 this->
harmonize(substitution_map.at(*sym_it));
2893 for (
typename std::vector<substitution_data>::iterator IT
2894 = substitutions.begin();
2895 IT != substitutions.end(); IT++)
2897 add_substitution(*IT);
2905 bool marker2weight(
const std::string & str,
float & weight)
2909 if (str.at(0) !=
'@' || str.at(str.size()-1) !=
'@')
2912 std::string weight_string = str.substr(1, str.size()-2);
2913 std::stringstream sstream(weight_string);
2915 if (sstream.fail()) {
2926 for (
HfstState state = 0; state < limit; state++)
2929 std::stack<typename HfstTransitions::iterator>
2932 std::vector<HfstTransition <C> > new_transitions;
2935 for (
typename HfstTransitions::iterator tr_it
2936 = state_vector[state].
begin();
2937 tr_it != state_vector[state].end(); tr_it++)
2939 C data = tr_it->get_transition_data();
2944 if ( (!marker2weight(data.get_input_symbol(), weight)) &&
2945 marker2weight(data.get_output_symbol(), weight) )
2947 std::cerr <<
"got weight '" << weight <<
"'" << std::endl;
2949 new_transitions.push_back
2950 (HfstTransition <C> (tr_it->get_target_state(),
2951 data.get_input_symbol(),
2952 hfst::internal_epsilon,
2955 old_transitions.push(tr_it);
2958 else if (marker2weight(data.get_input_symbol(), weight) &&
2959 marker2weight(data.get_output_symbol(), weight) )
2961 std::cerr <<
"got weight '" << weight <<
"'" << std::endl;
2963 old_transitions.push(tr_it);
2971 while (! old_transitions.empty()) {
2972 state_vector[state].erase(old_transitions.top());
2973 old_transitions.pop();
2977 for (
typename std::vector<HfstTransition <C> >::iterator IT
2978 = new_transitions.begin();
2979 IT != new_transitions.end(); IT++)
2981 state_vector[state].push_back(*IT);
2987 std::stack<StringSet::iterator> weight_markers;
2988 for (StringSet::iterator it = alphabet.begin();
2989 it != alphabet.end(); it++)
2992 if (marker2weight(*it, foo))
2994 weight_markers.push(it);
2997 while (! weight_markers.empty())
2999 alphabet.erase(weight_markers.top());
3000 weight_markers.pop();
3019 if ( not ( C::is_valid_symbol(symbol_pair.first) &&
3020 C::is_valid_symbol(symbol_pair.second) ) ) {
3022 (EmptyStringException,
3023 "HfstTransitionGraph::insert_freely"
3024 "(const HfstSymbolPair&, W)");
3027 alphabet.insert(symbol_pair.first);
3028 alphabet.insert(symbol_pair.second);
3031 for (iterator it =
begin(); it !=
end(); it++) {
3033 symbol_pair.second, weight );
3045 typename C::WeightType weight)
3047 for (
typename HfstSymbolPairSet::const_iterator symbols_it
3048 = symbol_pairs.begin();
3049 symbols_it != symbol_pairs.end(); symbols_it++)
3051 if ( not ( C::is_valid_symbol(symbols_it->first) &&
3052 C::is_valid_symbol(symbols_it->second) ) ) {
3054 (EmptyStringException,
3055 "HfstTransitionGraph::insert_freely"
3056 "(const HfstSymbolPairSet&, W)");
3059 alphabet.insert(symbols_it->first);
3060 alphabet.insert(symbols_it->second);
3064 for (iterator it =
begin(); it !=
end(); it++)
3066 for (
typename HfstSymbolPairSet::const_iterator symbols_it
3067 = symbol_pairs.begin();
3068 symbols_it != symbol_pairs.end(); symbols_it++)
3071 symbols_it->second, weight );
3085 HfstSymbol marker_this = C::get_marker(alphabet);
3086 HfstSymbol marker_graph = C::get_marker(alphabet);
3088 if (marker_graph > marker)
3089 marker = marker_graph;
3094 alphabet.erase(marker);
3132 HarmonizeUnknownAndIdentitySymbols foo(*
this, another);
3148 StringPairVector::const_iterator &it,
3152 if (it == spv.end()) {
3157 bool transition_found=
false;
3163 for (
typename HfstTransitions::iterator tr_it = tr.begin();
3164 tr_it != tr.end(); tr_it++)
3166 C data = tr_it->get_transition_data();
3167 if (data.get_input_symbol().compare(it->first) == 0 &&
3168 data.get_output_symbol().compare(it->second) == 0)
3170 transition_found=
true;
3171 next_state = tr_it->get_target_state();
3177 if (not transition_found)
3180 HfstTransition <C> transition(next_state, it->first,
3187 return disjunct(spv, it, next_state);
3214 StringPairVector::const_iterator it = spv.begin();
3215 HfstState final_state = disjunct(spv, it, INITIAL_STATE);
3221 if (old_weight < weight)
3228 bool is_special_symbol(
const std::string & symbol)
3230 if (symbol.size() < 2)
3232 if (symbol[0] ==
'@' && symbol[1] ==
'_')
3242 for (iterator it =
begin(); it !=
end(); it++)
3244 std::set<HfstSymbol> symbols_present;
3246 for (
typename HfstTransitions::iterator tr_it
3248 tr_it != it->end(); tr_it++)
3250 C data = tr_it->get_transition_data();
3252 if (data.get_input_symbol() != data.get_output_symbol())
3256 symbols_present.insert(data.get_input_symbol());
3258 for (std::set<std::string>::const_iterator alpha_it = alphabet.begin();
3259 alpha_it != alphabet.end(); alpha_it++)
3261 if (symbols_present.find(*alpha_it) ==
3262 symbols_present.end() &&
3263 ! is_special_symbol(*alpha_it))
3274 StringSet get_flags()
const
3277 for (StringSet::const_iterator it = alphabet.begin();
3278 it != alphabet.end(); it++)
3280 if (FdOperation::is_diacritic(*it)) {
3290 bool purge_symbol(
const std::string & symbol,
const std::string & flag)
3292 if (! FdOperation::is_diacritic(symbol))
3296 else if (FdOperation::get_feature(symbol) == flag)
3304 void flag_purge(
const std::string & flag)
3307 for (iterator it =
begin(); it !=
end(); it++)
3309 for (
unsigned int i=0; i < it->size(); i++)
3311 HfstTransition<C> &tr_it = it->operator[](i);
3313 if ( purge_symbol(tr_it.get_input_symbol(), flag) ||
3314 purge_symbol(tr_it.get_output_symbol(), flag) )
3317 HfstTransition<C> tr
3318 (tr_it.get_target_state(),
"@_EPSILON_SYMBOL_@",
3319 "@_EPSILON_SYMBOL_@", tr_it.get_weight());
3320 it->operator[](i) = tr;
3325 StringSet extra_symbols;
3326 for (StringSet::const_iterator it = alphabet.begin();
3327 it != alphabet.end(); it++)
3329 if (purge_symbol(*it, flag))
3330 extra_symbols.insert(*it);
3333 remove_symbols_from_alphabet(extra_symbols);
3337 struct TopologicalSort
3339 std::vector<int> distance_of_state;
3340 std::vector<std::set<HfstState> > states_at_distance;
3344 void set_biggest_state_number(
unsigned int biggest_state_number)
3346 distance_of_state = std::vector<int>(biggest_state_number+1, -1);
3350 void set_state_at_distance(
HfstState state,
unsigned int distance,
3354 if (state > (distance_of_state.size() - 1))
3356 std::cerr <<
"ERROR in TopologicalSort::set_state_at_distance: first argument ("
3357 << state <<
") is out of range (should be < " << distance_of_state.size()
3358 <<
")" << std::endl;
3362 while (distance + 1 > (
unsigned int)states_at_distance.size())
3364 std::set<HfstState> empty_set;
3365 states_at_distance.push_back(empty_set);
3368 int previous_distance = distance_of_state.at(state);
3369 if (previous_distance != -1 && previous_distance != distance && overwrite)
3371 states_at_distance.at(previous_distance).erase(state);
3374 states_at_distance.at(distance).insert(state);
3375 distance_of_state.at(state) = distance;
3379 const std::set<HfstState> & get_states_at_distance(
unsigned int distance)
3383 while (distance > (states_at_distance.size() - 1))
3385 std::set<HfstState> empty_set;
3386 states_at_distance.push_back(empty_set);
3388 return states_at_distance.at(distance);
3392 enum SortDistance { MaximumDistance, MinimumDistance };
3400 std::vector<std::set<HfstState> > topsort(SortDistance dist)
const
3402 typedef std::set<HfstState>::const_iterator StateIt;
3403 unsigned int current_distance = 0;
3404 TopologicalSort TopSort;
3405 TopSort.set_biggest_state_number(state_vector.size()-1);
3406 TopSort.set_state_at_distance(0,current_distance,(dist == MaximumDistance));
3407 bool new_states_found =
false;
3411 new_states_found =
false;
3413 std::set<HfstState> new_states;
3416 const std::set<HfstState> &
states =
3417 TopSort.get_states_at_distance(current_distance);
3418 for (StateIt state_it = states.begin();
3419 state_it != states.end(); state_it++)
3423 = this->state_vector.at(*state_it);
3424 for (
typename HfstTransitions::const_iterator transition_it
3425 = transitions.begin();
3426 transition_it != transitions.end(); transition_it++)
3428 new_states_found =
true;
3429 new_states.insert(transition_it->get_target_state());
3437 for (StateIt it = new_states.begin();
3438 it != new_states.end(); it++)
3440 TopSort.set_state_at_distance(*it, current_distance + 1, (dist == MaximumDistance));
3444 while (new_states_found);
3446 return TopSort.states_at_distance;
3454 std::vector<std::set<HfstState> > states_sorted = this->topsort(MaximumDistance);
3456 for (
int distance = states_sorted.size() - 1; distance >= 0; distance--)
3458 const std::set<HfstState> & states
3459 = states_sorted.at((
unsigned int)distance);
3461 for (std::set<HfstState>::const_iterator it = states.begin();
3462 it != states.end(); it++)
3480 std::vector<unsigned int> result;
3482 std::vector<std::set<HfstState> > states_sorted = this->topsort(MinimumDistance);
3484 for (
int distance = states_sorted.size() - 1; distance >= 0; distance--)
3486 const std::set<HfstState> & states
3487 = states_sorted.at((
unsigned int)distance);
3489 for (std::set<HfstState>::const_iterator it = states.begin();
3490 it != states.end(); it++)
3496 result.push_back((
unsigned int)distance);
3504 bool is_infinitely_ambiguous
3506 std::set<HfstState> &epsilon_path_states,
3507 std::vector<unsigned int> &states_handled)
3509 if (states_handled[state] != 0)
3516 = transitions.begin();
3517 it != transitions.end(); it++)
3522 if ( is_epsilon(it->get_input_symbol()) ||
3523 FdOperation::is_diacritic(it->get_input_symbol()) )
3525 epsilon_path_states.insert(state);
3526 if (epsilon_path_states.find(it->get_target_state())
3527 != epsilon_path_states.end())
3531 if (is_infinitely_ambiguous
3532 (it->get_target_state(), epsilon_path_states, states_handled))
3536 epsilon_path_states.erase(state);
3540 states_handled[state] = 1;
3544 bool is_infinitely_ambiguous()
3546 std::set<HfstState> epsilon_path_states;
3548 std::vector<unsigned int> states_handled(max_state+1, 0);
3550 for (
unsigned int state = INITIAL_STATE; state < (max_state+1); state++)
3552 if (is_infinitely_ambiguous(state, epsilon_path_states, states_handled))
3558 bool is_lookup_infinitely_ambiguous
3561 std::set<HfstState> &epsilon_path_states
3565 bool only_epsilons=
false;
3566 if ((
unsigned int)s.second.size() == index)
3574 for (HfstBasicTransducer::HfstTransitions::const_iterator it
3575 = transitions.begin();
3576 it != transitions.end(); it++)
3583 if ( is_epsilon(it->get_input_symbol()) ||
3584 FdOperation::is_diacritic(it->get_input_symbol()) )
3586 epsilon_path_states.insert(state);
3587 if (epsilon_path_states.find(it->get_target_state())
3588 != epsilon_path_states.end())
3592 if (is_lookup_infinitely_ambiguous
3593 (s, index, it->get_target_state(), epsilon_path_states))
3597 epsilon_path_states.erase(state);
3603 else if (not only_epsilons)
3605 bool continu =
false;
3606 if (it->get_input_symbol().compare(s.second.at(index)) == 0)
3608 else if ((it->get_input_symbol().compare(
"@_UNKNOWN_SYMBOL_@") ||
3609 it->get_input_symbol().compare(
"@_IDENTITY_SYMBOL_@"))
3611 (alphabet.find(s.second.at(index)) == alphabet.end()))
3619 std::set<HfstState> empty_set;
3620 if (is_lookup_infinitely_ambiguous
3621 (s, index, it->get_target_state(), empty_set))
3634 std::set<HfstState> epsilon_path_states;
3635 epsilon_path_states.insert(0);
3636 unsigned int index=0;
3638 return is_lookup_infinitely_ambiguous(s, index, INITIAL_STATE,
3639 epsilon_path_states);
3642 bool is_lookup_infinitely_ambiguous(
const StringVector & s)
3644 std::set<HfstState> epsilon_path_states;
3645 epsilon_path_states.insert(0);
3646 unsigned int index=0;
3649 return is_lookup_infinitely_ambiguous(path, index, INITIAL_STATE,
3650 epsilon_path_states);
3655 static void push_back_to_two_level_path
3658 const float &weight)
3660 path.second.push_back(sp);
3661 path.first = path.first + weight;
3664 static void pop_back_from_two_level_path
3666 const float &weight)
3668 path.second.pop_back();
3669 path.first = path.first - weight;
3672 static void add_to_results
3675 const float &final_weight,
3678 path_so_far.first = path_so_far.first + final_weight;
3680 if (max_weight == NULL)
3682 results.insert(path_so_far);
3684 else if (!(path_so_far.first > *max_weight))
3686 results.insert(path_so_far);
3692 path_so_far.first = path_so_far.first - final_weight;
3695 static bool is_possible_transition
3697 const StringVector &lookup_path,
3698 const unsigned int &lookup_index,
3699 const StringSet &alphabet,
3700 bool &input_symbol_consumed)
3702 std::string isymbol = transition.get_input_symbol();
3705 if (not (lookup_index == (
unsigned int)lookup_path.size()))
3710 if ( isymbol.compare(lookup_path.at(lookup_index)) == 0 ||
3714 ( (is_identity(isymbol) || is_unknown(isymbol)) &&
3715 (alphabet.find(lookup_path.at(lookup_index))
3716 == alphabet.end()) )
3719 input_symbol_consumed=
true;
3726 if ( is_epsilon(isymbol) ||
3727 FdOperation::is_diacritic(isymbol) )
3729 input_symbol_consumed=
false;
3738 (
const StringVector &lookup_path,
3741 unsigned int lookup_index,
3743 StringSet &alphabet,
3744 HfstEpsilonHandler Eh,
3745 size_t infinite_cutoff,
3746 float * max_weight = NULL)
3749 if (not Eh.can_continue(state)) {
3753 if (max_weight != NULL && path_so_far.first > *max_weight) {
3758 if (lookup_index == lookup_path.size())
3773 for (HfstBasicTransducer::HfstTransitions::const_iterator it
3774 = transitions.begin();
3775 it != transitions.end(); it++)
3777 bool input_symbol_consumed=
false;
3778 if ( is_possible_transition
3779 (*it, lookup_path, lookup_index, alphabet,
3780 input_symbol_consumed) )
3787 if (is_identity(it->get_input_symbol()))
3789 istr = lookup_path.at(lookup_index);
3794 if (is_unknown(it->get_input_symbol()))
3795 istr = lookup_path.at(lookup_index);
3797 istr = it->get_input_symbol();
3802 ostr = it->get_output_symbol();
3805 push_back_to_two_level_path
3810 HfstEpsilonHandler * Ehp = NULL;
3811 if (input_symbol_consumed) {
3813 Ehp =
new HfstEpsilonHandler(infinite_cutoff);
3816 Eh.push_back(state);
3821 lookup_fd(lookup_path, results, it->get_target_state(),
3822 lookup_index, path_so_far, alphabet, *Ehp, infinite_cutoff, max_weight);
3826 if (input_symbol_consumed) {
3835 pop_back_from_two_level_path(path_so_far, it->get_weight());
3842 (
const StringVector &lookup_path,
3844 size_t infinite_cutoff,
3845 float * max_weight = NULL)
3848 unsigned int lookup_index = 0;
3851 HfstEpsilonHandler Eh(infinite_cutoff);
3852 lookup_fd(lookup_path, results, state, lookup_index, path_so_far,
3853 alphabet, Eh, infinite_cutoff, max_weight);
3879 friend class ConversionFunctions;
3880 friend class hfst::HarmonizeUnknownAndIdentitySymbols;
3887 typedef HfstTransitionGraph <HfstTropicalTransducerTransitionData>
3891 typedef HfstTransitionGraph <HfstFastTransitionData>
3899 #endif // #ifndef _HFST_TRANSITION_GRAPH_H_
C::SymbolType get_output_symbol() const
Get the output symbol of the transition.
Definition: HfstTransition.h:96
C::SymbolType get_input_symbol() const
Get the input symbol of the transition.
Definition: HfstTransition.h:91
iterator begin()
Get an iterator to the beginning of the states in the graph.
Definition: HfstTransitionGraph.h:660
HfstTransitionGraph & insert_freely(const HfstSymbolPair &symbol_pair, typename C::WeightType weight)
Insert freely any number of symbol_pair in the graph with weight weight.
Definition: HfstTransitionGraph.h:3017
int longest_path_size()
Definition: HfstTransitionGraph.h:3451
std::pair< String, String > StringPair
A symbol pair in a transition.
Definition: HfstSymbolDefs.h:71
HfstTransitionGraph< HfstFastTransitionData > HfstFastTransducer
A specialization for faster conversion.
Definition: HfstDataTypes.h:117
HfstTransitionGraph(void)
Create a graph with one initial state that has state number zero and is not a final state...
Definition: HfstTransitionGraph.h:198
std::string String
A UTF-8 symbol in a transition.
Definition: HfstSymbolDefs.h:60
HfstTransitionGraph & sort_arcs(void)
Sort the arcs of this transducer according to input and output symbols.
Definition: HfstTransitionGraph.h:643
const HfstTransitions & operator[](HfstState s) const
Get the set of transitions of state s in this graph.
Definition: HfstTransitionGraph.h:680
std::vector< std::pair< std::string, std::string > > StringPairVector
A vector of string pairs.
Definition: HfstDataTypes.h:106
HfstTransitionGraph(const HfstTransitionGraph &graph)
Create a deep copy of HfstTransitionGraph graph.
Definition: HfstTransitionGraph.h:239
void prune_alphabet(bool force=true)
Remove all symbols that do not occur in transitions of the graph from its alphabet.
Definition: HfstTransitionGraph.h:448
HfstState get_max_state() const
Get the biggest state number in use.
Definition: HfstTransitionGraph.h:547
unsigned int HfstState
The number of a state in an HfstTransitionGraph.
Definition: HfstDataTypes.h:120
A synchronous finite-state transducer.
Definition: HfstTransducer.h:227
const HfstTransitionGraphAlphabet & get_alphabet() const
Get the set of HfstSymbols in the alphabet of the graph.
Definition: HfstTransitionGraph.h:496
HfstState get_target_state() const
Get the target state of the transition.
Definition: HfstTransition.h:81
void write_in_xfst_format(std::ostream &os, bool write_weights=true)
Write the graph in xfst text format to ostream os. write_weights defines whether weights are printed ...
Definition: HfstTransitionGraph.h:893
void write_in_att_format_number(FILE *file, bool write_weights=true)
Write the graph in AT&T format to FILE file using numbers instead of symbol names. write_weights defines whether weights are printed.
Definition: HfstTransitionGraph.h:1706
State is not final (and cannot have a final weight).
Definition: HfstExceptionDefs.h:250
std::vector< unsigned int > path_sizes()
Definition: HfstTransitionGraph.h:3478
bool is_final_state(HfstState s) const
Whether state s is final. FIXME: return positive infinity instead if not final.
Definition: HfstTransitionGraph.h:620
std::set< HfstSymbol > HfstSymbolSet
A set of symbol pairs.
Definition: HfstTransitionGraph.h:138
HfstTransitionGraph(const hfst::HfstTransducer &transducer)
Create an HfstTransitionGraph equivalent to HfstTransducer transducer. FIXME: move to a separate file...
Definition: HfstTransitionGraph.h:248
void add_transition(HfstState s, const HfstTransition< C > &transition, bool add_symbols_to_alphabet=true)
Add a transition transition to state s.
Definition: HfstTransitionGraph.h:554
Base class for HfstExceptions. Holds its own name and the file and line number where it was thrown...
Definition: HfstExceptionDefs.h:15
A simple transition graph format that consists of states and transitions between those states...
Definition: HfstDataTypes.h:113
std::vector< HfstTransition< C > > HfstTransitions
Datatype for the states of a transition in a graph.
Definition: HfstTransitionGraph.h:128
HfstStates::const_iterator const_iterator
A const iterator type that points a state in a graph.
Definition: HfstTransitionGraph.h:179
void write_in_att_format(FILE *file, bool write_weights=true)
Write the graph in AT&T format to FILE file. write_weights defines whether weights are printed...
Definition: HfstTransitionGraph.h:1603
void add_symbol_to_alphabet(const HfstSymbol &symbol)
Explicitly add symbol to the alphabet of the graph.
Definition: HfstTransitionGraph.h:346
void write_in_att_format(std::ostream &os, bool write_weights=true)
Write the graph in AT&T format to ostream os. write_weights defines whether weights are printed...
Definition: HfstTransitionGraph.h:1556
const C & get_transition_data() const
Get the transition data of the transition.
Definition: HfstTransition.h:86
#define HFST_THROW(E)
Macro to throw an exception of type E. Use THROW instead of regular throw with subclasses of HfstExce...
Definition: HfstExceptionDefs.h:28
std::set< HfstTwoLevelPath > HfstTwoLevelPaths
A set of two-level weighted paths.
Definition: HfstDataTypes.h:110
std::string name
The name of the graph.
Definition: HfstTransitionGraph.h:182
The stream is at end.
Definition: HfstExceptionDefs.h:149
std::vector< HfstSymbolPair > HfstSymbolPairVector
A vector of symbol pairs.
Definition: HfstTransitionGraph.h:140
The StateId argument is not valid.
Definition: HfstExceptionDefs.h:304
const_iterator end() const
Get a const iterator to the end of states (last state + 1) in the graph.
Definition: HfstTransitionGraph.h:672
HfstTransitionGraph & substitute(const HfstSymbol &old_symbol, const HfstSymbol &new_symbol, bool input_side=true, bool output_side=true)
Substitute old_symbol with new_symbol in all transitions. input_side and output_side define whether t...
Definition: HfstTransitionGraph.h:2311
C::WeightType get_final_weight(HfstState s) const
Definition: HfstTransitionGraph.h:625
C::SymbolType HfstSymbol
Datatype for a symbol in a transition.
Definition: HfstTransitionGraph.h:131
void remove_transition(HfstState s, const HfstTransition< C > &transition, bool remove_symbols_from_alphabet=false)
Remove transition transition from state s. remove_symbols_from_alphabet defines whether symbols in tr...
Definition: HfstTransitionGraph.h:574
std::set< StringPair > StringPairSet
A set of symbol pairs used in substituting symbol pairs and in rule functions.
Definition: HfstSymbolDefs.h:83
void add_symbols_to_alphabet(const HfstSymbolSet &symbols)
Same as add_symbol_to_alphabet for each symbol in symbols.
Definition: HfstTransitionGraph.h:368
HfstTransitionGraph & harmonize(HfstTransitionGraph &another)
Harmonize this HfstTransitionGraph and another.
Definition: HfstTransitionGraph.h:3130
const HfstTransitions & transitions(HfstState s) const
Alternative name for operator[].
Definition: HfstTransitionGraph.h:693
std::pair< float, StringVector > HfstOneLevelPath
A path of one level of arcs with collected weight.
Definition: HfstDataTypes.h:97
Transducers are not automata.
Definition: HfstExceptionDefs.h:291
void write_in_prolog_format(std::ostream &os, const std::string &name, bool write_weights=true)
Write the graph in prolog format to ostream os. write_weights defines whether weights are printed (to...
Definition: HfstTransitionGraph.h:1056
std::pair< HfstSymbol, HfstSymbol > HfstSymbolPair
Datatype for a symbol pair in a transition.
Definition: HfstTransitionGraph.h:134
void write_in_prolog_format(FILE *file, const std::string &name, bool write_weights=true)
Write the graph in prolog format to FILE file. write_weights defines whether weights are printed (tod...
Definition: HfstTransitionGraph.h:994
std::vector< HfstState > states() const
The states of the graph.
Definition: HfstTransitionGraph.h:185
#define HFST_THROW_MESSAGE(E, M)
Macro to throw an exception of type E with message M. Use THROW instead of regular throw with subclas...
Definition: HfstExceptionDefs.h:35
std::map< String, String > HfstSymbolSubstitutions
A map of substitutions used when performing multiple symbol-to-symbol substitutions.
Definition: HfstSymbolDefs.h:89
iterator end()
Get an iterator to the end of states (last state + 1) in the graph.
Definition: HfstTransitionGraph.h:668
std::pair< float, StringPairVector > HfstTwoLevelPath
A path of two level of arcs with collected weight.
Definition: HfstDataTypes.h:108
HfstTransition< HfstTropicalTransducerTransitionData > HfstBasicTransition
An HfstTransition with transition data of type HfstTropicalTransducerTransitionData.
Definition: HfstDataTypes.h:122
C::WeightType get_weight() const
Get the weight of the transition.
Definition: HfstTransition.h:111
HfstTransitionGraph & substitute(bool(*func)(const HfstSymbolPair &sp, HfstSymbolPairSet &sps))
Substitute all transitions with a set of transitions as defined by function func. ...
Definition: HfstTransitionGraph.h:2474
const_iterator begin() const
Get a const iterator to the beginning of states in the graph.
Definition: HfstTransitionGraph.h:664
HfstState add_state(HfstState s)
Add a state s to this graph.
Definition: HfstTransitionGraph.h:538
void remove_symbol_from_alphabet(const HfstSymbol &symbol)
Remove symbol symbol from the alphabet of the graph.
Definition: HfstTransitionGraph.h:354
std::set< HfstSymbol > HfstTransitionGraphAlphabet
Datatype for the alphabet of a graph.
Definition: HfstTransitionGraph.h:142
HfstTransitionGraph & substitute(const HfstSymbolPair &sp, const HfstTransitionGraph &graph)
Substitute all transitions old_symbol : new_symbol with a copy of graph.
Definition: HfstTransitionGraph.h:2582
HfstTransitionGraph< HfstTropicalTransducerTransitionData > HfstBasicTransducer
An HfstTransitionGraph with transitions of type HfstTropicalTransducerTransitionData and weight type ...
Definition: HfstDataTypes.h:114
A transition that consists of a target state and transition data represented by class C...
Definition: HfstDataTypes.h:122
void write_in_xfst_format(FILE *file, bool write_weights=true)
Write the graph in xfst text format to FILE file. write_weights defines whether weights are printed (...
Definition: HfstTransitionGraph.h:1515
void set_final_weight(HfstState s, const typename C::WeightType &weight)
Set the final weight of state s in this graph to weight.
Definition: HfstTransitionGraph.h:635
HfstTransitionGraph & operator=(const HfstTransitionGraph &graph)
The assignment operator.
Definition: HfstTransitionGraph.h:222
std::map< StringPair, StringPair > HfstSymbolPairSubstitutions
A map of substitutions used when performing multiple symbol pair-to-symbol pair substitutions.
Definition: HfstSymbolDefs.h:95
HfstState add_state(void)
Add a new state to this graph and return its number.
Definition: HfstTransitionGraph.h:526
std::set< HfstSymbolPair > HfstSymbolPairSet
A set of symbol pairs.
Definition: HfstTransitionGraph.h:136