HFST - Helsinki Finite-State Transducer Technology API  version 3.7.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HfstTransitionGraph.h
Go to the documentation of this file.
1  #ifndef _HFST_TRANSITION_GRAPH_H_
2  #define _HFST_TRANSITION_GRAPH_H_
3 
7  #include <cstdio>
8  #include <string>
9  #include <set>
10  #include <cassert>
11  #include <iostream>
12  #include <algorithm>
13  #include <stack>
14 
15  #include "../HfstSymbolDefs.h"
16  #include "../HfstExceptionDefs.h"
17  #include "../HfstDataTypes.h"
18  #include "../HarmonizeUnknownAndIdentitySymbols.h"
19  #include "../HfstFlagDiacritics.h"
20  #include "../HfstEpsilonHandler.h"
21  #include "ConvertTransducerFormat.h"
22  #include "HfstTransition.h"
23  #include "HfstTropicalTransducerTransitionData.h"
24  #include "HfstFastTransitionData.h"
25 
26  namespace hfst {
27 
28  class HfstFile {
29  private:
30  FILE * file;
31  public:
32  HfstFile();
33  ~HfstFile();
34  void set_file(FILE * f);
35  FILE * get_file();
36  void close();
37  void write(const char * str);
38  };
39 
40 
49  namespace implementations {
50 
52  typedef unsigned int HfstState;
53 
121  template <class C> class HfstTransitionGraph
122  {
123 
124  // --- Datatypes and variables ---
125 
126  public:
128  typedef std::vector<HfstTransition<C> > HfstTransitions;
129 
131  typedef typename C::SymbolType HfstSymbol;
133  typedef std::pair<HfstSymbol, HfstSymbol>
136  typedef std::set<HfstSymbolPair> HfstSymbolPairSet;
138  typedef std::set<HfstSymbol> HfstSymbolSet;
140  typedef std::vector<HfstSymbolPair> HfstSymbolPairVector;
142  typedef std::set<HfstSymbol> HfstTransitionGraphAlphabet;
143 
144  protected:
145  /* Datatype for the states of a graph and their transitions.
146  Each index of the vector is a state and the transitions
147  on that index are the transitions of that state. */
148  typedef std::vector<HfstTransitions> HfstStates;
149  /* States of the graph and their transitions. */
150  HfstStates state_vector;
151 
152  /* The initial state number. */
153  static const HfstState INITIAL_STATE = 0;
154 
155  /* Datatype for the final states and their weights in a graph. */
156  typedef std::map<HfstState,typename C::WeightType> FinalWeightMap;
157  /* The final states and their weights in the graph. */
158  FinalWeightMap final_weight_map;
159 
160  /* The alphabet of the graph. */
162 
163  /* Used by substitute function. */
164  typedef unsigned int HfstNumber;
165  typedef std::vector<HfstNumber> HfstNumberVector;
166  typedef std::pair<HfstNumber, HfstNumber> HfstNumberPair;
167  typedef std::map<HfstNumberPair, HfstNumberPair> HfstNumberPairSubstitutions;
168 
169  protected:
170  /* @brief An iterator type that points to a state in a graph.
171 
172  The value pointed by the iterator is of type HfstTransitions. */
173  typedef typename HfstStates::iterator iterator;
174 
175  public:
179  typedef typename HfstStates::const_iterator const_iterator;
180 
182  std::string name;
183 
185  std::vector<HfstState> states() const {
186  std::vector<HfstState> retval(this->get_max_state()+1, 0);
187  for (unsigned int i=0; i<(this->get_max_state()+1); i++)
188  retval[i] = i;
189  return retval;
190  }
191 
192  // --------------------------------------------------------
193  // --- Construction, assignment, copying and conversion ---
194  // --------------------------------------------------------
195 
199  initialize_alphabet(alphabet);
200  HfstTransitions tr;
201  state_vector.push_back(tr);
202  }
203 
204  HfstTransitionGraph(FILE *file) {
205  initialize_alphabet(alphabet);
206  HfstTransitions tr;
207  state_vector.push_back(tr);
208  unsigned int linecount=0;
209  this->assign(read_in_att_format(file, "@0@", linecount));
210  }
211 
212  HfstTransitionGraph(HfstFile &file) {
213  initialize_alphabet(alphabet);
214  HfstTransitions tr;
215  state_vector.push_back(tr);
216  unsigned int linecount=0;
217  this->assign(read_in_att_format(file.get_file(), "@0@", linecount));
218  }
219 
220 
223  {
224  if (this == &graph)
225  return *this;
226  state_vector = graph.state_vector;
227  final_weight_map = graph.final_weight_map;
228  alphabet = graph.alphabet;
229  assert(alphabet.count(HfstSymbol()) == 0);
230  return *this;
231  }
232 
233  HfstTransitionGraph &assign(const HfstTransitionGraph &graph)
234  {
235  return this->operator=(graph);
236  }
237 
240  state_vector = graph.state_vector;
241  final_weight_map = graph.final_weight_map;
242  alphabet = graph.alphabet;
243  assert(alphabet.count(HfstSymbol()) == 0);
244  }
245 
250  *fsm = ConversionFunctions::
251  hfst_transducer_to_hfst_basic_transducer(transducer);
252  state_vector = fsm->state_vector;
253  final_weight_map = fsm->final_weight_map;
254  alphabet = fsm->alphabet;
255  delete fsm;
256  }
257 
258 
259  // --------------------------------------------------
260  // --- Initialization, optimization and debugging ---
261  // --------------------------------------------------
262 
263  protected:
264  /* Add epsilon, unknown and identity symbols to the alphabet
265  \a alpha. */
266  void initialize_alphabet(HfstTransitionGraphAlphabet &alpha) {
267  alpha.insert(C::get_epsilon());
268  alpha.insert(C::get_unknown());
269  alpha.insert(C::get_identity());
270  }
271 
272  /* Check that all symbols that occur in the transitions of the graph
273  are also in the alphabet. */
274  bool check_alphabet()
275  {
276  for (iterator it = begin(); it != end(); it++)
277  {
278  for (typename HfstTransitions::iterator tr_it
279  = it->begin();
280  tr_it != it->end(); tr_it++)
281  {
282  C data = tr_it->get_transition_data();
283 
284  if(alphabet.find(data.get_input_symbol())
285  == alphabet.end()) {
286  return false;
287  }
288  if(alphabet.find(data.get_output_symbol())
289  == alphabet.end()) {
290  return false;
291  }
292  }
293  }
294  return true;
295  }
296 
297  public:
298  /* Print the alphabet of the graph to standard error stream. */
299  void print_alphabet() const
300  {
301  for (typename HfstTransitionGraphAlphabet::const_iterator it
302  = alphabet.begin(); it != alphabet.end(); it++)
303  {
304  if (it != alphabet.begin())
305  std::cerr << ", ";
306  std::cerr << *it;
307  }
308  std::cerr << std::endl;
309  }
310 
311  protected:
312  /* Get the number of the \a symbol. */
313  unsigned int get_symbol_number
314  (const HfstSymbol &symbol) const {
315  return C::get_number(symbol);
316  }
317 
318  /* For internal optimization: Reserve space for
319  \a number_of_states states. */
320  void initialize_state_vector
321  (unsigned int number_of_states)
322  {
323  state_vector.reserve(number_of_states);
324  }
325 
326  /* For internal optimization: Reserve space for
327  \a number_of_transitions transitions for state number
328  \a state_number. */
329  void initialize_transition_vector
330  (unsigned int state_number, unsigned int number_of_transitions)
331  {
332  add_state(state_number);
333  state_vector[state_number].reserve(number_of_transitions);
334  }
335 
336 
337  // -----------------------------------
338  // ---------- The alphabet -----------
339  // -----------------------------------
340 
341  public:
346  void add_symbol_to_alphabet(const HfstSymbol &symbol) {
347  alphabet.insert(symbol);
348  }
349 
355  alphabet.erase(symbol);
356  }
357 
358  void remove_symbols_from_alphabet(const HfstSymbolSet &symbols) {
359  for (typename HfstSymbolSet::const_iterator it = symbols.begin();
360  it != symbols.end(); it++)
361  {
362  alphabet.erase(*it);
363  }
364  }
365 
369  {
370  for (typename HfstSymbolSet::const_iterator it = symbols.begin();
371  it != symbols.end(); it++)
372  {
373  alphabet.insert(*it);
374  }
375  }
376 
377  void add_symbols_to_alphabet(const HfstSymbolPairSet &symbols)
378  {
379  for (typename HfstSymbolPairSet::const_iterator it = symbols.begin();
380  it != symbols.end(); it++)
381  {
382  alphabet.insert(it->first);
383  alphabet.insert(it->second);
384  }
385  }
386 
387  /* Remove all symbols that are given in \a symbols but do not occur
388  in transitions of the graph from its alphabet. */
389  void prune_alphabet_after_substitution(const std::set<unsigned int> &symbols)
390  {
391  if (symbols.size() == 0)
392  return;
393 
394  std::vector<bool> symbols_found;
395  symbols_found.resize
396  (C::get_max_number()+1, false);
397 
398  // Go through all transitions
399  for (iterator it = begin(); it != end(); it++)
400  {
401  for (typename HfstTransitions::iterator tr_it
402  = it->begin();
403  tr_it != it->end(); tr_it++)
404  {
405  const C & data = tr_it->get_transition_data();
406  symbols_found.at(data.get_input_number()) = true;
407  symbols_found.at(data.get_output_number()) = true;
408  }
409  }
410 
411  // Remove symbols in \a symbols from the alphabet if they did not
412  // occur in any transitions
413  for (std::set<unsigned int>::const_iterator it = symbols.begin();
414  it != symbols.end(); it++)
415  {
416  if (! symbols_found.at(*it))
417  alphabet.erase(C::get_symbol(*it));
418  }
419 
420  }
421 
422  HfstTransitionGraphAlphabet symbols_used()
423  {
425  for (iterator it = begin(); it != end(); it++)
426  {
427  for (typename HfstTransitions::iterator tr_it
428  = it->begin();
429  tr_it != it->end(); tr_it++)
430  {
431  C data = tr_it->get_transition_data();
432 
433  retval.insert(data.get_input_symbol());
434  retval.insert(data.get_output_symbol());
435  }
436  }
437  return retval;
438  }
439 
448  void prune_alphabet(bool force=true) {
449 
450  // Which symbols occur in the graph
451  HfstTransitionGraphAlphabet symbols_found = symbols_used();
452 
453  // Whether unknown or identity symbols are used
454  bool unknowns_or_identities_used =
455  ( (symbols_found.find("@_UNKNOWN_SYMBOL_@") != symbols_found.end()) ||
456  (symbols_found.find("@_IDENTITY_SYMBOL_@") != symbols_found.end()) );
457 
458  // We cannot prune the transducer because unknowns or identities
459  // are used in its transitions.
460  if (!force && unknowns_or_identities_used)
461  return;
462 
463  // Special symbols are always known
464  symbols_found.insert("@_EPSILON_SYMBOL_@");
465  symbols_found.insert("@_UNKNOWN_SYMBOL_@");
466  symbols_found.insert("@_IDENTITY_SYMBOL_@");
467 
468  // Which symbols in the graph's alphabet did not occur in
469  // the graph
470  HfstTransitionGraphAlphabet symbols_not_found;
471 
472  for (typename HfstTransitionGraphAlphabet::iterator it
473  = alphabet.begin();
474  it != alphabet.end(); it++)
475  {
476  if (symbols_found.find(*it) == symbols_found.end())
477  symbols_not_found.insert(*it);
478  }
479 
480  // Remove the symbols that did not occur in the graph
481  // from its alphabet
482  for (typename HfstTransitionGraphAlphabet::iterator it
483  = symbols_not_found.begin();
484  it != symbols_not_found.end(); it++)
485  {
486  alphabet.erase(*it);
487  }
488  }
489 
497  return alphabet;
498  }
499 
500  StringPairSet get_transition_pairs() const {
501 
502  StringPairSet retval;
503  for (const_iterator it = begin(); it != end(); it++)
504  {
505  for (typename HfstTransitions::const_iterator tr_it
506  = it->begin();
507  tr_it != it->end(); tr_it++)
508  {
509  C data = tr_it->get_transition_data();
510 
511  retval.insert(StringPair(data.get_input_symbol(),
512  data.get_output_symbol()));
513  }
514  }
515  return retval;
516  }
517 
518 
519  // ----------------------------------------------------------------
520  // --- Adding states and transitions and iterating through them ---
521  // ----------------------------------------------------------------
522 
527  HfstTransitions tr;
528  state_vector.push_back(tr);
529  return state_vector.size()-1;
530  }
531 
539  while(state_vector.size() <= s) {
540  HfstTransitions tr;
541  state_vector.push_back(tr);
542  }
543  return s;
544  }
545 
548  return state_vector.size()-1;
549  }
550 
554  void add_transition(HfstState s, const HfstTransition<C> & transition,
555  bool add_symbols_to_alphabet=true) {
556 
557  C data = transition.get_transition_data();
558 
559  add_state(s);
560  add_state(transition.get_target_state());
562  alphabet.insert(data.get_input_symbol());
563  alphabet.insert(data.get_output_symbol());
564  }
565  state_vector[s].push_back(transition);
566  }
567 
574  void remove_transition(HfstState s, const HfstTransition<C> & transition,
575  bool remove_symbols_from_alphabet=false)
576  {
577  if (! (state_vector.size() > s))
578  {
579  return;
580  }
581 
582  HfstTransitions & transitions = state_vector[s];
583  // iterators to transitions to be removed
584  // transitions must be removed in reverse order so that iterators
585  // are not invalidated
586  std::stack<typename HfstTransitions::iterator> elements_to_remove;
587 
588  // find the transitions to be removed
589  for (typename HfstTransitions::iterator it = transitions.begin();
590  it != transitions.end(); it++)
591  {
592  // weight is ignored
593  if (it->get_input_symbol() == transition.get_input_symbol() &&
594  it->get_output_symbol() == transition.get_output_symbol() &&
595  it->get_target_state() == transition.get_target_state())
596  {
597  // schedule transition to be removed
598  elements_to_remove.push(it);
599  }
600  }
601  // remove the transitions in reverse order
602  while (!elements_to_remove.empty())
603  {
604  state_vector[s].erase(elements_to_remove.top());
605  elements_to_remove.pop();
606  }
607 
608  if (remove_symbols_from_alphabet)
609  {
610  HfstTransitionGraphAlphabet alpha = this->symbols_used();
611  if (alpha.find(transition.get_input_symbol()) == alpha.end())
612  this->remove_symbol_from_alphabet(transition.get_input_symbol());
613  if (alpha.find(transition.get_output_symbol()) == alpha.end())
615  }
616  }
617 
620  bool is_final_state(HfstState s) const {
621  return (final_weight_map.find(s) != final_weight_map.end());
622  }
623 
625  typename C::WeightType get_final_weight(HfstState s) const {
626  if (final_weight_map.find(s) != final_weight_map.end())
627  return final_weight_map.find(s)->second;
629  }
630 
636  const typename C::WeightType & weight) {
637  add_state(s);
638  final_weight_map[s] = weight;
639  }
640 
644  {
645  for (typename HfstStates::iterator it = state_vector.begin();
646  it != state_vector.end();
647  ++it)
648  {
650  std::sort<typename HfstTransitions::iterator>
651  (transitions.begin(),transitions.end());
652  }
653  return *this;
654  }
655 
660  iterator begin() { return state_vector.begin(); }
661 
664  const_iterator begin() const { return state_vector.begin(); }
665 
668  iterator end() { return state_vector.end(); }
669 
672  const_iterator end() const { return state_vector.end(); }
673 
674 
681  {
682  if (s >= state_vector.size()) {
684  return state_vector[s];
685  }
686 
694  {
695  return this->operator[](s);
696  }
697 
698  // --------------------------------------------------
699  // ----- Reading and writing in AT&T format -----
700  // --------------------------------------------------
701 
702  protected:
703  /* Change state numbers s1 to s2 and vice versa. */
704  void swap_state_numbers(HfstState s1, HfstState s2) {
705 
706  HfstTransitions s1_copy = state_vector[s1];
707  state_vector[s1] = state_vector[s2];
708  state_vector[s2] = s1_copy;
709 
710  // ----- Go through all states -----
711  for (iterator it = begin(); it != end(); it++)
712  {
713  // Go through all transitions
714  for (unsigned int i=0; i < it->size(); i++)
715  {
716  HfstTransition<C> &tr_it = it->operator[](i);
717 
718  HfstState new_target=tr_it.get_target_state();
719  if (tr_it.get_target_state() == s1)
720  new_target = s2;
721  if (tr_it.get_target_state() == s2)
722  new_target = s1;
723 
724  if (new_target != tr_it.get_target_state())
725  {
727  (new_target,
728  tr_it.get_input_symbol(),
729  tr_it.get_output_symbol(),
730  tr_it.get_weight());
731 
732  it->operator[](i) = tr;
733  }
734 
735  } // all transitions gone through
736 
737  } // ----- all states gone through -----
738 
739  // Swap final states, if needed
740  typename FinalWeightMap::iterator s1_it = final_weight_map.find(s1);
741  typename FinalWeightMap::iterator s2_it = final_weight_map.find(s2);
742  typename FinalWeightMap::iterator end_it = final_weight_map.end();
743 
744  if (s1_it != end_it && s2_it != end_it) {
745  typename C::WeightType s1_weight = s1_it->second;
746  final_weight_map[s1] = s2_it->second;
747  final_weight_map[s2] = s1_weight;
748  }
749  if (s1_it != end_it) {
750  typename C::WeightType w = s1_it->second;
751  final_weight_map.erase(s1);
752  final_weight_map[s2] = w;
753  }
754  if (s2_it != end_it) {
755  typename C::WeightType w = s2_it->second;
756  final_weight_map.erase(s2);
757  final_weight_map[s1] = w;
758  }
759 
760  return;
761 
762  }
763 
764  static void write_weight(FILE * file, float weight)
765  {
766  //if (weight == 0) // avoid unnecessary 0.000000's
767  // fprintf(file, "%i", 0);
768  //else
769  fprintf(file, "%f", weight);
770  }
771 
772  static void write_weight(std::ostream & os, float weight)
773  {
774  //if (weight == 0) // avoid unnecessary 0.000000's
775  // os << 0;
776  //else
777  os << weight;
778  }
779 
780  /* Replace all strings \a str1 in \a symbol with \a str2. */
781  static void replace_all(std::string & symbol,
782  const std::string &str1,
783  const std::string &str2)
784  {
785  size_t pos = symbol.find(str1);
786  while (pos != std::string::npos) // while there are str1:s to replace
787  {
788  symbol.erase(pos, str1.size()); // erase str1
789  symbol.insert(pos, str2); // insert str2 instead
790  pos = symbol.find // find next str1
791  (str1, pos+str2.size());
792  }
793  return;
794  }
795 
796  static void xfstize(std::string & symbol)
797  {
798  std::string escaped_symbol;
799  for (size_t pos = 0; pos < symbol.size(); pos++)
800  {
801  if (symbol[pos] == '%')
802  escaped_symbol.append("\"%\"");
803  else if (symbol[pos] == '"')
804  escaped_symbol.append("%\"");
805  else if (symbol[pos] == '?')
806  escaped_symbol.append("\"?\"");
807  else
808  escaped_symbol.append(1, symbol[pos]);
809  }
810  symbol = escaped_symbol;
811  }
812 
813  static void xfstize_symbol(std::string & symbol)
814  {
815  xfstize(symbol);
816  replace_all(symbol, "@_EPSILON_SYMBOL_@", "0");
817  replace_all(symbol, "@_UNKNOWN_SYMBOL_@", "?");
818  replace_all(symbol, "@_IDENTITY_SYMBOL_@", "?");
819  replace_all(symbol, "\t", "@_TAB_@");
820  }
821 
822  void print_xfst_state(std::ostream & os, HfstState state)
823  {
824  if (state == INITIAL_STATE) { os << "S"; }
825  if (is_final_state(state)) { os << "f"; }
826  os << "s" << state;
827  }
828 
829  void print_xfst_state(FILE * file, HfstState state)
830  {
831  if (state == INITIAL_STATE) { fprintf(file, "S"); }
832  if (is_final_state(state)) { fprintf(file, "f"); }
833  fprintf(file, "s%i", state);
834  }
835 
836  void print_xfst_arc(std::ostream & os, C data)
837  {
838  // replace all spaces, epsilons and tabs
839  if (data.get_input_symbol() !=
840  data.get_output_symbol())
841  {
842  os << "<";
843  }
844  std::string s = data.get_input_symbol();
845  xfstize_symbol(s);
846  os << s;
847  if (data.get_input_symbol() !=
848  data.get_output_symbol() ||
849  data.get_output_symbol() == "@_UNKNOWN_SYMBOL_@")
850  {
851  s = data.get_output_symbol();
852  xfstize_symbol(s);
853  os << ":" << s;
854  }
855  if (data.get_input_symbol() !=
856  data.get_output_symbol())
857  {
858  os << ">";
859  }
860  }
861 
862  void print_xfst_arc(FILE * file, C data)
863  {
864  if (data.get_input_symbol() !=
865  data.get_output_symbol())
866  {
867  fprintf(file, "<");
868  }
869  // replace all spaces, epsilons and tabs
870  std::string s = data.get_input_symbol();
871  xfstize_symbol(s);
872  fprintf(file, "%s", s.c_str());
873 
874  if (data.get_input_symbol() !=
875  data.get_output_symbol() ||
876  data.get_output_symbol() == "@_UNKNOWN_SYMBOL_@")
877  {
878  s = data.get_output_symbol();
879  xfstize_symbol(s);
880  fprintf(file, ":%s", s.c_str());
881  }
882  if (data.get_input_symbol() !=
883  data.get_output_symbol())
884  {
885  fprintf(file, ">");
886  }
887  }
888 
889  public:
890 
893  void write_in_xfst_format(std::ostream &os, bool write_weights=true)
894  {
895  (void)write_weights; // todo
896  unsigned int source_state=0;
897  for (iterator it = begin(); it != end(); it++)
898  {
899  print_xfst_state(os, source_state);
900  os << ":\t";
901 
902  if (it->begin() == it->end())
903  {
904  os << "(no arcs)";
905  }
906  else
907  {
908  for (typename HfstTransitions::iterator tr_it
909  = it->begin();
910  tr_it != it->end(); tr_it++)
911  {
912  if (tr_it != it->begin())
913  {
914  os << ", ";
915  }
916  C data = tr_it->get_transition_data();
917  print_xfst_arc(os, data);
918 
919  os << " -> ";
920  print_xfst_state(os, tr_it->get_target_state());
921  }
922  }
923  os << "." << std::endl;
924  source_state++;
925  }
926  }
927 
928  // note: unknown and identity are both '?'
929  static std::string prologize_symbol(const std::string & symbol)
930  {
931  if (symbol == "0")
932  return "%0";
933  if (symbol == "?")
934  return "%?";
935  if (symbol == "@_EPSILON_SYMBOL_@")
936  return "0";
937  if (symbol == "@_UNKNOWN_SYMBOL_@")
938  return "?";
939  if(symbol == "@_IDENTITY_SYMBOL_@")
940  return "?";
941  // prepend a backslash to a double quote
942  std::string retval(symbol);
943  replace_all(retval, "\"", "\\\"");
944  return retval;
945  }
946 
947  // caveat: '?' is always unknown
948  static std::string deprologize_symbol(const std::string & symbol)
949  {
950  if (symbol == "%0")
951  return "0";
952  if (symbol == "%?")
953  return "?";
954  if (symbol == "0")
955  return "@_EPSILON_SYMBOL_@";
956  if (symbol == "?")
957  return "@_UNKNOWN_SYMBOL_@";
958  // remove the escaping backslash in front of a double quote
959  std::string retval(symbol);
960  replace_all(retval, "\\\"", "\"");
961  return retval;
962  }
963 
964  static void print_prolog_arc_symbols(FILE * file, C data)
965  {
966  std::string symbol = prologize_symbol(data.get_input_symbol());
967  fprintf(file, "\"%s\"", symbol.c_str());
968 
969  if (data.get_input_symbol() !=
970  data.get_output_symbol() ||
971  data.get_input_symbol() == "@_UNKNOWN_SYMBOL_@")
972  {
973  symbol = prologize_symbol(data.get_output_symbol());
974  fprintf(file, ":\"%s\"", symbol.c_str());
975  }
976  }
977 
978  static void print_prolog_arc_symbols(std::ostream & os, C data)
979  {
980  std::string symbol = prologize_symbol(data.get_input_symbol());
981  os << "\"" << symbol << "\"";
982 
983  if (data.get_input_symbol() !=
984  data.get_output_symbol() ||
985  data.get_input_symbol() == "@_UNKNOWN_SYMBOL_@")
986  {
987  symbol = prologize_symbol(data.get_output_symbol());
988  os << ":\"" << symbol << "\"";
989  }
990  }
991 
994  void write_in_prolog_format(FILE * file, const std::string & name,
995  bool write_weights=true)
996  {
997  unsigned int source_state=0;
998  const char * identifier = name.c_str();
999  // Print the name.
1000  if (name.find(",") != std::string::npos)
1001  {
1002  std::string msg("no commas allowed in the name of prolog networks");
1004  }
1005  fprintf(file, "network(%s).\n", identifier);
1006 
1007  // Print symbols that are in the alphabet but not used in arcs.
1008  HfstTransitionGraphAlphabet symbols_used_ = symbols_used();
1009  initialize_alphabet(symbols_used_); // exclude special symbols
1010  for (typename HfstTransitionGraphAlphabet::const_iterator it
1011  = alphabet.begin(); it != alphabet.end(); it++)
1012  {
1013  if (symbols_used_.find(*it) == symbols_used_.end())
1014  {
1015  fprintf(file, "symbol(%s, \"%s\").\n", identifier, it->c_str());
1016  }
1017  }
1018 
1019  // Print arcs.
1020  for (iterator it = begin(); it != end(); it++)
1021  {
1022  for (typename HfstTransitions::iterator tr_it
1023  = it->begin();
1024  tr_it != it->end(); tr_it++)
1025  {
1026  fprintf(file, "arc(%s, %i, %i, ",
1027  identifier, source_state, tr_it->get_target_state());
1028  C data = tr_it->get_transition_data();
1029  print_prolog_arc_symbols(file, data);
1030  if (write_weights) {
1031  fprintf(file, ", ");
1032  write_weight(file, data.get_weight());
1033  }
1034  fprintf(file, ").\n");
1035  }
1036  source_state++;
1037  }
1038 
1039  // Print final states.
1040  for (typename FinalWeightMap::const_iterator it
1041  = this->final_weight_map.begin();
1042  it != this->final_weight_map.end(); it++)
1043  {
1044  fprintf(file, "final(%s, %i", identifier, it->first);
1045  if (write_weights)
1046  {
1047  fprintf(file, ", ");
1048  write_weight(file, it->second);
1049  }
1050  fprintf(file, ").\n");
1051  }
1052  }
1053 
1056  void write_in_prolog_format(std::ostream & os, const std::string & name,
1057  bool write_weights=true)
1058  {
1059  unsigned int source_state=0;
1060 
1061  // Print the name.
1062  if (name.find(",") != std::string::npos)
1063  {
1064  std::string msg("no commas allowed in the name of prolog networks");
1066  }
1067  os << "network(" << name << ")." << std::endl;
1068 
1069  // Print symbols that are in the alphabet but not used in arcs.
1070  HfstTransitionGraphAlphabet symbols_used_ = symbols_used();
1071  initialize_alphabet(symbols_used_); // exclude special symbols
1072  for (typename HfstTransitionGraphAlphabet::const_iterator it
1073  = alphabet.begin(); it != alphabet.end(); it++)
1074  {
1075  if (symbols_used_.find(*it) == symbols_used_.end())
1076  {
1077  os << "symbol(" << name << ", \"" << *it << "\")" << std::endl;
1078  }
1079  }
1080 
1081  // Print arcs.
1082  for (iterator it = begin(); it != end(); it++)
1083  {
1084  for (typename HfstTransitions::iterator tr_it
1085  = it->begin();
1086  tr_it != it->end(); tr_it++)
1087  {
1088  os << "arc(" << name << ", " << source_state << ", " << tr_it->get_target_state() << ", ";
1089  C data = tr_it->get_transition_data();
1090  print_prolog_arc_symbols(os, data);
1091  if (write_weights) {
1092  os << ", ";
1093  write_weight(os, data.get_weight());
1094  }
1095  os << ")." << std::endl;
1096  }
1097  source_state++;
1098  }
1099 
1100  // Print final states.
1101  for (typename FinalWeightMap::const_iterator it
1102  = this->final_weight_map.begin();
1103  it != this->final_weight_map.end(); it++)
1104  {
1105  os << "final(" << name << ", " << it->first;
1106  if (write_weights) {
1107  os << ", ";
1108  write_weight(os, it->second);
1109  }
1110  os << ")." << std::endl;
1111  }
1112  }
1113 
1114  // If \a str is of format ".+", change it to .+ and return true.
1115  // Else, return false.
1116  static bool strip_quotes_from_both_sides(std::string & str)
1117  {
1118  if (str.size() < 3)
1119  return false;
1120  if (str[0] != '"' || str[str.length()-1] != '"')
1121  return false;
1122  str.erase(0, 1);
1123  str.erase(str.length()-1, 1);
1124  return true;
1125  }
1126 
1127  // If \a str is of format .+)\.", change it to .+ and return true.
1128  // Else, return false.
1129  static bool strip_ending_parenthesis_and_comma(std::string & str)
1130  {
1131  if (str.size() < 3)
1132  return false;
1133  if (str[str.length()-2] != ')' || str[str.length()-1] != '.')
1134  return false;
1135  str.erase(str.length()-2);
1136  return true;
1137  }
1138 
1139  static bool parse_prolog_network_line(const std::string & line, std::string & name)
1140  {
1141  // 'network(NAME).'
1142  char namearr[100];
1143  int n = sscanf(line.c_str(), "network(%s", namearr);
1144  if (n != 1)
1145  return false;
1146 
1147  std::string namestr(namearr);
1148  // strip the ending ")." from namestr
1149  if (!strip_ending_parenthesis_and_comma(namestr))
1150  return false;
1151 
1152  name = namestr;
1153  return true;
1154  }
1155 
1156  // Get positions of \a c in \a str. If \a esc is precedes
1157  // \a c, \a c is not included.
1158  static std::vector<unsigned int> get_positions_of_unescaped_char
1159  (const std::string & str, char c, char esc)
1160  {
1161  std::vector<unsigned int> retval;
1162  for (size_t i=0; i < str.length(); i++)
1163  {
1164  if (str[i] == c)
1165  {
1166  if (i == 0)
1167  retval.push_back(i);
1168  else if (str[i-1] == esc)
1169  ; // skip escaped chars
1170  else
1171  retval.push_back(i);
1172  }
1173  }
1174  return retval;
1175  }
1176 
1177  // Extract input and output symbols, if possible, from prolog arc
1178  // \a str and store them to \a isymbol and \a osymbol.
1179  // Return whether symbols were succesfully extracted.
1180  // \a str must be of format "foo":"bar" or "foo"
1181  static bool get_prolog_arc_symbols
1182  (const std::string & str, std::string & isymbol, std::string & osymbol)
1183  {
1184  // find positions of non-escaped double quotes (todo: double double-quote?)
1185  std::vector<unsigned int> quote_positions
1186  = get_positions_of_unescaped_char(str, '"', '\\');
1187 
1188  // "foo"
1189  if (quote_positions.size() == 2)
1190  {
1191  if (quote_positions[0] != 0 ||
1192  quote_positions[1] != str.length()-1)
1193  return false; // extra characters outside quotes
1194  }
1195  // "foo":"bar"
1196  else if (quote_positions.size() == 4)
1197  {
1198  if (quote_positions[0] != 0 ||
1199  quote_positions[3] != str.length()-1)
1200  {
1201  return false; // extra characters outside quotes
1202  }
1203  if (quote_positions[2] - quote_positions[1] != 2)
1204  {
1205  return false; // missing colon between inner quotes
1206  }
1207  if (str[quote_positions[1] + 1] != ':')
1208  {
1209  return false; // else than colon between inner quotes
1210  }
1211  }
1212  // not valid prolog arc
1213  else
1214  {
1215  return false;
1216  }
1217 
1218  // "foo"
1219  if (quote_positions.size() == 2)
1220  {
1221  // "foo" -> foo
1222  std::string symbol(str, quote_positions[0]+1, quote_positions[1]-quote_positions[0]-1);
1223  isymbol = deprologize_symbol(symbol);
1224  if (isymbol == "@_UNKNOWN_SYMBOL_@") // single unknown -> identity
1225  isymbol = "@_IDENTITY_SYMBOL_@";
1226  osymbol = isymbol;
1227  }
1228  // "foo":"bar"
1229  else
1230  {
1231  // "foo" -> foo, "bar" -> bar
1232  std::string insymbol(str, quote_positions[0]+1, quote_positions[1]-quote_positions[0]-1);
1233  std::string outsymbol(str, quote_positions[2]+1, quote_positions[3]-quote_positions[2]-1);
1234  isymbol = deprologize_symbol(insymbol);
1235  osymbol = deprologize_symbol(outsymbol);
1236  }
1237 
1238  return true;
1239  }
1240 
1241  static bool extract_weight(std::string & symbol, float & weight)
1242  {
1243  size_t last_double_quote = symbol.find_last_of('"');
1244  size_t last_space = symbol.find_last_of(' ');
1245 
1246  // at least two double quotes should be found
1247  if (last_double_quote == std::string::npos)
1248  { return false; }
1249 
1250  if (last_space == std::string::npos) {
1251  ; // no weight
1252  }
1253  else if (last_double_quote > last_space) {
1254  ; // no weight, last space is part of a symbol
1255  }
1256  else if (last_double_quote + 2 == last_space && last_space < symbol.size()-1) // + 2 because of the comma
1257  {
1258  std::istringstream buffer(symbol.substr(last_space+1));
1259  buffer >> weight;
1260  if (buffer.fail()) // a float could not be read
1261  { return false; }
1262  symbol.resize(last_space-1); // get rid of the comma and weight
1263  }
1264  else {
1265  return false; // not valid symbol and weight
1266  }
1267  return true;
1268  }
1269 
1270  static bool parse_prolog_arc_line(const std::string & line, HfstTransitionGraph & graph)
1271  {
1272  // symbolstr can also contain the weight
1273  char namestr[100]; char sourcestr[100];
1274  char targetstr[100]; char symbolstr[100];
1275 
1276  int n = sscanf(line.c_str(), "arc(%[^,], %[^,], %[^,], %[^\t\n]",
1277  namestr, sourcestr, targetstr, symbolstr);
1278 
1279  std::string symbol(symbolstr);
1280 
1281  // strip the ending ")." from symbolstr
1282  if (!strip_ending_parenthesis_and_comma(symbol))
1283  { return false; }
1284 
1285  if (n != 4)
1286  { return false; }
1287  if (std::string(namestr) != graph.name)
1288  { return false; }
1289 
1290  unsigned int source = atoi(sourcestr);
1291  unsigned int target = atoi(targetstr);
1292 
1293  // handle the weight that might be included in symbol string
1294  float weight = 0;
1295  if (! extract_weight(symbol, weight))
1296  { return false; }
1297 
1298  std::string isymbol = "";
1299  std::string osymbol = "";
1300 
1301  if (!get_prolog_arc_symbols(symbol, isymbol, osymbol))
1302  return false;
1303 
1304  graph.add_transition(source, HfstTransition<C>(target, isymbol, osymbol, weight));
1305  return true;
1306  }
1307 
1308  static bool parse_prolog_final_line(const std::string & line, HfstTransitionGraph & graph)
1309  {
1310  // 'final(NAME, number).' or 'final(NAME, number, weight).'
1311  char namestr[100];
1312  char finalstr[100];
1313  char weightstr[100];
1314  float weight = 0;
1315 
1316  unsigned int number_of_commas = 0;
1317  size_t pos = line.find(',');
1318  while (pos != std::string::npos)
1319  {
1320  number_of_commas++;
1321  pos = line.find(',', pos+1);
1322  }
1323 
1324  if (number_of_commas == 1)
1325  {
1326  int n = sscanf(line.c_str(), "final(%[^,], %[^)]).", namestr, finalstr);
1327  if (n != 2)
1328  { return false; }
1329  }
1330  else if (number_of_commas == 2)
1331  {
1332  int n = sscanf(line.c_str(), "final(%[^,], %[^,], %[^)]).", namestr, finalstr, weightstr);
1333  if (n != 3)
1334  { return false; }
1335  std::istringstream buffer(weightstr);
1336  buffer >> weight;
1337  if (buffer.fail()) // a float could not be read
1338  { return false; }
1339  }
1340  else
1341  {
1342  return false;
1343  }
1344 
1345  if (std::string(namestr) != graph.name)
1346  return false;
1347 
1348  graph.set_final_weight(atoi(finalstr), weight);
1349  return true;
1350  }
1351 
1352  static bool parse_prolog_symbol_line(const std::string & line, HfstTransitionGraph & graph)
1353  {
1354  // 'symbol(NAME, "foo").'
1355  char namearr[100];
1356  char symbolarr[100];
1357  int n = sscanf(line.c_str(), "symbol(%[^,], %s", namearr, symbolarr);
1358 
1359  if (n != 2)
1360  return false;
1361 
1362  std::string namestr(namearr);
1363  std::string symbolstr(symbolarr);
1364 
1365  if (namestr != graph.name)
1366  return false;
1367 
1368  if (! strip_ending_parenthesis_and_comma(symbolstr))
1369  return false;
1370 
1371  if (! strip_quotes_from_both_sides(symbolstr))
1372  return false;
1373 
1374  graph.add_symbol_to_alphabet(symbolstr);
1375  return true;
1376  }
1377 
1378  // Erase newlines from the end of \a str and return \a str.
1379  static std::string strip_newlines(std::string & str)
1380  {
1381  for (signed int i=(signed int)str.length()-1; i >= 0; --i)
1382  {
1383  if (str[i] == '\n' || str[i] == '\r')
1384  str.erase(i, 1);
1385  else
1386  break;
1387  }
1388  return str;
1389  }
1390 
1391  // Try to get a line from \a is (if \a file == NULL)
1392  // or from \a file. If succesfull, strip the line from newlines,
1393  // increment \a linecount by one and return the line.
1394  // Else, throw an EndOfStreamException.
1395  static std::string get_stripped_line
1396  (std::istream & is, FILE * file, unsigned int & linecount)
1397  {
1398  char line [255];
1399 
1400  if (file == NULL) { /* we use streams */
1401  if (not is.getline(line,255).eof())
1403  }
1404  else { /* we use FILEs */
1405  if (NULL == fgets(line, 255, file))
1407  }
1408  linecount++;
1409 
1410  std::string linestr(line);
1411  return strip_newlines(linestr);
1412  }
1413 
1414  /* Create an HfstTransitionGraph as defined in prolog format
1415  in istream \a is or FILE \a file.
1416 
1417  The functions is called by functions
1418  read_in_prolog_format(istream&) and
1419  read_in_prolog_format(FILE*).
1420  If \a file is NULL, it is ignored and \a is is used.
1421  If \a file is not NULL, it is used and \a is is ignored. */
1422  static HfstTransitionGraph read_in_prolog_format
1423  (std::istream &is, FILE *file, unsigned int & linecount)
1424  {
1425 
1426  HfstTransitionGraph retval;
1427  std::string linestr;
1428 
1429  while(true)
1430  {
1431  try
1432  {
1433  linestr = get_stripped_line(is, file, linecount);
1434  }
1435  catch (const EndOfStreamException & e)
1436  {
1437  HFST_THROW(NotValidPrologFormatException);
1438  }
1439 
1440  if (linestr.length() != 0 && linestr[0] == '#')
1441  {
1442  continue; // comment line
1443  }
1444  else
1445  {
1446  break; // first non-comment line
1447  }
1448  }
1449 
1450 
1451  if (! parse_prolog_network_line(linestr, retval.name))
1452  {
1453  std::string message("first line not valid prolog: ");
1454  message.append(linestr);
1455  HFST_THROW_MESSAGE(NotValidPrologFormatException, message);
1456  }
1457 
1458  while(true)
1459  {
1460  try
1461  {
1462  linestr = get_stripped_line(is, file, linecount);
1463  if (linestr == "") // prolog separator
1464  {
1465  return retval;
1466  }
1467  }
1468  catch (const EndOfStreamException & e)
1469  {
1470  return retval;
1471  }
1472 
1473  if (! (parse_prolog_arc_line(linestr, retval) ||
1474  parse_prolog_final_line(linestr, retval) ||
1475  parse_prolog_symbol_line(linestr, retval)) )
1476  {
1477  std::string message("line not valid prolog: ");
1478  message.append(linestr);
1479  HFST_THROW_MESSAGE(NotValidPrologFormatException, message);
1480  }
1481  }
1482  HFST_THROW(NotValidPrologFormatException); // this should not happen
1483  }
1484 
1485  static HfstTransitionGraph read_in_prolog_format
1486  (std::istream &is,
1487  unsigned int & linecount)
1488  {
1489  return read_in_prolog_format
1490  (is, NULL /* a dummy variable */,
1491  linecount);
1492  }
1493 
1494  static HfstTransitionGraph read_in_prolog_format
1495  (FILE *file,
1496  unsigned int & linecount)
1497  {
1498  return read_in_prolog_format
1499  (std::cin /* a dummy variable */,
1500  file, linecount);
1501  }
1502 
1503  static HfstTransitionGraph read_in_prolog_format
1504  (HfstFile &file,
1505  unsigned int & linecount)
1506  {
1507  return read_in_att_format(std::cin /* a dummy variable */, file.get_file(),
1508  linecount);
1509  }
1510 
1511 
1512 
1515  void write_in_xfst_format(FILE * file, bool write_weights=true)
1516  {
1517  (void)write_weights;
1518  unsigned int source_state=0;
1519  for (iterator it = begin(); it != end(); it++)
1520  {
1521  print_xfst_state(file, source_state);
1522  fprintf(file, ":\t");
1523 
1524  if (it->begin() == it->end())
1525  {
1526  fprintf(file, "(no arcs)");
1527  }
1528  else
1529  {
1530  for (typename HfstTransitions::iterator tr_it
1531  = it->begin();
1532  tr_it != it->end(); tr_it++)
1533  {
1534  if (tr_it != it->begin())
1535  {
1536  fprintf(file, ", ");
1537  }
1538  C data = tr_it->get_transition_data();
1539 
1540  print_xfst_arc(file, data);
1541 
1542  fprintf(file, " -> ");
1543  print_xfst_state(file, tr_it->get_target_state());
1544  }
1545  }
1546  fprintf(file, ".\n");
1547  source_state++;
1548  }
1549  }
1550 
1551 
1552 
1553 
1556  void write_in_att_format(std::ostream &os, bool write_weights=true)
1557  {
1558  unsigned int source_state=0;
1559  for (iterator it = begin(); it != end(); it++)
1560  {
1561  for (typename HfstTransitions::iterator tr_it
1562  = it->begin();
1563  tr_it != it->end(); tr_it++)
1564  {
1565  C data = tr_it->get_transition_data();
1566 
1567  std::string isymbol = data.get_input_symbol();
1568  replace_all(isymbol, " ", "@_SPACE_@");
1569  replace_all(isymbol, "@_EPSILON_SYMBOL_@", "@0@");
1570  replace_all(isymbol, "\t", "@_TAB_@");
1571 
1572  std::string osymbol = data.get_output_symbol();
1573  replace_all(osymbol, " ", "@_SPACE_@");
1574  replace_all(osymbol, "@_EPSILON_SYMBOL_@", "@0@");
1575  replace_all(osymbol, "\t", "@_TAB_@");
1576 
1577  os << source_state << "\t"
1578  << tr_it->get_target_state() << "\t"
1579  << isymbol << "\t"
1580  << osymbol;
1581 
1582  if (write_weights) {
1583  os << "\t";
1584  write_weight(os, data.get_weight());
1585  }
1586  os << "\n";
1587  }
1588  if (is_final_state(source_state))
1589  {
1590  os << source_state;
1591  if (write_weights) {
1592  os << "\t";
1593  write_weight(os, get_final_weight(source_state));
1594  }
1595  os << "\n";
1596  }
1597  source_state++;
1598  }
1599  }
1600 
1603  void write_in_att_format(FILE *file, bool write_weights=true)
1604  {
1605  unsigned int source_state=0;
1606  for (iterator it = begin(); it != end(); it++)
1607  {
1608  for (typename HfstTransitions::iterator tr_it
1609  = it->begin();
1610  tr_it != it->end(); tr_it++)
1611  {
1612  C data = tr_it->get_transition_data();
1613 
1614  std::string isymbol = data.get_input_symbol();
1615  replace_all(isymbol, " ", "@_SPACE_@");
1616  replace_all(isymbol, "@_EPSILON_SYMBOL_@", "@0@");
1617  replace_all(isymbol, "\t", "@_TAB_@");
1618 
1619  std::string osymbol = data.get_output_symbol();
1620  replace_all(osymbol, " ", "@_SPACE_@");
1621  replace_all(osymbol, "@_EPSILON_SYMBOL_@", "@0@");
1622  replace_all(osymbol, "\t", "@_TAB_@");
1623 
1624  fprintf(file, "%i\t%i\t%s\t%s",
1625  source_state,
1626  tr_it->get_target_state(),
1627  isymbol.c_str(),
1628  osymbol.c_str());
1629 
1630  if (write_weights) {
1631  fprintf(file, "\t");
1632  write_weight(file, data.get_weight());
1633  }
1634  fprintf(file, "\n");
1635  }
1636  if (is_final_state(source_state))
1637  {
1638  fprintf(file, "%i", source_state);
1639  if (write_weights) {
1640  fprintf(file, "\t");
1641  write_weight(file, get_final_weight(source_state));
1642  }
1643  fprintf(file, "\n");
1644  }
1645  source_state++;
1646  }
1647  }
1648 
1649  void write_in_att_format(char * ptr, bool write_weights=true)
1650  {
1651  unsigned int source_state=0;
1652  size_t cwt = 0; // characters written in total
1653  size_t cw = 0; // characters written in latest call to sprintf
1654  for (iterator it = begin(); it != end(); it++)
1655  {
1656  for (typename HfstTransitions::iterator tr_it
1657  = it->begin();
1658  tr_it != it->end(); tr_it++)
1659  {
1660  C data = tr_it->get_transition_data();
1661 
1662  std::string isymbol = data.get_input_symbol();
1663  replace_all(isymbol, " ", "@_SPACE_@");
1664  replace_all(isymbol, "@_EPSILON_SYMBOL_@", "@0@");
1665  replace_all(isymbol, "\t", "@_TAB_@");
1666 
1667  std::string osymbol = data.get_output_symbol();
1668  replace_all(osymbol, " ", "@_SPACE_@");
1669  replace_all(osymbol, "@_EPSILON_SYMBOL_@", "@0@");
1670  replace_all(osymbol, "\t", "@_TAB_@");
1671 
1672  cw = sprintf(ptr + cwt, "%i\t%i\t%s\t%s",
1673  source_state,
1674  tr_it->get_target_state(),
1675  isymbol.c_str(),
1676  osymbol.c_str());
1677 
1678  cwt = cwt + cw;
1679 
1680  if (write_weights)
1681  cw = sprintf(ptr + cwt, "\t%f",
1682  data.get_weight());
1683  cwt = cwt + cw;
1684  cw = sprintf(ptr + cwt, "\n");
1685  cwt = cwt + cw;
1686  }
1687  if (is_final_state(source_state))
1688  {
1689  cw = sprintf(ptr + cwt, "%i", source_state);
1690  cwt = cwt + cw;
1691  if (write_weights)
1692  cw = sprintf(ptr + cwt, "\t%f",
1693  get_final_weight(source_state));
1694  cwt = cwt + cw;
1695  cw = sprintf(ptr + cwt, "\n");
1696  cwt = cwt + cw;
1697  }
1698  source_state++;
1699  }
1700  }
1701 
1702 
1706  void write_in_att_format_number(FILE *file, bool write_weights=true)
1707  {
1708  unsigned int source_state=0;
1709  for (iterator it = begin(); it != end(); it++)
1710  {
1711  for (typename HfstTransitions::iterator tr_it
1712  = it->begin();
1713  tr_it != it->end(); tr_it++)
1714  {
1715  C data = tr_it->get_transition_data();
1716 
1717  fprintf(file, "%i\t%i\t%i\t%i",
1718  source_state,
1719  tr_it->get_target_state(),
1720  tr_it->get_input_number(),
1721  tr_it->get_output_number());
1722 
1723  if (write_weights)
1724  fprintf(file, "\t%f",
1725  data.get_weight());
1726  fprintf(file, "\n");
1727 
1728  if (is_final_state(source_state))
1729  {
1730  fprintf(file, "%i", source_state);
1731  if (write_weights)
1732  fprintf(file, "\t%f",
1733  get_final_weight(source_state));
1734  fprintf(file, "\n");
1735  }
1736  }
1737  source_state++;
1738  }
1739  }
1740 
1741 
1742  /* Create an HfstTransitionGraph as defined in AT&T format
1743  in istream \a is or FILE \a file. \a epsilon_symbol defines
1744  how epsilon is represented.
1745 
1746  The functions is called by functions
1747  read_in_att_format(istream&, std::string) and
1748  read_in_att_format(FILE*, std::string).
1749  If \a file is NULL, it is ignored and \a is is used.
1750  If \a file is not NULL, it is used and \a is is ignored. */
1751  static HfstTransitionGraph read_in_att_format
1752  (std::istream &is,
1753  FILE *file,
1754  std::string epsilon_symbol,
1755  unsigned int & linecount) {
1756 
1757  if (file == NULL) {
1758  if (is.eof()) {
1760  }
1761  }
1762  else {
1763  if (feof(file)) {
1765  }
1766  }
1767 
1768  HfstTransitionGraph retval;
1769  char line [255];
1770  while(true) {
1771 
1772  if (file == NULL) { /* we use streams */
1773  if (not is.getline(line,255).eof())
1774  break;
1775  }
1776  else { /* we use FILEs */
1777  if (NULL == fgets(line, 255, file))
1778  break;
1779  }
1780 
1781  linecount++;
1782 
1783  // an empty line signifying an empty transducer,
1784  // a special case that is accepted if it is the only
1785  // transducer in the stream
1786  if ( // empty line with or without a newline
1787  (line[0] == '\0') ||
1788  (line[0] == '\n' && line[1] == '\0') ||
1789  // windows newline
1790  (line[0] == '\r' && line[1] == '\n' && line[2] == '\0')
1791  ) {
1792 
1793  // make sure that the end-of-file is reached
1794  if (file == NULL)
1795  is.get();
1796  else
1797  fgetc(file);
1798  break;
1799  }
1800 
1801  if (*line == '-') // transducer separator line is "--"
1802  return retval;
1803 
1804  // scan one line that can have a maximum of five fields
1805  char a1 [100]; char a2 [100]; char a3 [100];
1806  char a4 [100]; char a5 [100];
1807  // how many fields could be parsed
1808  int n = sscanf(line, "%s%s%s%s%s", a1, a2, a3, a4, a5);
1809 
1810  // set value of weight
1811  float weight = 0;
1812  if (n == 2) // a final state line with weight
1813  weight = atof(a2);
1814  if (n == 5) // a transition line with weight
1815  weight = atof(a5);
1816 
1817  if (n == 1 || n == 2) // a final state line
1818  retval.set_final_weight( atoi(a1), weight );
1819 
1820  else if (n == 4 || n == 5) { // a transition line
1821  std::string input_symbol=std::string(a3);
1822  std::string output_symbol=std::string(a4);
1823 
1824  // replace "@_SPACE_@"s with " " and "@0@"s with
1825  // "@_EPSILON_SYMBOL_@"
1826  replace_all(input_symbol, "@_SPACE_@", " ");
1827  replace_all(input_symbol, "@0@", "@_EPSILON_SYMBOL_@");
1828  replace_all(input_symbol, "@_TAB_@", "\t");
1829  replace_all(input_symbol, "@_COLON_@", ":");
1830 
1831  replace_all(output_symbol, "@_SPACE_@", " ");
1832  replace_all(output_symbol, "@0@", "@_EPSILON_SYMBOL_@");
1833  replace_all(output_symbol, "@_TAB_@", "\t");
1834  replace_all(output_symbol, "@_COLON_@", ":");
1835 
1836  if (epsilon_symbol.compare(input_symbol) == 0)
1837  input_symbol="@_EPSILON_SYMBOL_@";
1838  if (epsilon_symbol.compare(output_symbol) == 0)
1839  output_symbol="@_EPSILON_SYMBOL_@";
1840 
1841  HfstTransition <C> tr( atoi(a2), input_symbol,
1842  output_symbol, weight );
1843  retval.add_transition( atoi(a1), tr );
1844  }
1845 
1846  else { // line could not be parsed
1847  std::string message(line);
1850  message);
1851  }
1852  }
1853  return retval;
1854  }
1855 
1856 
1863  static HfstTransitionGraph read_in_att_format
1864  (std::istream &is,
1865  std::string epsilon_symbol,
1866  unsigned int & linecount)
1867  {
1868  return read_in_att_format
1869  (is, NULL /* a dummy variable */,
1870  epsilon_symbol, linecount);
1871  }
1872 
1879  static HfstTransitionGraph read_in_att_format
1880  (FILE *file,
1881  std::string epsilon_symbol,
1882  unsigned int & linecount)
1883  {
1884  return read_in_att_format
1885  (std::cin /* a dummy variable */,
1886  file, epsilon_symbol, linecount);
1887  }
1888 
1889  static HfstTransitionGraph read_in_att_format
1890  (HfstFile &file,
1891  std::string epsilon_symbol,
1892  unsigned int & linecount)
1893  {
1894  return read_in_att_format(std::cin /* a dummy variable */, file.get_file(),
1895  epsilon_symbol, linecount);
1896  }
1897 
1898 
1899  // ----------------------------------------------
1900  // ----- Substitution functions -----
1901  // ----------------------------------------------
1902 
1903  protected:
1904 
1905  /* A function that performs in-place-substitution in the graph. */
1906 
1907  void substitute_(HfstSymbol old_symbol,
1908  HfstSymbol new_symbol,
1909  bool input_side=true,
1910  bool output_side=true)
1911  {
1912  // ----- Go through all states -----
1913  for (iterator it = begin(); it != end(); it++)
1914  {
1915  // Go through all transitions
1916  for (unsigned int i=0; i < it->size(); i++)
1917  {
1918  HfstTransition<C> &tr_it = it->operator[](i);
1919 
1920  // The substituting input and output symbols for the
1921  // current transition.
1922  HfstSymbol substituting_input_symbol
1923  = tr_it.get_input_symbol();
1924  HfstSymbol substituting_output_symbol
1925  = tr_it.get_output_symbol();
1926 
1927  // Whether a substitution will be performed.
1928  bool substitution_made=false;
1929 
1930  if (input_side &&
1931  tr_it.get_input_symbol() == old_symbol) {
1932  substituting_input_symbol = new_symbol;
1933  substitution_made=true;
1934  }
1935  if (output_side &&
1936  tr_it.get_output_symbol() == old_symbol) {
1937  substituting_output_symbol = new_symbol;
1938  substitution_made=true;
1939  }
1940 
1941  // If a substitution is to be performed,
1942  if (substitution_made) {
1943 
1944  add_symbol_to_alphabet(new_symbol);
1945 
1946  // change the current transition accordingly.
1947  HfstTransition<C> tr
1948  (tr_it.get_target_state(),
1949  substituting_input_symbol,
1950  substituting_output_symbol,
1951  tr_it.get_weight());
1952 
1953  it->operator[](i) = tr;
1954  }
1955 
1956  } // all transitions gone through
1957 
1958  } // ----- all states gone through -----
1959 
1960  return;
1961  }
1962 
1963  /* A function that performs in-place substitutions in the graph
1964  as defined in \a substitutions.
1965 
1966  substitutions[from_number] = to_number,
1967  if substitutions[from_number] = no_substitution, no substitution is made */
1968  void substitute_(const HfstNumberVector &substitutions,
1969  unsigned int no_substitution)
1970  {
1971  // ----- Go through all states -----
1972  for (iterator it = begin(); it != end(); it++)
1973  {
1974  // Go through all transitions
1975  for (unsigned int i=0; i < it->size(); i++)
1976  {
1977  HfstTransition<C> &tr_it = it->operator[](i);
1978 
1979  HfstNumber old_inumber = tr_it.get_input_number();
1980  HfstNumber old_onumber = tr_it.get_output_number();
1981 
1982  HfstNumber new_inumber = substitutions.at(old_inumber);
1983  HfstNumber new_onumber = substitutions.at(old_onumber);
1984 
1985  // If a substitution is to be performed,
1986  if (new_inumber != no_substitution ||
1987  new_onumber != no_substitution)
1988  {
1989  if (new_inumber != no_substitution)
1990  add_symbol_to_alphabet(C::get_symbol(new_inumber));
1991  else
1992  new_inumber = old_inumber;
1993 
1994  if (new_onumber != no_substitution)
1995  add_symbol_to_alphabet(C::get_symbol(new_onumber));
1996  else
1997  new_onumber = old_onumber;
1998 
1999  // change the current transition accordingly.
2000  HfstTransition<C> tr
2001  (tr_it.get_target_state(),
2002  new_inumber,
2003  new_onumber,
2004  tr_it.get_weight(), false);
2005 
2006  it->operator[](i) = tr;
2007  }
2008 
2009  } // all transitions gone through
2010 
2011  } // ----- all states gone through -----
2012 
2013  return;
2014  }
2015 
2016  /* A function that performs in-place substitutions in the graph
2017  as defined in \a substitutions. */
2018  void substitute_(const HfstNumberPairSubstitutions &substitutions)
2019  {
2020  // ----- Go through all states -----
2021  for (iterator it = begin(); it != end(); it++)
2022  {
2023  // Go through all transitions
2024  for (unsigned int i=0; i < it->size(); i++)
2025  {
2026  HfstTransition<C> &tr_it = it->operator[](i);
2027 
2028  HfstNumberPair old_number_pair
2029  ( tr_it.get_input_number(),
2030  tr_it.get_output_number() );
2031 
2032  HfstNumberPairSubstitutions::const_iterator subst_it
2033  = substitutions.find(old_number_pair);
2034 
2035  // If a substitution is to be performed,
2036  if (subst_it != substitutions.end()) {
2037 
2038  HfstNumber new_input_number = subst_it->second.first;
2039  HfstNumber new_output_number = subst_it->second.second;
2040 
2041  add_symbol_to_alphabet(HfstTropicalTransducerTransitionData::
2042  get_symbol(new_input_number));
2043  add_symbol_to_alphabet(HfstTropicalTransducerTransitionData::
2044  get_symbol(new_output_number));
2045 
2046  // change the current transition accordingly.
2047  HfstTransition<C> tr
2048  (tr_it.get_target_state(),
2049  new_input_number,
2050  new_output_number,
2051  tr_it.get_weight(), false);
2052 
2053  it->operator[](i) = tr;
2054  }
2055 
2056  } // all transitions gone through
2057 
2058  } // ----- all states gone through -----
2059 
2060  return;
2061  }
2062 
2063  public:
2064 
2065  /* A function that performs in-place removal of all transitions
2066  equivalent to \a sp in the graph. */
2067  void remove_transitions(const HfstSymbolPair &sp)
2068  {
2069  unsigned int in_match = C::get_number(sp.first);
2070  unsigned int out_match = C::get_number(sp.second);
2071 
2072  bool in_match_used = false;
2073  bool out_match_used = false;
2074 
2075  // ----- Go through all states -----
2076  for (iterator it = begin(); it != end(); it++)
2077  {
2078  // Go through all transitions of the current state
2079  for (unsigned int i=0; i < it->size(); i++)
2080  {
2081  HfstTransition<C> &tr_it = it->operator[](i);
2082 
2083  // If a match was found, remove the transition:
2084  unsigned int in_tr = tr_it.get_input_number();
2085  unsigned int out_tr = tr_it.get_output_number();
2086  if (in_tr == in_match && out_tr == out_match) {
2087  it->erase(it->begin()+i); }
2088  else
2089  {
2090  if (in_tr == in_match || out_tr == in_match) {
2091  in_match_used=true; }
2092  if (in_tr == out_match || out_tr == out_match) {
2093  out_match_used=true; }
2094  }
2095  }
2096  }
2097 
2098  // Handle the alphabet
2099  if (!in_match_used) {
2100  alphabet.erase(sp.first); }
2101  if (!out_match_used) {
2102  alphabet.erase(sp.second); }
2103  }
2104 
2105  protected:
2106 
2107  /* A function that performs in-place-substitution in the graph. */
2108  void substitute_(const HfstSymbolPair &old_sp,
2109  const HfstSymbolPairSet &new_sps)
2110  {
2111  if (new_sps.empty())
2112  {
2113  return remove_transitions(old_sp);
2114  }
2115 
2116  unsigned int old_input_number = C::get_number(old_sp.first);
2117  unsigned int old_output_number = C::get_number(old_sp.second);
2118 
2119  // Whether any substitution was performed
2120  bool substitution_performed=false;
2121 
2122  // ----- Go through all states -----
2123  for (iterator it = begin(); it != end(); it++)
2124  {
2125  // The transitions to be added to the current state
2126  HfstTransitions new_transitions;
2127 
2128  // Go through all transitions of the current state
2129  for (unsigned int i=0; i < it->size(); i++)
2130  {
2131  HfstTransition<C> &tr_it = it->operator[](i);
2132 
2133  // If a match was found, substitute:
2134  if (tr_it.get_input_number() == old_input_number &&
2135  tr_it.get_output_number() == old_output_number)
2136  {
2137  substitution_performed=true;
2138 
2139  // change the current transition so that it is equivalent
2140  // to the first substituting transition in new_sps
2141  typename HfstSymbolPairSet::const_iterator IT
2142  = new_sps.begin();
2143 
2144  HfstTransition<C> tr
2145  (tr_it.get_target_state(),
2146  C::get_number(IT->first),
2147  C::get_number(IT->second),
2148  tr_it.get_weight(),
2149  true);
2150 
2151  it->operator[](i) = tr;
2152 
2153  // and schedule the rest of the substituting transitions
2154  // in new_sps to be added to the current state.
2155  while (IT != new_sps.end())
2156  {
2157  HfstTransition<C> TR
2158  (tr_it.get_target_state(),
2159  C::get_number(IT->first),
2160  C::get_number(IT->second),
2161  tr_it.get_weight(),
2162  true);
2163 
2164  new_transitions.push_back(TR);
2165  IT++;
2166  }
2167 
2168  } // (substitution and scheduling done)
2169 
2170  } // (all transitions of a state gone through)
2171 
2172  // Add the new transitions to the current state
2173  for (typename HfstTransitions
2174  ::const_iterator NIT = new_transitions.begin();
2175  NIT != new_transitions.end(); NIT++)
2176  {
2177  it->push_back(*NIT);
2178  }
2179 
2180  } // ( ----- all states in the graph gone through ----- )
2181 
2182  // If at least one substitution was performed, add all the
2183  // symbols in the substituting transitions to the alphabet of
2184  // the graph.
2185  if (substitution_performed) {
2186  add_symbols_to_alphabet(new_sps);
2187  }
2188 
2189  // Remove symbols that were removed because of substitutions
2190  // (or didn't occur in the graph in the first place)
2191  std::set<unsigned int> syms;
2192  /*for (typename HfstSymbolPairSet::const_iterator it = new_sps.begin();
2193  it != new_sps.end(); it++) {
2194  syms.insert(C::get_number(it->first));
2195  syms.insert(C::get_number(it->second)); ?????????
2196  }*/
2197  syms.insert(old_input_number);
2198  syms.insert(old_output_number);
2199  prune_alphabet_after_substitution(syms);
2200 
2201  return;
2202  }
2203 
2204 
2205  /* A function that performs in-place-substitution in the graph. */
2206  void substitute_(bool (*func)
2207  (const HfstSymbolPair &sp, HfstSymbolPairSet &sps))
2208  {
2209  // ----- Go through all states. -----
2210  for (iterator it = begin(); it != end(); it++)
2211  {
2212  // The transitions to be added to the current state.
2213  HfstTransitions new_transitions;
2214 
2215  // Go through all transitions.
2216  for (unsigned int i=0; i < it->size(); i++)
2217  {
2218  HfstTransition<C> &tr_it = it->operator[](i);
2219 
2220  HfstSymbolPair transition_symbol_pair
2221  (tr_it.get_input_symbol(),
2222  tr_it.get_output_symbol());
2223  HfstSymbolPairSet substituting_transitions;
2224 
2225  // If a substitution is to be performed,
2226  bool perform_substitution=false;
2227  try {
2228  perform_substitution =
2229  (*func)(transition_symbol_pair, substituting_transitions);
2230  }
2231  catch (const HfstException & e)
2232  {
2233  throw e;
2234  }
2235  if (perform_substitution)
2236  {
2237  // change the transition to the first element
2238  // in new_sps
2239  typename HfstSymbolPairSet::const_iterator IT
2240  = substituting_transitions.begin();
2241 
2242  if (not C::is_valid_symbol(IT->first) ||
2243  not C::is_valid_symbol(IT->second) )
2245  (EmptyStringException,
2246  "HfstTransitionGraph::substitute");
2247 
2248  HfstTransition<C> tr
2249  (tr_it.get_target_state(),
2250  IT->first,
2251  IT->second,
2252  tr_it.get_weight());
2253 
2254  it->operator[](i) = tr;
2255 
2256  add_symbol_to_alphabet(IT->first);
2257  add_symbol_to_alphabet(IT->second);
2258 
2259  // and schedule the rest of the elements in new_sps
2260  // to be added to this state.
2261  while (IT != substituting_transitions.end())
2262  {
2263 
2264  if (not C::is_valid_symbol(IT->first) ||
2265  not C::is_valid_symbol(IT->second) )
2267  (EmptyStringException,
2268  "HfstTransitionGraph::substitute");
2269 
2270  HfstTransition<C> TR
2271  (tr_it.get_target_state(),
2272  IT->first,
2273  IT->second,
2274  tr_it.get_weight());
2275 
2276  new_transitions.push_back(TR);
2277 
2278  add_symbol_to_alphabet(IT->first);
2279  add_symbol_to_alphabet(IT->second);
2280 
2281  IT++;
2282  }
2283 
2284  } // Substitution and scheduling performed.
2285 
2286  } // All transitions gone through.
2287 
2288  // Add the new transitions.
2289  for (typename HfstTransitions
2290  ::const_iterator NIT = new_transitions.begin();
2291  NIT != new_transitions.end(); NIT++)
2292  {
2293  it->push_back(*NIT);
2294  }
2295 
2296  } // ----- All states gone through. -----
2297 
2298  return;
2299  }
2300 
2301  public:
2302 
2303  /* ----------------------------------------
2304  The public substitution functions.
2305  ---------------------------------------- */
2306 
2311  substitute(const HfstSymbol &old_symbol,
2312  const HfstSymbol &new_symbol,
2313  bool input_side=true,
2314  bool output_side=true) {
2315 
2316  if (not C::is_valid_symbol(old_symbol) ||
2317  not C::is_valid_symbol(new_symbol) ) {
2319  (EmptyStringException,
2320  "HfstTransitionGraph::substitute"); }
2321 
2322  // If a symbol is substituted with itself, do nothing.
2323  if (old_symbol == new_symbol)
2324  return *this;
2325  // If the old symbol is not known to the graph, do nothing.
2326  if (alphabet.find(old_symbol) == alphabet.end())
2327  return *this;
2328 
2329  // Remove the symbol to be substituted from the alphabet
2330  // if the substitution is made on both sides.
2331  if (input_side && output_side) {
2332  /* Special symbols are always included in the alphabet */
2333  if (not is_epsilon(old_symbol) &&
2334  not is_unknown(old_symbol) &&
2335  not is_identity(old_symbol)) {
2336  alphabet.erase(old_symbol); }
2337  }
2338  // Insert the substituting symbol to the alphabet.
2339  alphabet.insert(new_symbol);
2340 
2341  substitute_(old_symbol, new_symbol, input_side, output_side);
2342 
2343  return *this;
2344  }
2345 
2346  HfstTransitionGraph &substitute_symbols
2347  (const HfstSymbolSubstitutions &substitutions)
2348  { return this->substitute(substitutions); }
2349 
2352  (const HfstSymbolSubstitutions &substitutions)
2353  {
2354  // add symbols to the global HfstTransition alphabet
2355  for (HfstSymbolSubstitutions::const_iterator it
2356  = substitutions.begin();
2357  it != substitutions.end(); it++)
2358  {
2359  (void)get_symbol_number(it->first);
2360  (void)get_symbol_number(it->second);
2361  }
2362 
2363  // how symbol numbers are substituted:
2364  // substitutions_[from_symbol] = to_symbol
2365  std::vector<unsigned int> substitutions_;
2366  // marker that means that no substitution is made
2367  unsigned int no_substitution = C::get_max_number()+substitutions.size()+1;
2368  substitutions_.resize
2369  (C::get_max_number()+1, no_substitution);
2370  for (HfstSymbolSubstitutions::const_iterator it
2371  = substitutions.begin();
2372  it != substitutions.end(); it++)
2373  {
2374  HfstNumber from_symbol = get_symbol_number(it->first);
2375  HfstNumber to_symbol = get_symbol_number(it->second);
2376 
2377  substitutions_.at(from_symbol) = to_symbol;
2378  }
2379 
2380  substitute_(substitutions_, no_substitution);
2381 
2382  return *this;
2383  }
2384 
2385  HfstTransitionGraph &substitute_symbol_pairs
2386  (const HfstSymbolPairSubstitutions &substitutions)
2387  { return this->substitute(substitutions); }
2388 
2396  (const HfstSymbolPairSubstitutions &substitutions)
2397  {
2398  // Convert from symbols to numbers
2399  HfstNumberPairSubstitutions substitutions_;
2400  for (HfstSymbolPairSubstitutions::const_iterator it
2401  = substitutions.begin();
2402  it != substitutions.end(); it++)
2403  {
2404  HfstNumberPair from_transition
2405  (get_symbol_number(it->first.first),
2406  get_symbol_number(it->first.second));
2407  HfstNumberPair to_transition
2408  (get_symbol_number(it->second.first),
2409  get_symbol_number(it->second.second));
2410  substitutions_[from_transition] = to_transition;
2411  }
2412 
2413  substitute_(substitutions_);
2414 
2415  return *this;
2416  }
2417 
2421  (const HfstSymbolPair &sp, const HfstSymbolPairSet &sps)
2422  {
2423  if (not C::is_valid_symbol(sp.first) ||
2424  not C::is_valid_symbol(sp.second) ) {
2426  (EmptyStringException,
2427  "HfstTransitionGraph::substitute"); }
2428 
2429  for (typename HfstSymbolPairSet::const_iterator it = sps.begin();
2430  it != sps.end(); it++)
2431  {
2432  if (not C::is_valid_symbol(it->first) ||
2433  not C::is_valid_symbol(it->second) ) {
2435  (EmptyStringException,
2436  "HfstTransitionGraph::substitute"); }
2437  }
2438 
2439  substitute_(sp, sps);
2440 
2441  return *this;
2442  }
2443 
2447  (const HfstSymbolPair &old_pair,
2448  const HfstSymbolPair &new_pair)
2449  {
2450  if (not C::is_valid_symbol(old_pair.first) ||
2451  not C::is_valid_symbol(new_pair.first) ||
2452  not C::is_valid_symbol(old_pair.second) ||
2453  not C::is_valid_symbol(new_pair.second) ) {
2455  (EmptyStringException,
2456  "HfstTransitionGraph::substitute"); }
2457 
2458  StringPairSet new_pair_set;
2459  new_pair_set.insert(new_pair);
2460  substitute_(old_pair, new_pair_set);
2461 
2462  return *this;
2463  }
2464 
2474  substitute(bool (*func)
2475  (const HfstSymbolPair &sp, HfstSymbolPairSet &sps) )
2476  {
2477  substitute_(func);
2478  return *this;
2479  }
2480 
2481 
2482 
2483 
2484  /* ----------------------------------------------------
2485  Substitute string pair with a transition graph
2486  ---------------------------------------------------- */
2487 
2488  protected:
2489  /* Used in function
2490  substitute(const StringPair&, HfstTransitionGraph&) */
2491  struct substitution_data
2492  {
2493  HfstState origin_state;
2494  HfstState target_state;
2495  typename C::WeightType weight;
2496  HfstTransitionGraph * substituting_graph;
2497 
2498  substitution_data(HfstState origin,
2499  HfstState target,
2500  typename C::WeightType weight,
2501  HfstTransitionGraph * substituting)
2502  {
2503  origin_state=origin;
2504  target_state=target;
2505  this->weight=weight;
2506  substituting_graph=substituting;
2507  }
2508  };
2509 
2510  /* Used in function substitute(const StringPair&,
2511  HfstTransitionGraph&)
2512  Add a copy of substituting graph with epsilon transitions between
2513  states and with weight as defined in \a sub. */
2514  void add_substitution(const substitution_data &sub) {
2515  // Epsilon transition to initial state of \a graph
2516  HfstState s = add_state();
2517  HfstTransition <C> epsilon_transition
2518  (s, C::get_epsilon(), C::get_epsilon(),
2519  sub.weight);
2520  add_transition(sub.origin_state, epsilon_transition);
2521 
2522  /* Offset between state numbers */
2523  unsigned int offset = s;
2524 
2525  // Copy \a graph
2526  const HfstTransitionGraph * graph = sub.substituting_graph;
2527  HfstState source_state=0;
2528  for (const_iterator it = graph->begin();
2529  it != graph->end(); it++)
2530  {
2531  for (typename HfstTransitions::const_iterator tr_it
2532  = it->begin();
2533  tr_it != it->end(); tr_it++)
2534  {
2535  C data = tr_it->get_transition_data();
2536 
2537  HfstTransition <C> transition
2538  (tr_it->get_target_state() + offset,
2539  data.get_input_symbol(),
2540  data.get_output_symbol(),
2541  data.get_weight());
2542 
2543  add_transition(source_state + offset, transition);
2544  }
2545  source_state++;
2546  }
2547 
2548  // Epsilon transitions from final states of \a graph
2549  for (typename FinalWeightMap::const_iterator it
2550  = graph->final_weight_map.begin();
2551  it != graph->final_weight_map.end(); it++)
2552  {
2553  HfstTransition <C> epsilon_transition
2554  (sub.target_state, C::get_epsilon(), C::get_epsilon(),
2555  it->second);
2556  add_transition(it->first + offset, epsilon_transition);
2557  }
2558  }
2559 
2560 
2561  public:
2562 
2583  const HfstTransitionGraph &graph) {
2584 
2585  if ( not ( C::is_valid_symbol(sp.first) &&
2586  C::is_valid_symbol(sp.second) ) ) {
2588  (EmptyStringException,
2589  "HfstTransitionGraph::substitute(const HfstSymbolPair&, "
2590  "const HfstTransitionGraph&)");
2591  }
2592 
2593 
2594  // If neither symbol to be substituted is known to the graph,
2595  // do nothing.
2596  if (alphabet.find(sp.first) == alphabet.end() &&
2597  alphabet.find(sp.second) == alphabet.end())
2598  return *this;
2599 
2600  // Where the substituting copies of substituting graphs
2601  // are inserted (source state, target state, weight)
2602  std::vector<substitution_data> substitutions;
2603 
2604  // Go through all states
2605  HfstState source_state=0;
2606  for (iterator it = begin(); it != end(); it++)
2607  {
2608 
2609  // The transitions that are substituted, i.e. removed
2610  std::vector<typename HfstTransitions::iterator>
2611  old_transitions;
2612 
2613  // Go through all transitions
2614  for (typename HfstTransitions::iterator tr_it
2615  = it->begin();
2616  tr_it != it->end(); tr_it++)
2617  {
2618  C data = tr_it->get_transition_data();
2619 
2620  // Whether there is anything to substitute
2621  // in this transition
2622  if (data.get_input_symbol() == sp.first &&
2623  data.get_output_symbol() == sp.second)
2624  {
2625  // schedule a substitution
2626  substitutions.push_back(substitution_data
2627  (source_state,
2628  tr_it->get_target_state(),
2629  data.get_weight(),
2630  const_cast<HfstTransitionGraph *>(&graph)));
2631  // schedule the old transition to be deleted
2632  old_transitions.push_back(tr_it);
2633  }
2634  // (one transition gone through)
2635  }
2636  // (all transitions in a state gone through)
2637 
2638  // Remove the substituted transitions
2639  for (typename std::vector<typename
2640  HfstTransitions::iterator>::iterator IT =
2641  old_transitions.begin();
2642  IT != old_transitions.end(); IT++) {
2643  it->erase(*IT);
2644  }
2645 
2646  source_state++;
2647  }
2648  // (all states gone trough)
2649 
2650  // Add the substitutions
2651  for (typename std::vector<substitution_data>::iterator IT
2652  = substitutions.begin();
2653  IT != substitutions.end(); IT++)
2654  {
2655  add_substitution(*IT);
2656  }
2657  return *this;
2658  }
2659 
2660 
2661 
2662 
2663 
2664 
2665 
2666 
2667 
2668 
2669 
2670 
2671 
2672 
2673  std::string weight2marker(float weight)
2674  {
2675  std::ostringstream o;
2676  o << weight;
2677  return std::string("@") + o.str() + std::string("@");
2678  }
2679 
2680  // HERE
2681  HfstTransitionGraph & substitute_weights_with_markers() {
2682 
2683  // Go through all current states (we are going to add them)
2684  HfstState limit = state_vector.size();
2685  for (HfstState state = 0; state < limit; state++)
2686  {
2687  // The transitions that are substituted
2688  std::stack<typename HfstTransitions::iterator>
2689  old_transitions;
2690  // The transitions that will substitute
2691  std::vector<HfstTransition <C> > new_transitions;
2692 
2693  // Go through all transitions
2694  for (typename HfstTransitions::iterator tr_it
2695  = state_vector[state].begin();
2696  tr_it != state_vector[state].end(); tr_it++)
2697  {
2698  C data = tr_it->get_transition_data();
2699 
2700  // Whether there is anything to substitute
2701  // in this transition
2702  if (data.get_weight() != 0 )
2703  {
2704  // schedule a substitution
2705  new_transitions.push_back
2706  (HfstTransition <C> (tr_it->get_target_state(),
2707  data.get_input_symbol(),
2708  data.get_output_symbol(),
2709  data.get_weight()));
2710  // schedule the old transition to be deleted
2711  old_transitions.push(tr_it);
2712  }
2713  // (one transition gone through)
2714  }
2715  // (all transitions in a state gone through)
2716 
2717  // Remove the substituted transitions
2718  while (! old_transitions.empty()) {
2719  state_vector[state].erase(old_transitions.top());
2720  old_transitions.pop();
2721  }
2722 
2723  // Add the substituting transitions
2724  for (typename std::vector<HfstTransition <C> >::iterator IT
2725  = new_transitions.begin();
2726  IT != new_transitions.end(); IT++)
2727  {
2728  HfstState new_state = add_state();
2729  std::string marker = weight2marker(IT->get_weight());
2730  std::cerr << "got marker '" << marker << "'" << std::endl;
2731  HfstTransition <C> marker_transition(IT->get_target_state(),
2732  marker,
2733  marker,
2734  0);
2735  HfstTransition <C> new_transition(new_state,
2736  IT->get_input_symbol(),
2737  IT->get_output_symbol(),
2738  0);
2739  add_transition(state, new_transition);
2740  add_transition(new_state, marker_transition);
2741  }
2742 
2743  }
2744  // (all states gone trough)
2745 
2746  // Go through the final states
2747  std::set<HfstState> final_states_to_remove;
2748 
2749  for (typename FinalWeightMap::iterator fin_it = final_weight_map.begin();
2750  fin_it != final_weight_map.end(); fin_it++)
2751  {
2752  if (fin_it->second != 0)
2753  {
2754  HfstState new_state = add_state();
2755  set_final_weight(new_state, 0);
2756  std::string marker = weight2marker(fin_it->second);
2757  HfstTransition <C> epsilon_transition(new_state,
2758  marker,
2759  marker,
2760  0);
2761  add_transition(fin_it->first, epsilon_transition);
2762  final_states_to_remove.insert(fin_it->first);
2763  }
2764  }
2765  for (std::set<HfstState>::iterator it = final_states_to_remove.begin();
2766  it != final_states_to_remove.end(); it++)
2767  {
2768  final_weight_map.erase(*it);
2769  }
2770 
2771  return *this;
2772  }
2773 
2774  // ####
2775  // another version of substitute for internal use..
2776  // ####
2777  typedef std::map<HfstSymbol, HfstTransitionGraph> SubstMap;
2778 
2780  substitute(SubstMap & substitution_map,
2781  bool harmonize) {
2782 
2783  bool symbol_found = false;
2784  for (typename SubstMap::const_iterator it = substitution_map.begin();
2785  it != substitution_map.end(); it++)
2786  {
2787  if ( not ( C::is_valid_symbol(it->first) ))
2788  {
2789  HFST_THROW_MESSAGE(EmptyStringException,
2790  "HfstTransitionGraph::substitute "
2791  "(const std::map<HfstSymbol, HfstTransitionGraph> &)");
2792  }
2793  if (!symbol_found && alphabet.find(it->first) != alphabet.end())
2794  {
2795  symbol_found = true;
2796  }
2797  }
2798 
2799  // If none of the symbols to be substituted is known to the graph,
2800  // do nothing.
2801  if (!symbol_found)
2802  {
2803  return *this;
2804  }
2805 
2806  std::set<String> substitutions_performed_for_symbols;
2807 
2808  // Where the substituting copies of graphs
2809  // are inserted (source state, target state, weight)
2810  std::vector<substitution_data> substitutions;
2811 
2812  // Go through all states
2813  HfstState source_state=0;
2814  for (iterator it = begin(); it != end(); it++)
2815  {
2816 
2817  // The transitions that are substituted, i.e. removed
2818  std::stack<typename HfstTransitions::iterator>
2819  old_transitions;
2820 
2821  // Go through all transitions
2822  for (typename HfstTransitions::iterator tr_it
2823  = it->begin();
2824  tr_it != it->end(); tr_it++)
2825  {
2826  C data = tr_it->get_transition_data();
2827 
2828  // Whether there is anything to substitute
2829  // in this transition
2830  String istr = data.get_input_symbol();
2831  String ostr = data.get_output_symbol();
2832  typename SubstMap::iterator map_it_input = substitution_map.find(istr);
2833  typename SubstMap::iterator map_it_output = substitution_map.find(ostr);
2834 
2835  if (map_it_input == substitution_map.end() &&
2836  map_it_output == substitution_map.end())
2837  {
2838  ;
2839  }
2840  else if (istr != ostr)
2841  {
2842  std::string msg("symbol to be substituted must not occur only on one side of transition");
2844  }
2845  else
2846  {
2847  // schedule a substitution
2848  substitution_data sd
2849  (source_state,
2850  tr_it->get_target_state(),
2851  data.get_weight(),
2852  &(map_it_input->second));
2853  substitutions.push_back(sd);
2854  // schedule the old transition to be deleted
2855  old_transitions.push(tr_it);
2856  // ...
2857  substitutions_performed_for_symbols.insert(istr);
2858  }
2859  // (one transition gone through)
2860  }
2861  // (all transitions in a state gone through)
2862 
2863  // Remove the substituted transitions
2864  while (!old_transitions.empty())
2865  {
2866  it->erase(old_transitions.top());
2867  old_transitions.pop();
2868  }
2869 
2870  source_state++;
2871  }
2872  // (all states gone trough)
2873 
2874  // Remove all symbols that were substituted
2875  for (StringSet::const_iterator sym_it = substitutions_performed_for_symbols.begin();
2876  sym_it != substitutions_performed_for_symbols.end(); sym_it++)
2877  {
2878  if (*sym_it != "@_EPSILON_SYMBOL_@" && *sym_it != "@_UNKNOWN_SYMBOL_@" && *sym_it != "@_IDENTITY_SYMBOL_@")
2879  this->remove_symbol_from_alphabet(*sym_it);
2880  }
2881 
2882  // Harmonize the resulting and the substituting graphs, if needed
2883  if (harmonize)
2884  {
2885  for (StringSet::iterator sym_it = substitutions_performed_for_symbols.begin();
2886  sym_it != substitutions_performed_for_symbols.end(); sym_it++)
2887  {
2888  this->harmonize(substitution_map.at(*sym_it));
2889  }
2890  }
2891 
2892  // Add the substitutions
2893  for (typename std::vector<substitution_data>::iterator IT
2894  = substitutions.begin();
2895  IT != substitutions.end(); IT++)
2896  {
2897  add_substitution(*IT);
2898  }
2899 
2900  return *this;
2901  }
2902 
2903 
2904 
2905  bool marker2weight(const std::string & str, float & weight)
2906  {
2907  if (str.size() < 3)
2908  return false;
2909  if (str.at(0) != '@' || str.at(str.size()-1) != '@')
2910  return false;
2911 
2912  std::string weight_string = str.substr(1, str.size()-2);
2913  std::stringstream sstream(weight_string);
2914  sstream >> weight;
2915  if (sstream.fail()) {
2916  return false;
2917  }
2918  return true;
2919  }
2920 
2921  // HERE
2922  HfstTransitionGraph & substitute_markers_with_weights() {
2923 
2924  // Go through all states
2925  HfstState limit = state_vector.size();
2926  for (HfstState state = 0; state < limit; state++)
2927  {
2928  // The transitions that are substituted
2929  std::stack<typename HfstTransitions::iterator>
2930  old_transitions;
2931  // The transitions that will substitute
2932  std::vector<HfstTransition <C> > new_transitions;
2933 
2934  // Go through all transitions
2935  for (typename HfstTransitions::iterator tr_it
2936  = state_vector[state].begin();
2937  tr_it != state_vector[state].end(); tr_it++)
2938  {
2939  C data = tr_it->get_transition_data();
2940 
2941  float weight = 0;
2942  // Whether there is anything to substitute
2943  // in this transition
2944  if ( (!marker2weight(data.get_input_symbol(), weight)) &&
2945  marker2weight(data.get_output_symbol(), weight) )
2946  {
2947  std::cerr << "got weight '" << weight << "'" << std::endl;
2948  // schedule a substitution
2949  new_transitions.push_back
2950  (HfstTransition <C> (tr_it->get_target_state(),
2951  data.get_input_symbol(),
2952  hfst::internal_epsilon,
2953  weight));
2954  // schedule the old transition to be deleted
2955  old_transitions.push(tr_it);
2956  }
2957  // or the transition must be deleted
2958  else if (marker2weight(data.get_input_symbol(), weight) &&
2959  marker2weight(data.get_output_symbol(), weight) )
2960  {
2961  std::cerr << "got weight '" << weight << "'" << std::endl;
2962  // schedule the old transition to be deleted
2963  old_transitions.push(tr_it);
2964  }
2965  else {}
2966  // (one transition gone through)
2967  }
2968  // (all transitions in a state gone through)
2969 
2970  // Remove the substituted transitions
2971  while (! old_transitions.empty()) {
2972  state_vector[state].erase(old_transitions.top());
2973  old_transitions.pop();
2974  }
2975 
2976  // Add the substituting transitions
2977  for (typename std::vector<HfstTransition <C> >::iterator IT
2978  = new_transitions.begin();
2979  IT != new_transitions.end(); IT++)
2980  {
2981  state_vector[state].push_back(*IT);
2982  }
2983 
2984  }
2985  // (all states gone trough)
2986 
2987  std::stack<StringSet::iterator> weight_markers;
2988  for (StringSet::iterator it = alphabet.begin();
2989  it != alphabet.end(); it++)
2990  {
2991  float foo;
2992  if (marker2weight(*it, foo))
2993  {
2994  weight_markers.push(it);
2995  }
2996  }
2997  while (! weight_markers.empty())
2998  {
2999  alphabet.erase(weight_markers.top());
3000  weight_markers.pop();
3001  }
3002 
3003  return *this;
3004  }
3005 
3006 
3007 
3008 
3009  /* ----------------------------
3010  Insert freely functions
3011  ---------------------------- */
3012 
3013 
3017  (const HfstSymbolPair &symbol_pair, typename C::WeightType weight)
3018  {
3019  if ( not ( C::is_valid_symbol(symbol_pair.first) &&
3020  C::is_valid_symbol(symbol_pair.second) ) ) {
3022  (EmptyStringException,
3023  "HfstTransitionGraph::insert_freely"
3024  "(const HfstSymbolPair&, W)");
3025  }
3026 
3027  alphabet.insert(symbol_pair.first);
3028  alphabet.insert(symbol_pair.second);
3029 
3030  HfstState source_state=0;
3031  for (iterator it = begin(); it != end(); it++) {
3032  HfstTransition <C> tr( source_state, symbol_pair.first,
3033  symbol_pair.second, weight );
3034  it->push_back(tr);
3035  source_state++;
3036  }
3037 
3038  return *this;
3039  }
3040 
3044  (const HfstSymbolPairSet &symbol_pairs,
3045  typename C::WeightType weight)
3046  {
3047  for (typename HfstSymbolPairSet::const_iterator symbols_it
3048  = symbol_pairs.begin();
3049  symbols_it != symbol_pairs.end(); symbols_it++)
3050  {
3051  if ( not ( C::is_valid_symbol(symbols_it->first) &&
3052  C::is_valid_symbol(symbols_it->second) ) ) {
3054  (EmptyStringException,
3055  "HfstTransitionGraph::insert_freely"
3056  "(const HfstSymbolPairSet&, W)");
3057  }
3058 
3059  alphabet.insert(symbols_it->first);
3060  alphabet.insert(symbols_it->second);
3061  }
3062 
3063  HfstState source_state=0;
3064  for (iterator it = begin(); it != end(); it++)
3065  {
3066  for (typename HfstSymbolPairSet::const_iterator symbols_it
3067  = symbol_pairs.begin();
3068  symbols_it != symbol_pairs.end(); symbols_it++)
3069  {
3070  HfstTransition <C> tr( source_state, symbols_it->first,
3071  symbols_it->second, weight );
3072  it->push_back(tr);
3073  }
3074  source_state++;
3075  }
3076 
3077  return *this;
3078  }
3079 
3083  (const HfstTransitionGraph &graph)
3084  {
3085  HfstSymbol marker_this = C::get_marker(alphabet);
3086  HfstSymbol marker_graph = C::get_marker(alphabet);
3087  HfstSymbol marker = marker_this;
3088  if (marker_graph > marker)
3089  marker = marker_graph;
3090 
3091  HfstSymbolPair marker_pair(marker, marker);
3092  insert_freely(marker_pair, 0);
3093  substitute(marker_pair, graph);
3094  alphabet.erase(marker); // TODO: fix
3095 
3096  return *this;
3097  }
3098 
3099 
3100  /* -------------------------------
3101  Harmonization function
3102  ------------------------------- */
3103 
3131  {
3132  HarmonizeUnknownAndIdentitySymbols foo(*this, another);
3133  return *this;
3134  }
3135 
3136 
3137  /* -------------------------------
3138  Disjunction functions
3139  ------------------------------- */
3140 
3141  protected:
3142  /* Disjunct the transition of path \a spv pointed by \a it
3143  to state \a s. If the transition does not exist in the graph,
3144  it is created as well as its target state.
3145 
3146  @return The final state of path \a spv, when \a it is at end. */
3147  HfstState disjunct(const StringPairVector &spv,
3148  StringPairVector::const_iterator &it,
3149  HfstState s)
3150  {
3151  // Path inserted, return the final state on this path
3152  if (it == spv.end()) {
3153  return s;
3154  }
3155 
3156  HfstTransitions tr = state_vector[s];
3157  bool transition_found=false;
3158  /* The target state of the transition followed or added */
3159  HfstState next_state;
3160 
3161  // Find the transition
3162  // (Searching is slow?)
3163  for (typename HfstTransitions::iterator tr_it = tr.begin();
3164  tr_it != tr.end(); tr_it++)
3165  {
3166  C data = tr_it->get_transition_data();
3167  if (data.get_input_symbol().compare(it->first) == 0 &&
3168  data.get_output_symbol().compare(it->second) == 0)
3169  {
3170  transition_found=true;
3171  next_state = tr_it->get_target_state();
3172  break;
3173  }
3174  }
3175 
3176  // If not found, create the transition
3177  if (not transition_found)
3178  {
3179  next_state = add_state();
3180  HfstTransition <C> transition(next_state, it->first,
3181  it->second, 0);
3182  add_transition(s, transition);
3183  }
3184 
3185  // Advance to the next transition on path
3186  it++;
3187  return disjunct(spv, it, next_state);
3188  }
3189 
3190  public:
3191 
3211  HfstTransitionGraph &disjunct
3212  (const StringPairVector &spv, typename C::WeightType weight)
3213  {
3214  StringPairVector::const_iterator it = spv.begin();
3215  HfstState final_state = disjunct(spv, it, INITIAL_STATE);
3216 
3217  // Set the weight of final state
3218  if (is_final_state(final_state))
3219  {
3220  float old_weight = get_final_weight(final_state);
3221  if (old_weight < weight)
3222  return *this; /* The same path with smaller weight remains */
3223  }
3224  set_final_weight(final_state, weight);
3225  return *this;
3226  }
3227 
3228  bool is_special_symbol(const std::string & symbol)
3229  {
3230  if (symbol.size() < 2)
3231  return false;
3232  if (symbol[0] == '@' && symbol[1] == '_')
3233  return true;
3234  return false;
3235  }
3236 
3237  HfstTransitionGraph &complete()
3238  {
3239  HfstState failure_state = add_state();
3240  HfstState current_state = 0;
3241 
3242  for (iterator it = begin(); it != end(); it++)
3243  {
3244  std::set<HfstSymbol> symbols_present;
3245 
3246  for (typename HfstTransitions::iterator tr_it
3247  = it->begin();
3248  tr_it != it->end(); tr_it++)
3249  {
3250  C data = tr_it->get_transition_data();
3251 
3252  if (data.get_input_symbol() != data.get_output_symbol())
3253  {
3255  }
3256  symbols_present.insert(data.get_input_symbol());
3257  }
3258  for (std::set<std::string>::const_iterator alpha_it = alphabet.begin();
3259  alpha_it != alphabet.end(); alpha_it++)
3260  {
3261  if (symbols_present.find(*alpha_it) ==
3262  symbols_present.end() &&
3263  ! is_special_symbol(*alpha_it))
3264  {
3266  (current_state,
3267  HfstBasicTransition(failure_state, *alpha_it, *alpha_it, 0));
3268  }
3269  }
3270  current_state++;
3271  }
3272  }
3273 
3274  StringSet get_flags() const
3275  {
3276  StringSet flags;
3277  for (StringSet::const_iterator it = alphabet.begin();
3278  it != alphabet.end(); it++)
3279  {
3280  if (FdOperation::is_diacritic(*it)) {
3281  flags.insert(*it);
3282  }
3283  }
3284  return flags;
3285  }
3286 
3287  // Whether symbol \a symbol must be purged from transitions and alphabet
3288  // of a transducer after \a flag has been eliminated from the transducer.
3289  // If \a flag is the empty string, all flags have been eliminated.
3290  bool purge_symbol(const std::string & symbol, const std::string & flag)
3291  {
3292  if (! FdOperation::is_diacritic(symbol))
3293  return false;
3294  if (flag == "")
3295  return true;
3296  else if (FdOperation::get_feature(symbol) == flag)
3297  return true;
3298  return false;
3299  }
3300 
3301  // Replace arcs in \a transducer that use flag \a flag with epsilon arcs
3302  // and remove \a flag from alphabet of \a transducer. If \a flag is the empty
3303  // string, replace/remove all flags.
3304  void flag_purge(const std::string & flag)
3305  {
3306  // (1) Go through all states and transitions
3307  for (iterator it = begin(); it != end(); it++)
3308  {
3309  for (unsigned int i=0; i < it->size(); i++)
3310  {
3311  HfstTransition<C> &tr_it = it->operator[](i);
3312 
3313  if ( purge_symbol(tr_it.get_input_symbol(), flag) ||
3314  purge_symbol(tr_it.get_output_symbol(), flag) )
3315  {
3316  // change the current transition
3317  HfstTransition<C> tr
3318  (tr_it.get_target_state(), "@_EPSILON_SYMBOL_@",
3319  "@_EPSILON_SYMBOL_@", tr_it.get_weight());
3320  it->operator[](i) = tr;
3321  }
3322  }
3323  }
3324  // (2) Go through the alphabet
3325  StringSet extra_symbols;
3326  for (StringSet::const_iterator it = alphabet.begin();
3327  it != alphabet.end(); it++)
3328  {
3329  if (purge_symbol(*it, flag))
3330  extra_symbols.insert(*it);
3331  }
3332  // remove symbols
3333  remove_symbols_from_alphabet(extra_symbols);
3334  }
3335 
3336  /* A topological sort. */
3337  struct TopologicalSort
3338  {
3339  std::vector<int> distance_of_state;
3340  std::vector<std::set<HfstState> > states_at_distance;
3341 
3342  /* Initialize the TopologicalSort by reserving space for a transducer
3343  with biggest state number \a biggest_state_number, */
3344  void set_biggest_state_number(unsigned int biggest_state_number)
3345  {
3346  distance_of_state = std::vector<int>(biggest_state_number+1, -1);
3347  }
3348 
3349  /* Set the maximum distance of \a state to \a distance, */
3350  void set_state_at_distance(HfstState state, unsigned int distance,
3351  bool overwrite)
3352  {
3353  // see that 'state' does not exceed the maximum state number given in initialization
3354  if (state > (distance_of_state.size() - 1))
3355  {
3356  std::cerr << "ERROR in TopologicalSort::set_state_at_distance: first argument ("
3357  << state << ") is out of range (should be < " << distance_of_state.size()
3358  << ")" << std::endl;
3359  }
3360  // if there is nothing on index 'state',
3361  // push back empty sets of states up to index 'state', including
3362  while (distance + 1 > (unsigned int)states_at_distance.size())
3363  {
3364  std::set<HfstState> empty_set;
3365  states_at_distance.push_back(empty_set);
3366  }
3367  // if there was previous distance defined for 'state', erase it, if needed
3368  int previous_distance = distance_of_state.at(state);
3369  if (previous_distance != -1 && previous_distance != distance && overwrite)
3370  {
3371  states_at_distance.at(previous_distance).erase(state);
3372  }
3373  // set state and distance
3374  states_at_distance.at(distance).insert(state);
3375  distance_of_state.at(state) = distance;
3376  }
3377 
3378  /* The states that have a maximum distance of \a distance. */
3379  const std::set<HfstState> & get_states_at_distance(unsigned int distance)
3380  {
3381  // if there is nothing on index 'state',
3382  // push back empty sets of states up to index 'state', including
3383  while (distance > (states_at_distance.size() - 1))
3384  {
3385  std::set<HfstState> empty_set;
3386  states_at_distance.push_back(empty_set);
3387  }
3388  return states_at_distance.at(distance);
3389  }
3390  };
3391 
3392  enum SortDistance { MaximumDistance, MinimumDistance };
3393 
3394  /*
3395  Get a topological (maximum distance) sort of this graph.
3396  @return A vector of sets of states. At each vector index ind, the
3397  result contains the set of all states whose (maximum) distance from
3398  the start state is ind.
3399  */
3400  std::vector<std::set<HfstState> > topsort(SortDistance dist) const
3401  {
3402  typedef std::set<HfstState>::const_iterator StateIt;
3403  unsigned int current_distance = 0; // topological distance
3404  TopologicalSort TopSort;
3405  TopSort.set_biggest_state_number(state_vector.size()-1);
3406  TopSort.set_state_at_distance(0,current_distance,(dist == MaximumDistance));
3407  bool new_states_found = false; // end condition for do-while loop
3408 
3409  do
3410  {
3411  new_states_found = false;
3412  // states that are accessible from the current set of states
3413  std::set<HfstState> new_states;
3414 
3415  // go through all states at current distance
3416  const std::set<HfstState> & states =
3417  TopSort.get_states_at_distance(current_distance);
3418  for (StateIt state_it = states.begin();
3419  state_it != states.end(); state_it++)
3420  {
3421  // go through all transitions of each state
3422  const HfstTransitions & transitions
3423  = this->state_vector.at(*state_it);
3424  for (typename HfstTransitions::const_iterator transition_it
3425  = transitions.begin();
3426  transition_it != transitions.end(); transition_it++)
3427  {
3428  new_states_found = true;
3429  new_states.insert(transition_it->get_target_state());
3430  }
3431  // all transitions gone through
3432  }
3433  // all states gone through
3434 
3435  // set each accessible state at distance one higher than the
3436  // current distance
3437  for (StateIt it = new_states.begin();
3438  it != new_states.end(); it++)
3439  {
3440  TopSort.set_state_at_distance(*it, current_distance + 1, (dist == MaximumDistance));
3441  }
3442  current_distance++;
3443  }
3444  while (new_states_found);
3445 
3446  return TopSort.states_at_distance;
3447  }
3448 
3452  {
3453  // get topological maximum distance sort
3454  std::vector<std::set<HfstState> > states_sorted = this->topsort(MaximumDistance);
3455  // go through all sets of states in descending order
3456  for (int distance = states_sorted.size() - 1; distance >= 0; distance--)
3457  {
3458  const std::set<HfstState> & states
3459  = states_sorted.at((unsigned int)distance);
3460  // go through all states in a set
3461  for (std::set<HfstState>::const_iterator it = states.begin();
3462  it != states.end(); it++)
3463  {
3464  // if a final state is encountered, return the distance
3465  // of that state
3466  if (is_final_state(*it))
3467  {
3468  return distance;
3469  }
3470  }
3471  }
3472  // if no final states were encountered, return a negative value
3473  return -1;
3474  }
3475 
3478  std::vector<unsigned int> path_sizes()
3479  {
3480  std::vector<unsigned int> result;
3481  // get topological maximum distance sort
3482  std::vector<std::set<HfstState> > states_sorted = this->topsort(MinimumDistance);
3483  // go through all sets of states in descending order
3484  for (int distance = states_sorted.size() - 1; distance >= 0; distance--)
3485  {
3486  const std::set<HfstState> & states
3487  = states_sorted.at((unsigned int)distance);
3488  // go through all states in a set
3489  for (std::set<HfstState>::const_iterator it = states.begin();
3490  it != states.end(); it++)
3491  {
3492  // if a final state is encountered, add its distance
3493  // to result
3494  if (is_final_state(*it))
3495  {
3496  result.push_back((unsigned int)distance);
3497  break; // go to next set of states
3498  }
3499  }
3500  }
3501  return result;
3502  }
3503 
3504  bool is_infinitely_ambiguous
3505  (HfstState state,
3506  std::set<HfstState> &epsilon_path_states,
3507  std::vector<unsigned int> &states_handled)
3508  {
3509  if (states_handled[state] != 0)
3510  return false;
3511 
3512  // Go through all transitions in this state
3513  const HfstBasicTransducer::HfstTransitions &transitions
3514  = this->operator[](state);
3516  = transitions.begin();
3517  it != transitions.end(); it++)
3518  {
3519  // (Diacritics are also treated as epsilons, although it might cause false
3520  // positive results, because loops with diacritics can be invalidated by
3521  // other diacritics.)
3522  if ( is_epsilon(it->get_input_symbol()) ||
3523  FdOperation::is_diacritic(it->get_input_symbol()) )
3524  {
3525  epsilon_path_states.insert(state);
3526  if (epsilon_path_states.find(it->get_target_state())
3527  != epsilon_path_states.end())
3528  {
3529  return true;
3530  }
3531  if (is_infinitely_ambiguous
3532  (it->get_target_state(), epsilon_path_states, states_handled))
3533  {
3534  return true;
3535  }
3536  epsilon_path_states.erase(state);
3537  }
3538  }
3539  // mark state as handled
3540  states_handled[state] = 1;
3541  return false;
3542  }
3543 
3544  bool is_infinitely_ambiguous()
3545  {
3546  std::set<HfstState> epsilon_path_states;
3547  HfstState max_state = this->get_max_state();
3548  std::vector<unsigned int> states_handled(max_state+1, 0);
3549 
3550  for (unsigned int state = INITIAL_STATE; state < (max_state+1); state++)
3551  {
3552  if (is_infinitely_ambiguous(state, epsilon_path_states, states_handled))
3553  return true;
3554  }
3555  return false;
3556  }
3557 
3558  bool is_lookup_infinitely_ambiguous
3559  (const HfstOneLevelPath& s,
3560  unsigned int& index, HfstState state,
3561  std::set<HfstState> &epsilon_path_states
3562  )
3563  {
3564  // Whether the end of the lookup path s has been reached
3565  bool only_epsilons=false;
3566  if ((unsigned int)s.second.size() == index)
3567  {
3568  only_epsilons=true;
3569  }
3570 
3571  // Go through all transitions in this state
3572  const HfstBasicTransducer::HfstTransitions &transitions
3573  = this->operator[](state);
3574  for (HfstBasicTransducer::HfstTransitions::const_iterator it
3575  = transitions.begin();
3576  it != transitions.end(); it++)
3577  {
3578  // CASE 1: Input epsilons do not consume a symbol in the lookup path s,
3579  // so they can be added freely.
3580  // (Diacritics are also treated as epsilons, although it might cause false
3581  // positive results, because loops with diacritics can be invalidated by
3582  // other diacritics.)
3583  if ( is_epsilon(it->get_input_symbol()) ||
3584  FdOperation::is_diacritic(it->get_input_symbol()) )
3585  {
3586  epsilon_path_states.insert(state);
3587  if (epsilon_path_states.find(it->get_target_state())
3588  != epsilon_path_states.end())
3589  {
3590  return true;
3591  }
3592  if (is_lookup_infinitely_ambiguous
3593  (s, index, it->get_target_state(), epsilon_path_states))
3594  {
3595  return true;
3596  }
3597  epsilon_path_states.erase(state);
3598  }
3599 
3600  /* CASE 2: Other input symbols consume a symbol in the lookup path s,
3601  so they can be added only if the end of the lookup path s has not
3602  been reached. */
3603  else if (not only_epsilons)
3604  {
3605  bool continu = false;
3606  if (it->get_input_symbol().compare(s.second.at(index)) == 0)
3607  continu = true;
3608  else if ((it->get_input_symbol().compare("@_UNKNOWN_SYMBOL_@") ||
3609  it->get_input_symbol().compare("@_IDENTITY_SYMBOL_@"))
3610  &&
3611  (alphabet.find(s.second.at(index)) == alphabet.end()))
3612  {
3613  continu = true;
3614  }
3615 
3616  if (continu)
3617  {
3618  index++; // consume an input symbol in the lookup path s
3619  std::set<HfstState> empty_set;
3620  if (is_lookup_infinitely_ambiguous
3621  (s, index, it->get_target_state(), empty_set))
3622  {
3623  return true;
3624  }
3625  index--; // add the input symbol back to the lookup path s.
3626  }
3627  }
3628  }
3629  return false;
3630  }
3631 
3632  bool is_lookup_infinitely_ambiguous(const HfstOneLevelPath & s)
3633  {
3634  std::set<HfstState> epsilon_path_states;
3635  epsilon_path_states.insert(0);
3636  unsigned int index=0;
3637 
3638  return is_lookup_infinitely_ambiguous(s, index, INITIAL_STATE,
3639  epsilon_path_states);
3640  }
3641 
3642  bool is_lookup_infinitely_ambiguous(const StringVector & s)
3643  {
3644  std::set<HfstState> epsilon_path_states;
3645  epsilon_path_states.insert(0);
3646  unsigned int index=0;
3647  HfstOneLevelPath path(0, s);
3648 
3649  return is_lookup_infinitely_ambiguous(path, index, INITIAL_STATE,
3650  epsilon_path_states);
3651  }
3652 
3653 
3654 
3655  static void push_back_to_two_level_path
3656  (HfstTwoLevelPath &path,
3657  const StringPair &sp,
3658  const float &weight)
3659  {
3660  path.second.push_back(sp);
3661  path.first = path.first + weight;
3662  }
3663 
3664  static void pop_back_from_two_level_path
3665  (HfstTwoLevelPath &path,
3666  const float &weight)
3667  {
3668  path.second.pop_back();
3669  path.first = path.first - weight;
3670  }
3671 
3672  static void add_to_results
3673  (HfstTwoLevelPaths &results,
3674  HfstTwoLevelPath &path_so_far,
3675  const float &final_weight,
3676  float * max_weight)
3677  {
3678  path_so_far.first = path_so_far.first + final_weight;
3679 
3680  if (max_weight == NULL) // no weight limitation given
3681  {
3682  results.insert(path_so_far);
3683  }
3684  else if (!(path_so_far.first > *max_weight)) // weight limitation not exceeded
3685  {
3686  results.insert(path_so_far);
3687  }
3688  else // weight limitation exceeded
3689  {
3690  ;
3691  }
3692  path_so_far.first = path_so_far.first - final_weight;
3693  }
3694 
3695  static bool is_possible_transition
3696  (const HfstBasicTransition &transition,
3697  const StringVector &lookup_path,
3698  const unsigned int &lookup_index,
3699  const StringSet &alphabet,
3700  bool &input_symbol_consumed)
3701  {
3702  std::string isymbol = transition.get_input_symbol();
3703 
3704  // If we are not at the end of lookup_path,
3705  if (not (lookup_index == (unsigned int)lookup_path.size()))
3706  {
3707  // we can go further if the current symbol in lookup_path
3708  // matches to the input symbol of the transition, i.e
3709  // either the input symbol is the same as the current symbol
3710  if ( isymbol.compare(lookup_path.at(lookup_index)) == 0 ||
3711  // or the input symbol is the identity or unknown symbol and
3712  // the current symbol is not found in the alphabet
3713  // of the transducer.
3714  ( (is_identity(isymbol) || is_unknown(isymbol)) &&
3715  (alphabet.find(lookup_path.at(lookup_index))
3716  == alphabet.end()) )
3717  )
3718  {
3719  input_symbol_consumed=true;
3720  return true;
3721  }
3722  }
3723  // Whether there are more symbols in lookup_path or not,
3724  // we can always go further if the input symbol of the transition
3725  // is an epsilon or a flag diacritic.
3726  if ( is_epsilon(isymbol) ||
3727  FdOperation::is_diacritic(isymbol) )
3728  {
3729  input_symbol_consumed=false;
3730  return true;
3731  }
3732 
3733  // No matches.
3734  return false;
3735  }
3736 
3737  void lookup_fd
3738  (const StringVector &lookup_path,
3739  HfstTwoLevelPaths &results,
3740  HfstState state,
3741  unsigned int lookup_index, // an iterator instead?
3742  HfstTwoLevelPath &path_so_far,
3743  StringSet &alphabet,
3744  HfstEpsilonHandler Eh,
3745  size_t infinite_cutoff,
3746  float * max_weight = NULL)
3747  {
3748  // Check whether the number of input epsilon cycles is exceeded
3749  if (not Eh.can_continue(state)) {
3750  return;
3751  }
3752  // Check whether the maximum weight is exceeded
3753  if (max_weight != NULL && path_so_far.first > *max_weight) {
3754  return;
3755  }
3756 
3757  // If we are at the end of lookup_path,
3758  if (lookup_index == lookup_path.size())
3759  {
3760  // and if the current state is final,
3761  if (this->is_final_state(state))
3762  {
3763  // path_so_far is a valid result if max_weight is not exceeded
3764  add_to_results
3765  (results, path_so_far, this->get_final_weight(state), max_weight);
3766  }
3767  }
3768 
3769  // Whether there are more symbols in lookup_path or not,
3770  // go through all transitions in the current state.
3771  const HfstBasicTransducer::HfstTransitions &transitions
3772  = this->operator[](state);
3773  for (HfstBasicTransducer::HfstTransitions::const_iterator it
3774  = transitions.begin();
3775  it != transitions.end(); it++)
3776  {
3777  bool input_symbol_consumed=false;
3778  if ( is_possible_transition
3779  (*it, lookup_path, lookup_index, alphabet,
3780  input_symbol_consumed) )
3781  {
3782  // update path_so_far and lookup_index
3783  std::string istr;
3784  std::string ostr;
3785 
3786  // identity symbol is replaced with the lookup symbol
3787  if (is_identity(it->get_input_symbol()))
3788  {
3789  istr = lookup_path.at(lookup_index);
3790  ostr = istr;
3791  }
3792  else
3793  {
3794  if (is_unknown(it->get_input_symbol()))
3795  istr = lookup_path.at(lookup_index);
3796  else
3797  istr = it->get_input_symbol();
3798 
3799  /*if (is_unknown(it->get_output_symbol()))
3800  ostr = std::string("?");
3801  else*/
3802  ostr = it->get_output_symbol();
3803  }
3804 
3805  push_back_to_two_level_path
3806  (path_so_far,
3807  StringPair(istr, ostr),
3808  it->get_weight());
3809 
3810  HfstEpsilonHandler * Ehp = NULL;
3811  if (input_symbol_consumed) {
3812  lookup_index++;
3813  Ehp = new HfstEpsilonHandler(infinite_cutoff);
3814  }
3815  else {
3816  Eh.push_back(state);
3817  Ehp = &Eh;
3818  }
3819 
3820  // call lookup for the target state of the transition
3821  lookup_fd(lookup_path, results, it->get_target_state(),
3822  lookup_index, path_so_far, alphabet, *Ehp, infinite_cutoff, max_weight);
3823 
3824  // return to the original values of path_so_far
3825  // and lookup_index
3826  if (input_symbol_consumed) {
3827  lookup_index--;
3828  delete Ehp;
3829  }
3830  else {
3831  // Eh.pop_back(); not needed because the destructor
3832  // of Eh is automatically called next
3833  }
3834 
3835  pop_back_from_two_level_path(path_so_far, it->get_weight());
3836  }
3837  }
3838 
3839  }
3840 
3841  void lookup_fd
3842  (const StringVector &lookup_path,
3843  HfstTwoLevelPaths &results,
3844  size_t infinite_cutoff,
3845  float * max_weight = NULL)
3846  {
3847  HfstState state = 0;
3848  unsigned int lookup_index = 0;
3849  HfstTwoLevelPath path_so_far;
3850  StringSet alphabet = this->get_alphabet();
3851  HfstEpsilonHandler Eh(infinite_cutoff);
3852  lookup_fd(lookup_path, results, state, lookup_index, path_so_far,
3853  alphabet, Eh, infinite_cutoff, max_weight);
3854  }
3855 
3856 
3857 /* /\** @brief Determine whether this graph has input-epsilon cycles. */
3858 /* *\/ */
3859 /* bool has_input_epsilon_cycles(void) */
3860 /* { */
3861 /* typedef std::map<HfstState, */
3862 /* std::set<HfstTransition<C> > > */
3863 /* HfstStates; */
3864 /* HfstStates state_map; */
3865 
3866 /* std::set<HfstState> total_seen; */
3867 /* for (state_vector::iterator it = state_vector.begin(); */
3868 /* it != state_vector.end(); ++it) { */
3869 /* if (total_seen.count(*it) != 0) { */
3870 /* continue; */
3871 /* } */
3872 
3873 /* } */
3874 /* } */
3875 
3876 
3877  // --- Friends ---
3878 
3879  friend class ConversionFunctions;
3880  friend class hfst::HarmonizeUnknownAndIdentitySymbols;
3881  };
3882 
3887  typedef HfstTransitionGraph <HfstTropicalTransducerTransitionData>
3889 
3891  typedef HfstTransitionGraph <HfstFastTransitionData>
3893 
3894 
3895  }
3896 
3897 }
3898 
3899 #endif // #ifndef _HFST_TRANSITION_GRAPH_H_
C::SymbolType get_output_symbol() const
Get the output symbol of the transition.
Definition: HfstTransition.h:96
C::SymbolType get_input_symbol() const
Get the input symbol of the transition.
Definition: HfstTransition.h:91
iterator begin()
Get an iterator to the beginning of the states in the graph.
Definition: HfstTransitionGraph.h:660
HfstTransitionGraph & insert_freely(const HfstSymbolPair &symbol_pair, typename C::WeightType weight)
Insert freely any number of symbol_pair in the graph with weight weight.
Definition: HfstTransitionGraph.h:3017
int longest_path_size()
Definition: HfstTransitionGraph.h:3451
std::pair< String, String > StringPair
A symbol pair in a transition.
Definition: HfstSymbolDefs.h:71
HfstTransitionGraph< HfstFastTransitionData > HfstFastTransducer
A specialization for faster conversion.
Definition: HfstDataTypes.h:117
HfstTransitionGraph(void)
Create a graph with one initial state that has state number zero and is not a final state...
Definition: HfstTransitionGraph.h:198
std::string String
A UTF-8 symbol in a transition.
Definition: HfstSymbolDefs.h:60
HfstTransitionGraph & sort_arcs(void)
Sort the arcs of this transducer according to input and output symbols.
Definition: HfstTransitionGraph.h:643
const HfstTransitions & operator[](HfstState s) const
Get the set of transitions of state s in this graph.
Definition: HfstTransitionGraph.h:680
std::vector< std::pair< std::string, std::string > > StringPairVector
A vector of string pairs.
Definition: HfstDataTypes.h:106
HfstTransitionGraph(const HfstTransitionGraph &graph)
Create a deep copy of HfstTransitionGraph graph.
Definition: HfstTransitionGraph.h:239
void prune_alphabet(bool force=true)
Remove all symbols that do not occur in transitions of the graph from its alphabet.
Definition: HfstTransitionGraph.h:448
HfstState get_max_state() const
Get the biggest state number in use.
Definition: HfstTransitionGraph.h:547
unsigned int HfstState
The number of a state in an HfstTransitionGraph.
Definition: HfstDataTypes.h:120
A synchronous finite-state transducer.
Definition: HfstTransducer.h:227
const HfstTransitionGraphAlphabet & get_alphabet() const
Get the set of HfstSymbols in the alphabet of the graph.
Definition: HfstTransitionGraph.h:496
HfstState get_target_state() const
Get the target state of the transition.
Definition: HfstTransition.h:81
void write_in_xfst_format(std::ostream &os, bool write_weights=true)
Write the graph in xfst text format to ostream os. write_weights defines whether weights are printed ...
Definition: HfstTransitionGraph.h:893
void write_in_att_format_number(FILE *file, bool write_weights=true)
Write the graph in AT&T format to FILE file using numbers instead of symbol names. write_weights defines whether weights are printed.
Definition: HfstTransitionGraph.h:1706
State is not final (and cannot have a final weight).
Definition: HfstExceptionDefs.h:250
std::vector< unsigned int > path_sizes()
Definition: HfstTransitionGraph.h:3478
bool is_final_state(HfstState s) const
Whether state s is final. FIXME: return positive infinity instead if not final.
Definition: HfstTransitionGraph.h:620
std::set< HfstSymbol > HfstSymbolSet
A set of symbol pairs.
Definition: HfstTransitionGraph.h:138
HfstTransitionGraph(const hfst::HfstTransducer &transducer)
Create an HfstTransitionGraph equivalent to HfstTransducer transducer. FIXME: move to a separate file...
Definition: HfstTransitionGraph.h:248
void add_transition(HfstState s, const HfstTransition< C > &transition, bool add_symbols_to_alphabet=true)
Add a transition transition to state s.
Definition: HfstTransitionGraph.h:554
Class HfstTransition.
Base class for HfstExceptions. Holds its own name and the file and line number where it was thrown...
Definition: HfstExceptionDefs.h:15
A simple transition graph format that consists of states and transitions between those states...
Definition: HfstDataTypes.h:113
std::vector< HfstTransition< C > > HfstTransitions
Datatype for the states of a transition in a graph.
Definition: HfstTransitionGraph.h:128
HfstStates::const_iterator const_iterator
A const iterator type that points a state in a graph.
Definition: HfstTransitionGraph.h:179
void write_in_att_format(FILE *file, bool write_weights=true)
Write the graph in AT&T format to FILE file. write_weights defines whether weights are printed...
Definition: HfstTransitionGraph.h:1603
void add_symbol_to_alphabet(const HfstSymbol &symbol)
Explicitly add symbol to the alphabet of the graph.
Definition: HfstTransitionGraph.h:346
void write_in_att_format(std::ostream &os, bool write_weights=true)
Write the graph in AT&T format to ostream os. write_weights defines whether weights are printed...
Definition: HfstTransitionGraph.h:1556
const C & get_transition_data() const
Get the transition data of the transition.
Definition: HfstTransition.h:86
#define HFST_THROW(E)
Macro to throw an exception of type E. Use THROW instead of regular throw with subclasses of HfstExce...
Definition: HfstExceptionDefs.h:28
std::set< HfstTwoLevelPath > HfstTwoLevelPaths
A set of two-level weighted paths.
Definition: HfstDataTypes.h:110
std::string name
The name of the graph.
Definition: HfstTransitionGraph.h:182
The stream is at end.
Definition: HfstExceptionDefs.h:149
std::vector< HfstSymbolPair > HfstSymbolPairVector
A vector of symbol pairs.
Definition: HfstTransitionGraph.h:140
The StateId argument is not valid.
Definition: HfstExceptionDefs.h:304
const_iterator end() const
Get a const iterator to the end of states (last state + 1) in the graph.
Definition: HfstTransitionGraph.h:672
HfstTransitionGraph & substitute(const HfstSymbol &old_symbol, const HfstSymbol &new_symbol, bool input_side=true, bool output_side=true)
Substitute old_symbol with new_symbol in all transitions. input_side and output_side define whether t...
Definition: HfstTransitionGraph.h:2311
C::WeightType get_final_weight(HfstState s) const
Definition: HfstTransitionGraph.h:625
C::SymbolType HfstSymbol
Datatype for a symbol in a transition.
Definition: HfstTransitionGraph.h:131
void remove_transition(HfstState s, const HfstTransition< C > &transition, bool remove_symbols_from_alphabet=false)
Remove transition transition from state s. remove_symbols_from_alphabet defines whether symbols in tr...
Definition: HfstTransitionGraph.h:574
std::set< StringPair > StringPairSet
A set of symbol pairs used in substituting symbol pairs and in rule functions.
Definition: HfstSymbolDefs.h:83
void add_symbols_to_alphabet(const HfstSymbolSet &symbols)
Same as add_symbol_to_alphabet for each symbol in symbols.
Definition: HfstTransitionGraph.h:368
HfstTransitionGraph & harmonize(HfstTransitionGraph &another)
Harmonize this HfstTransitionGraph and another.
Definition: HfstTransitionGraph.h:3130
const HfstTransitions & transitions(HfstState s) const
Alternative name for operator[].
Definition: HfstTransitionGraph.h:693
std::pair< float, StringVector > HfstOneLevelPath
A path of one level of arcs with collected weight.
Definition: HfstDataTypes.h:97
Transducers are not automata.
Definition: HfstExceptionDefs.h:291
void write_in_prolog_format(std::ostream &os, const std::string &name, bool write_weights=true)
Write the graph in prolog format to ostream os. write_weights defines whether weights are printed (to...
Definition: HfstTransitionGraph.h:1056
std::pair< HfstSymbol, HfstSymbol > HfstSymbolPair
Datatype for a symbol pair in a transition.
Definition: HfstTransitionGraph.h:134
Declarations of functions for converting between transducer backend formats.
void write_in_prolog_format(FILE *file, const std::string &name, bool write_weights=true)
Write the graph in prolog format to FILE file. write_weights defines whether weights are printed (tod...
Definition: HfstTransitionGraph.h:994
std::vector< HfstState > states() const
The states of the graph.
Definition: HfstTransitionGraph.h:185
#define HFST_THROW_MESSAGE(E, M)
Macro to throw an exception of type E with message M. Use THROW instead of regular throw with subclas...
Definition: HfstExceptionDefs.h:35
std::map< String, String > HfstSymbolSubstitutions
A map of substitutions used when performing multiple symbol-to-symbol substitutions.
Definition: HfstSymbolDefs.h:89
iterator end()
Get an iterator to the end of states (last state + 1) in the graph.
Definition: HfstTransitionGraph.h:668
std::pair< float, StringPairVector > HfstTwoLevelPath
A path of two level of arcs with collected weight.
Definition: HfstDataTypes.h:108
HfstTransition< HfstTropicalTransducerTransitionData > HfstBasicTransition
An HfstTransition with transition data of type HfstTropicalTransducerTransitionData.
Definition: HfstDataTypes.h:122
C::WeightType get_weight() const
Get the weight of the transition.
Definition: HfstTransition.h:111
HfstTransitionGraph & substitute(bool(*func)(const HfstSymbolPair &sp, HfstSymbolPairSet &sps))
Substitute all transitions with a set of transitions as defined by function func. ...
Definition: HfstTransitionGraph.h:2474
const_iterator begin() const
Get a const iterator to the beginning of states in the graph.
Definition: HfstTransitionGraph.h:664
HfstState add_state(HfstState s)
Add a state s to this graph.
Definition: HfstTransitionGraph.h:538
void remove_symbol_from_alphabet(const HfstSymbol &symbol)
Remove symbol symbol from the alphabet of the graph.
Definition: HfstTransitionGraph.h:354
std::set< HfstSymbol > HfstTransitionGraphAlphabet
Datatype for the alphabet of a graph.
Definition: HfstTransitionGraph.h:142
HfstTransitionGraph & substitute(const HfstSymbolPair &sp, const HfstTransitionGraph &graph)
Substitute all transitions old_symbol : new_symbol with a copy of graph.
Definition: HfstTransitionGraph.h:2582
HfstTransitionGraph< HfstTropicalTransducerTransitionData > HfstBasicTransducer
An HfstTransitionGraph with transitions of type HfstTropicalTransducerTransitionData and weight type ...
Definition: HfstDataTypes.h:114
A transition that consists of a target state and transition data represented by class C...
Definition: HfstDataTypes.h:122
void write_in_xfst_format(FILE *file, bool write_weights=true)
Write the graph in xfst text format to FILE file. write_weights defines whether weights are printed (...
Definition: HfstTransitionGraph.h:1515
void set_final_weight(HfstState s, const typename C::WeightType &weight)
Set the final weight of state s in this graph to weight.
Definition: HfstTransitionGraph.h:635
HfstTransitionGraph & operator=(const HfstTransitionGraph &graph)
The assignment operator.
Definition: HfstTransitionGraph.h:222
std::map< StringPair, StringPair > HfstSymbolPairSubstitutions
A map of substitutions used when performing multiple symbol pair-to-symbol pair substitutions.
Definition: HfstSymbolDefs.h:95
HfstState add_state(void)
Add a new state to this graph and return its number.
Definition: HfstTransitionGraph.h:526
std::set< HfstSymbolPair > HfstSymbolPairSet
A set of symbol pairs.
Definition: HfstTransitionGraph.h:136
The stream is not in valid AT&T format.
Definition: HfstExceptionDefs.h:229