13 #ifndef _HFST_OL_CONVERT_H_ 
   14 #define _HFST_OL_CONVERT_H_ 
   17 #include "fst/fstlib.h" 
   18 #endif // HAVE_OPENFST 
   20 #include "transducer.h" 
   25     typedef std::map<hfst_ol::TransitionTableIndex,unsigned int>
 
   26     HfstOlToBasicStateMap;
 
   28 struct TransitionPlaceholder {
 
   33 TransitionPlaceholder(
unsigned int t, SymbolNumber o, 
float w):
 
   40 typedef std::map<SymbolNumber, std::vector<TransitionPlaceholder> >
 
   44 struct StatePlaceholder {
 
   45     unsigned int state_number;
 
   46     unsigned int start_index;
 
   47     unsigned int first_transition;
 
   48     SymbolTransitionsMap inputs;
 
   51     StatePlaceholder (
unsigned int state, 
bool finality, 
unsigned int first,
 
   54     start_index(UINT_MAX),
 
   55     first_transition(first),
 
   57     final_weight(final_weight)
 
   60     state_number(UINT_MAX),
 
   61     start_index(UINT_MAX),
 
   62     first_transition(UINT_MAX),
 
   67     bool is_simple(std::set<SymbolNumber> 
const & flag_symbols)
 const 
   69         if (state_number == 0) {
 
   72         if (flag_symbols.size() == 0) {
 
   73         return inputs.size() < 2;
 
   75         bool have_zero = 
false;
 
   76         SymbolNumber input_symbols = 0;
 
   77         for(SymbolTransitionsMap::const_iterator it = inputs.begin();
 
   78         it != inputs.end(); ++it) {
 
   79         if ((it->first == 0) or (flag_symbols.count(it->first) != 0)) {
 
   87         if (input_symbols > 1) {
 
   94     unsigned int number_of_transitions(
void)
 const {
 
   95     unsigned int count = 0;
 
   96     for(SymbolTransitionsMap::const_iterator it = inputs.begin();
 
   97         it != inputs.end(); ++it) {
 
   98         count += it->second.size();
 
  103     unsigned int symbol_offset(
 
  104         SymbolNumber 
const symbol,
 
  105         std::set<SymbolNumber> 
const & flag_symbols)
 const {
 
  109         unsigned int offset = 0;
 
  110         if (flag_symbols.size() == 0) {
 
  111             for(SymbolTransitionsMap::const_iterator it = inputs.begin();
 
  112                 it!= inputs.end(); ++it) {
 
  113                 if (symbol == it->first) {
 
  116                 offset += it->second.size();
 
  120             if (inputs.count(0) != 0) { 
 
  121                 offset = inputs.find(0)->second.size();
 
  123             for(std::set<SymbolNumber>::iterator flag_it = flag_symbols.begin();
 
  124                 flag_it != flag_symbols.end(); ++flag_it) {
 
  125                 if (inputs.count(*flag_it) != 0) { 
 
  126                     if (symbol == *flag_it) {
 
  130                     offset += inputs.find(*flag_it)->second.size();
 
  133             for(SymbolTransitionsMap::const_iterator it = inputs.begin();
 
  134                 it!= inputs.end(); ++it) {
 
  135                 if (it->first == 0 || flag_symbols.count(it->first) != 0) {
 
  138                 if (symbol == it->first) {
 
  141                 offset += it->second.size();
 
  143             std::string message(
"error in conversion between optimized lookup " 
  144                                 "format and HfstTransducer;\ntried to calculate " 
  145                                 "symbol_offset for symbol not present in state");
 
  150         std::string message(
"error in function StatePlaceholder::symbol_offset");
 
  155 bool compare_states_by_input_size(
 
  156     const StatePlaceholder & lhs, 
const StatePlaceholder & rhs);
 
  157 bool compare_states_by_state_number(
 
  158     const StatePlaceholder & lhs, 
const StatePlaceholder & rhs);
 
  160 class IndexPlaceholders: 
public std::map<unsigned int,
 
  161         std::pair<unsigned int, SymbolNumber> >
 
  164     bool fits(StatePlaceholder 
const & state,
 
  165               std::set<SymbolNumber> 
const & flag_symbols,
 
  166               unsigned int const position)
 const 
  168     if (count(position) != 0) {
 
  171     for (SymbolTransitionsMap::const_iterator it = state.inputs.begin();
 
  172          it != state.inputs.end(); ++it) {
 
  173         SymbolNumber index_offset = it->first;
 
  174         if (flag_symbols.count(index_offset) != 0) {
 
  177         if (count(index_offset + position + 1) != 0) {
 
  184     bool unsuitable(
unsigned int const index,
 
  185                     SymbolNumber 
const symbols,
 
  186                     float const packing_aggression)
 const 
  188     if (count(index) != 0) {
 
  201     unsigned int filled = 0;
 
  202     for (
unsigned int i = 0; i < symbols; ++i) {
 
  203         filled += count(index + i + 1);
 
  204         if (filled >= (packing_aggression*symbols)) {
 
  213 void write_transitions_from_state_placeholders(
 
  214     TransducerTable<TransitionW> & transition_table,
 
  215     std::vector<hfst_ol::StatePlaceholder>
 
  216     & state_placeholders,
 
  217     std::set<SymbolNumber> & flag_symbols);
 
  219 void add_transitions_with(SymbolNumber symbol,
 
  220               std::vector<TransitionPlaceholder> & transitions,
 
  221               TransducerTable<TransitionW> & transition_table,
 
  222               std::vector<hfst_ol::StatePlaceholder>
 
  223               & state_placeholders,
 
  224               std::set<SymbolNumber> & flag_symbols);
 
  226 #if HAVE_OPENFST // Covers remainder of file 
  227 typedef fst::StdArc::StateId StateId;
 
  228 typedef fst::StdArc StdArc;
 
  229 typedef fst::StdVectorFst TransduceR;
 
  230 typedef fst::ArcIterator<TransduceR> ArcIterator;
 
  231 typedef std::set<StateId> StateIdSet;
 
  232 typedef std::set<int64> OfstSymbolSet;
 
  234 const StateIdNumber NO_ID_NUMBER = UINT_MAX;
 
  235 const StateId NO_STATE_ID = UINT_MAX;
 
  236 const SymbolNumber BIG_STATE_LIMIT = 1;
 
  239 struct transition_label
 
  245 struct compare_transition_labels 
 
  247   bool operator() ( 
const transition_label &l1,
 
  248             const transition_label &l2)
 const 
  250     if (l1.input_symbol == l2.input_symbol)
 
  251         return l1.output_symbol < l2.output_symbol;
 
  252     return l1.input_symbol < l2.input_symbol;
 
  256 typedef std::set<transition_label,compare_transition_labels> LabelSet;
 
  258 enum place_holder {EMPTY, EMPTY_START, OCCUPIED_START, OCCUPIED };
 
  259 typedef std::vector<place_holder> PlaceHolderVector;
 
  264 class ConvertIdNumberMap
 
  267   typedef std::map<StateIdNumber,StateId> IdNumbersToStateIds;
 
  268   typedef std::map<StateId,StateIdNumber> StateIdsToIdNumbers;
 
  270   IdNumbersToStateIds id_to_node;
 
  271   StateIdsToIdNumbers node_to_id;
 
  273   StateIdNumber node_counter;
 
  275   void add_node(StateId n, TransduceR *tr);
 
  281   void set_node_maps(TransduceR * t);
 
  284   ConvertIdNumberMap(TransduceR * t):
 
  286   { set_node_maps(t); }
 
  288   StateIdNumber get_number_of_nodes(
void)
 const 
  289   { 
return node_counter; }
 
  291   StateIdNumber get_node_id(StateId n) 
const;
 
  292   StateId get_id_node(StateIdNumber i) 
const;
 
  295 typedef std::map<int64,unsigned int> OfstSymbolCountMap;
 
  296 typedef std::set<std::string> SymbolSet;
 
  298 class ConvertTransducerAlphabet
 
  301   SymbolTable symbol_table;
 
  303   TransduceR* transducer;
 
  304   fst::SymbolTable * ofst_symbol_table;
 
  307   std::map<int64, SymbolNumber> input_symbols_map;
 
  308   std::map<int64, SymbolNumber> output_symbols_map;
 
  310   void inspect_node(StateId n, StateIdSet& visited_nodes,
 
  311     OfstSymbolCountMap& symbol_count_map, SymbolSet& all_symbol_set);
 
  312   void get_symbol_info(
 
  313       OfstSymbolCountMap &symbol_count_map, SymbolSet& all_symbol_set);
 
  314   void populate_symbol_table(
 
  315       OfstSymbolCountMap &input_symbol_counts, SymbolSet& all_symbol_set);
 
  318   ConvertTransducerAlphabet(TransduceR* t);
 
  320   void display() 
const;
 
  322   const SymbolTable& get_symbol_table()
 const {
return symbol_table;}
 
  323   SymbolNumber lookup_ofst_input_symbol(int64 s) 
const;
 
  324   SymbolNumber lookup_ofst_output_symbol(int64 s) 
const;
 
  325   bool is_flag_diacritic(SymbolNumber symbol) 
const;
 
  327   TransducerAlphabet to_alphabet() 
const;
 
  330 class ConvertTransition
 
  333   SymbolNumber input_symbol;
 
  334   SymbolNumber output_symbol;
 
  339     StateIdNumber target_state_id;
 
  340     TransitionTableIndex target_state_index;
 
  344   TransitionTableIndex table_index; 
 
  350   ConvertTransition(
const StdArc &a);
 
  352   void display() 
const;
 
  354   SymbolNumber get_input_symbol(
void)
 const 
  355   { 
return input_symbol; }
 
  357   void set_target_state_index();
 
  359   void set_table_index(TransitionTableIndex i)
 
  362   TransitionTableIndex get_table_index(
void)
 const 
  363   { 
return table_index; }
 
  365   template<
class T> T to_transition() 
const;
 
  367   bool numerical_cmp( 
const ConvertTransition &another_transition) 
const;
 
  368   bool operator<(
const ConvertTransition &another_index) 
const;
 
  371 class ConvertTransitionIndex
 
  374   SymbolNumber input_symbol;
 
  379     ConvertTransition* first_transition;
 
  380     TransitionTableIndex first_transition_index;
 
  383   ConvertTransitionIndex(SymbolNumber input, ConvertTransition* transition):
 
  384     input_symbol(input), first_transition(transition) {}
 
  386   void display() 
const;
 
  388   SymbolNumber get_input_symbol(
void)
 const 
  389   { 
return input_symbol; }
 
  391   ConvertTransition* get_first_transition()
 const 
  392   { 
return first_transition; }
 
  394   void set_first_transition_index(TransitionTableIndex i)
 
  395   { first_transition_index = i; }
 
  397   template<
class T> T to_transition_index() 
const;
 
  399   bool operator<(
const ConvertTransitionIndex &another_index) 
const;
 
  402 struct ConvertTransitionCompare
 
  404   bool operator() (
const ConvertTransition * t1,
 
  405            const ConvertTransition * t2)
 const  
  407     return t1->operator<(*t2); 
 
  411 struct ConvertTransitionIndexCompare
 
  413   bool operator() (
const ConvertTransitionIndex * i1,
 
  414            const ConvertTransitionIndex * i2)
 const  
  416     return i1->operator<(*i2); 
 
  420 typedef std::set<ConvertTransition*,ConvertTransitionCompare>
 
  421     ConvertTransitionSet;
 
  422 typedef std::set<ConvertTransitionIndex*,ConvertTransitionIndexCompare>
 
  423     ConvertTransitionIndexSet;
 
  425 class ConvertFstState
 
  428   ConvertTransitionSet transitions;
 
  429   ConvertTransitionIndexSet transition_indices;
 
  433   TransitionTableIndex first_transition_index;
 
  439   TransitionTableIndex table_index;
 
  446   void set_transitions(StateId n, TransduceR * tr);
 
  447   void set_transition_indices(
void);
 
  449   friend class fst_state_compare;
 
  451   ConvertFstState(StateId n, TransduceR * tr);
 
  454   void display() 
const;
 
  456   TransitionTableIndex set_transition_table_indices(
 
  457       TransitionTableIndex place);
 
  459   TransitionTableIndex get_first_transition_index()
 const 
  460   { 
return first_transition_index; }
 
  462   void set_table_index(TransitionTableIndex i)
 
  464   TransitionTableIndex get_table_index(
void)
 const 
  465   { 
return table_index; }
 
  467   SymbolNumberSet * get_input_symbols(
void) 
const;
 
  469   SymbolNumber number_of_input_symbols(
void)
 const 
  470   { 
return transition_indices.size(); }
 
  471   SymbolNumber number_of_transitions(
void)
 const 
  472   { 
return transitions.size(); }
 
  473   bool is_final(
void)
 const {
return final;}
 
  474   bool is_big_state(
void)
 const 
  476       return (transition_indices.size() > BIG_STATE_LIMIT);
 
  478   bool is_start_state(
void)
 const {
return id == 0;}
 
  479   StateIdNumber get_id(
void)
 const {
return id;}
 
  482   void set_transition_target_indices();
 
  486   void insert_transition_indices(TransducerTable<T>& index_table) 
const;
 
  489   TransitionTableIndex append_transitions(TransducerTable<T>& transition_table, 
 
  490                                           TransitionTableIndex place) 
const;
 
  493  typedef std::vector<ConvertFstState*> ConvertFstStateVector;
 
  495 class ConvertTransitionTableIndices 
 
  498   PlaceHolderVector indices;
 
  499   PlaceHolderVector::size_type lower_bound;
 
  500   unsigned int lower_bound_test_count;
 
  501   SymbolNumber number_of_input_symbols;
 
  503   bool state_fits(SymbolNumberSet * input_symbols,
 
  505           PlaceHolderVector::size_type index);
 
  507   void insert_state(SymbolNumberSet * input_symbols,
 
  509             PlaceHolderVector::size_type index);
 
  510   void get_more_space(
void);
 
  512   ConvertTransitionTableIndices(SymbolNumber input_symbol_count):
 
  513     lower_bound(0), lower_bound_test_count(0),
 
  514     number_of_input_symbols(input_symbol_count)
 
  519   PlaceHolderVector::size_type add_state(ConvertFstState * state);
 
  520   PlaceHolderVector::size_type size(
void)
 const 
  521   { 
return indices.size(); }
 
  523   PlaceHolderVector::size_type last_full_index(
void) 
const;
 
  526 class ConvertTransducerHeader
 
  529   static void full_traversal(TransducerHeader& h, TransduceR* tr, StateId n,
 
  530          StateIdSet& visited_nodes, StateIdSet& nodes_in_path,
 
  531                  OfstSymbolSet& all_input_symbols);
 
  532   static void find_input_epsilon_cycles(StateId n, StateId t,
 
  533                     StateIdSet &epsilon_targets,
 
  534                     bool unweighted_only, TransduceR * tr,
 
  535                     TransducerHeader& h);
 
  537   static void compute_header(TransducerHeader& header,
 
  538       TransduceR * t, SymbolNumber symbol_count,
 
  539         TransitionTableIndex number_of_index_table_entries,
 
  540         TransitionTableIndex number_of_target_table_entries,
 
  544 class ConvertTransducer
 
  548   ConvertIdNumberMap * id_number_map;
 
  549   ConvertTransitionTableIndices * fst_indices;
 
  550   PlaceHolderVector::size_type index_table_size;
 
  552   TransducerHeader header;
 
  553   ConvertTransducerAlphabet alphabet;
 
  554   ConvertFstStateVector states;
 
  556   void read_nodes(
void);
 
  557   void set_transition_table_indices(
void);
 
  558   void set_index_table_indices(
void);
 
  560   void add_input_symbols(StateId n,
 
  561              SymbolNumberSet &input_symbols,
 
  562              StateIdSet &visited_nodes);
 
  563   SymbolNumber number_of_input_symbols(
void);
 
  565   TransitionTableIndex count_transitions(
void) 
const;
 
  567   void display_states() 
const;
 
  568   void display_tables() 
const;
 
  570   template<
class T> TransducerTable<T> make_index_table(
 
  571       TransitionTableIndex index_table_size) 
const;
 
  572   template<
class T> TransducerTable<T> make_transition_table() 
const;
 
  574   ConvertTransducer(TransduceR * tr, 
bool weighted);
 
  575   ~ConvertTransducer();
 
  577   ConvertIdNumberMap& get_id_number_map() {
return *id_number_map;}
 
  578   ConvertTransducerAlphabet& get_alphabet() {
return alphabet;}
 
  579   ConvertFstState& get_state(StateIdNumber s) {
return *states[s];}
 
  580   bool is_weighted()
 const {
return header.probe_flag(Weighted);}
 
  582   Transducer* to_transducer() 
const;
 
  585   static ConvertTransducer* constructing_transducer; 
 
  588 #endif // HAVE_OPENFST 
  592 #endif // include guard 
An error happened probably due to a bug in the HFST code. 
Definition: HfstExceptionDefs.h:378
 
#define HFST_THROW_MESSAGE(E, M)
Macro to throw an exception of type E with message M. Use THROW instead of regular throw with subclas...
Definition: HfstExceptionDefs.h:35