13 #ifndef _HFST_OL_CONVERT_H_
14 #define _HFST_OL_CONVERT_H_
17 #include "fst/fstlib.h"
18 #endif // HAVE_OPENFST
20 #include "transducer.h"
25 typedef std::map<hfst_ol::TransitionTableIndex,unsigned int>
26 HfstOlToBasicStateMap;
28 struct TransitionPlaceholder {
33 TransitionPlaceholder(
unsigned int t, SymbolNumber o,
float w):
40 typedef std::map<SymbolNumber, std::vector<TransitionPlaceholder> >
44 struct StatePlaceholder {
45 unsigned int state_number;
46 unsigned int start_index;
47 unsigned int first_transition;
48 SymbolTransitionsMap inputs;
51 StatePlaceholder (
unsigned int state,
bool finality,
unsigned int first,
54 start_index(UINT_MAX),
55 first_transition(first),
57 final_weight(final_weight)
60 state_number(UINT_MAX),
61 start_index(UINT_MAX),
62 first_transition(UINT_MAX),
67 bool is_simple(std::set<SymbolNumber>
const & flag_symbols)
const
69 if (state_number == 0) {
72 if (flag_symbols.size() == 0) {
73 return inputs.size() < 2;
75 bool have_zero =
false;
76 SymbolNumber input_symbols = 0;
77 for(SymbolTransitionsMap::const_iterator it = inputs.begin();
78 it != inputs.end(); ++it) {
79 if ((it->first == 0) or (flag_symbols.count(it->first) != 0)) {
87 if (input_symbols > 1) {
94 unsigned int number_of_transitions(
void)
const {
95 unsigned int count = 0;
96 for(SymbolTransitionsMap::const_iterator it = inputs.begin();
97 it != inputs.end(); ++it) {
98 count += it->second.size();
103 unsigned int symbol_offset(
104 SymbolNumber
const symbol,
105 std::set<SymbolNumber>
const & flag_symbols)
const {
109 unsigned int offset = 0;
110 if (flag_symbols.size() == 0) {
111 for(SymbolTransitionsMap::const_iterator it = inputs.begin();
112 it!= inputs.end(); ++it) {
113 if (symbol == it->first) {
116 offset += it->second.size();
120 if (inputs.count(0) != 0) {
121 offset = inputs.find(0)->second.size();
123 for(std::set<SymbolNumber>::iterator flag_it = flag_symbols.begin();
124 flag_it != flag_symbols.end(); ++flag_it) {
125 if (inputs.count(*flag_it) != 0) {
126 if (symbol == *flag_it) {
130 offset += inputs.find(*flag_it)->second.size();
133 for(SymbolTransitionsMap::const_iterator it = inputs.begin();
134 it!= inputs.end(); ++it) {
135 if (it->first == 0 || flag_symbols.count(it->first) != 0) {
138 if (symbol == it->first) {
141 offset += it->second.size();
143 std::string message(
"error in conversion between optimized lookup "
144 "format and HfstTransducer;\ntried to calculate "
145 "symbol_offset for symbol not present in state");
150 std::string message(
"error in function StatePlaceholder::symbol_offset");
155 bool compare_states_by_input_size(
156 const StatePlaceholder & lhs,
const StatePlaceholder & rhs);
157 bool compare_states_by_state_number(
158 const StatePlaceholder & lhs,
const StatePlaceholder & rhs);
160 class IndexPlaceholders:
public std::map<unsigned int,
161 std::pair<unsigned int, SymbolNumber> >
164 bool fits(StatePlaceholder
const & state,
165 std::set<SymbolNumber>
const & flag_symbols,
166 unsigned int const position)
const
168 if (count(position) != 0) {
171 for (SymbolTransitionsMap::const_iterator it = state.inputs.begin();
172 it != state.inputs.end(); ++it) {
173 SymbolNumber index_offset = it->first;
174 if (flag_symbols.count(index_offset) != 0) {
177 if (count(index_offset + position + 1) != 0) {
184 bool unsuitable(
unsigned int const index,
185 SymbolNumber
const symbols,
186 float const packing_aggression)
const
188 if (count(index) != 0) {
201 unsigned int filled = 0;
202 for (
unsigned int i = 0; i < symbols; ++i) {
203 filled += count(index + i + 1);
204 if (filled >= (packing_aggression*symbols)) {
213 void write_transitions_from_state_placeholders(
214 TransducerTable<TransitionW> & transition_table,
215 std::vector<hfst_ol::StatePlaceholder>
216 & state_placeholders,
217 std::set<SymbolNumber> & flag_symbols);
219 void add_transitions_with(SymbolNumber symbol,
220 std::vector<TransitionPlaceholder> & transitions,
221 TransducerTable<TransitionW> & transition_table,
222 std::vector<hfst_ol::StatePlaceholder>
223 & state_placeholders,
224 std::set<SymbolNumber> & flag_symbols);
226 #if HAVE_OPENFST // Covers remainder of file
227 typedef fst::StdArc::StateId StateId;
228 typedef fst::StdArc StdArc;
229 typedef fst::StdVectorFst TransduceR;
230 typedef fst::ArcIterator<TransduceR> ArcIterator;
231 typedef std::set<StateId> StateIdSet;
232 typedef std::set<int64> OfstSymbolSet;
234 const StateIdNumber NO_ID_NUMBER = UINT_MAX;
235 const StateId NO_STATE_ID = UINT_MAX;
236 const SymbolNumber BIG_STATE_LIMIT = 1;
239 struct transition_label
245 struct compare_transition_labels
247 bool operator() (
const transition_label &l1,
248 const transition_label &l2)
const
250 if (l1.input_symbol == l2.input_symbol)
251 return l1.output_symbol < l2.output_symbol;
252 return l1.input_symbol < l2.input_symbol;
256 typedef std::set<transition_label,compare_transition_labels> LabelSet;
258 enum place_holder {EMPTY, EMPTY_START, OCCUPIED_START, OCCUPIED };
259 typedef std::vector<place_holder> PlaceHolderVector;
264 class ConvertIdNumberMap
267 typedef std::map<StateIdNumber,StateId> IdNumbersToStateIds;
268 typedef std::map<StateId,StateIdNumber> StateIdsToIdNumbers;
270 IdNumbersToStateIds id_to_node;
271 StateIdsToIdNumbers node_to_id;
273 StateIdNumber node_counter;
275 void add_node(StateId n, TransduceR *tr);
281 void set_node_maps(TransduceR * t);
284 ConvertIdNumberMap(TransduceR * t):
286 { set_node_maps(t); }
288 StateIdNumber get_number_of_nodes(
void)
const
289 {
return node_counter; }
291 StateIdNumber get_node_id(StateId n)
const;
292 StateId get_id_node(StateIdNumber i)
const;
295 typedef std::map<int64,unsigned int> OfstSymbolCountMap;
296 typedef std::set<std::string> SymbolSet;
298 class ConvertTransducerAlphabet
301 SymbolTable symbol_table;
303 TransduceR* transducer;
304 fst::SymbolTable * ofst_symbol_table;
307 std::map<int64, SymbolNumber> input_symbols_map;
308 std::map<int64, SymbolNumber> output_symbols_map;
310 void inspect_node(StateId n, StateIdSet& visited_nodes,
311 OfstSymbolCountMap& symbol_count_map, SymbolSet& all_symbol_set);
312 void get_symbol_info(
313 OfstSymbolCountMap &symbol_count_map, SymbolSet& all_symbol_set);
314 void populate_symbol_table(
315 OfstSymbolCountMap &input_symbol_counts, SymbolSet& all_symbol_set);
318 ConvertTransducerAlphabet(TransduceR* t);
320 void display()
const;
322 const SymbolTable& get_symbol_table()
const {
return symbol_table;}
323 SymbolNumber lookup_ofst_input_symbol(int64 s)
const;
324 SymbolNumber lookup_ofst_output_symbol(int64 s)
const;
325 bool is_flag_diacritic(SymbolNumber symbol)
const;
327 TransducerAlphabet to_alphabet()
const;
330 class ConvertTransition
333 SymbolNumber input_symbol;
334 SymbolNumber output_symbol;
339 StateIdNumber target_state_id;
340 TransitionTableIndex target_state_index;
344 TransitionTableIndex table_index;
350 ConvertTransition(
const StdArc &a);
352 void display()
const;
354 SymbolNumber get_input_symbol(
void)
const
355 {
return input_symbol; }
357 void set_target_state_index();
359 void set_table_index(TransitionTableIndex i)
362 TransitionTableIndex get_table_index(
void)
const
363 {
return table_index; }
365 template<
class T> T to_transition()
const;
367 bool numerical_cmp(
const ConvertTransition &another_transition)
const;
368 bool operator<(
const ConvertTransition &another_index)
const;
371 class ConvertTransitionIndex
374 SymbolNumber input_symbol;
379 ConvertTransition* first_transition;
380 TransitionTableIndex first_transition_index;
383 ConvertTransitionIndex(SymbolNumber input, ConvertTransition* transition):
384 input_symbol(input), first_transition(transition) {}
386 void display()
const;
388 SymbolNumber get_input_symbol(
void)
const
389 {
return input_symbol; }
391 ConvertTransition* get_first_transition()
const
392 {
return first_transition; }
394 void set_first_transition_index(TransitionTableIndex i)
395 { first_transition_index = i; }
397 template<
class T> T to_transition_index()
const;
399 bool operator<(
const ConvertTransitionIndex &another_index)
const;
402 struct ConvertTransitionCompare
404 bool operator() (
const ConvertTransition * t1,
405 const ConvertTransition * t2)
const
407 return t1->operator<(*t2);
411 struct ConvertTransitionIndexCompare
413 bool operator() (
const ConvertTransitionIndex * i1,
414 const ConvertTransitionIndex * i2)
const
416 return i1->operator<(*i2);
420 typedef std::set<ConvertTransition*,ConvertTransitionCompare>
421 ConvertTransitionSet;
422 typedef std::set<ConvertTransitionIndex*,ConvertTransitionIndexCompare>
423 ConvertTransitionIndexSet;
425 class ConvertFstState
428 ConvertTransitionSet transitions;
429 ConvertTransitionIndexSet transition_indices;
433 TransitionTableIndex first_transition_index;
439 TransitionTableIndex table_index;
446 void set_transitions(StateId n, TransduceR * tr);
447 void set_transition_indices(
void);
449 friend class fst_state_compare;
451 ConvertFstState(StateId n, TransduceR * tr);
454 void display()
const;
456 TransitionTableIndex set_transition_table_indices(
457 TransitionTableIndex place);
459 TransitionTableIndex get_first_transition_index()
const
460 {
return first_transition_index; }
462 void set_table_index(TransitionTableIndex i)
464 TransitionTableIndex get_table_index(
void)
const
465 {
return table_index; }
467 SymbolNumberSet * get_input_symbols(
void)
const;
469 SymbolNumber number_of_input_symbols(
void)
const
470 {
return transition_indices.size(); }
471 SymbolNumber number_of_transitions(
void)
const
472 {
return transitions.size(); }
473 bool is_final(
void)
const {
return final;}
474 bool is_big_state(
void)
const
476 return (transition_indices.size() > BIG_STATE_LIMIT);
478 bool is_start_state(
void)
const {
return id == 0;}
479 StateIdNumber get_id(
void)
const {
return id;}
482 void set_transition_target_indices();
486 void insert_transition_indices(TransducerTable<T>& index_table)
const;
489 TransitionTableIndex append_transitions(TransducerTable<T>& transition_table,
490 TransitionTableIndex place)
const;
493 typedef std::vector<ConvertFstState*> ConvertFstStateVector;
495 class ConvertTransitionTableIndices
498 PlaceHolderVector indices;
499 PlaceHolderVector::size_type lower_bound;
500 unsigned int lower_bound_test_count;
501 SymbolNumber number_of_input_symbols;
503 bool state_fits(SymbolNumberSet * input_symbols,
505 PlaceHolderVector::size_type index);
507 void insert_state(SymbolNumberSet * input_symbols,
509 PlaceHolderVector::size_type index);
510 void get_more_space(
void);
512 ConvertTransitionTableIndices(SymbolNumber input_symbol_count):
513 lower_bound(0), lower_bound_test_count(0),
514 number_of_input_symbols(input_symbol_count)
519 PlaceHolderVector::size_type add_state(ConvertFstState * state);
520 PlaceHolderVector::size_type size(
void)
const
521 {
return indices.size(); }
523 PlaceHolderVector::size_type last_full_index(
void)
const;
526 class ConvertTransducerHeader
529 static void full_traversal(TransducerHeader& h, TransduceR* tr, StateId n,
530 StateIdSet& visited_nodes, StateIdSet& nodes_in_path,
531 OfstSymbolSet& all_input_symbols);
532 static void find_input_epsilon_cycles(StateId n, StateId t,
533 StateIdSet &epsilon_targets,
534 bool unweighted_only, TransduceR * tr,
535 TransducerHeader& h);
537 static void compute_header(TransducerHeader& header,
538 TransduceR * t, SymbolNumber symbol_count,
539 TransitionTableIndex number_of_index_table_entries,
540 TransitionTableIndex number_of_target_table_entries,
544 class ConvertTransducer
548 ConvertIdNumberMap * id_number_map;
549 ConvertTransitionTableIndices * fst_indices;
550 PlaceHolderVector::size_type index_table_size;
552 TransducerHeader header;
553 ConvertTransducerAlphabet alphabet;
554 ConvertFstStateVector states;
556 void read_nodes(
void);
557 void set_transition_table_indices(
void);
558 void set_index_table_indices(
void);
560 void add_input_symbols(StateId n,
561 SymbolNumberSet &input_symbols,
562 StateIdSet &visited_nodes);
563 SymbolNumber number_of_input_symbols(
void);
565 TransitionTableIndex count_transitions(
void)
const;
567 void display_states()
const;
568 void display_tables()
const;
570 template<
class T> TransducerTable<T> make_index_table(
571 TransitionTableIndex index_table_size)
const;
572 template<
class T> TransducerTable<T> make_transition_table()
const;
574 ConvertTransducer(TransduceR * tr,
bool weighted);
575 ~ConvertTransducer();
577 ConvertIdNumberMap& get_id_number_map() {
return *id_number_map;}
578 ConvertTransducerAlphabet& get_alphabet() {
return alphabet;}
579 ConvertFstState& get_state(StateIdNumber s) {
return *states[s];}
580 bool is_weighted()
const {
return header.probe_flag(Weighted);}
582 Transducer* to_transducer()
const;
585 static ConvertTransducer* constructing_transducer;
588 #endif // HAVE_OPENFST
592 #endif // include guard
An error happened probably due to a bug in the HFST code.
Definition: HfstExceptionDefs.h:378
#define HFST_THROW_MESSAGE(E, M)
Macro to throw an exception of type E with message M. Use THROW instead of regular throw with subclas...
Definition: HfstExceptionDefs.h:35