14 #ifndef _HFST_OL_TRANSDUCER_TRANSDUCER_H_
15 #define _HFST_OL_TRANSDUCER_TRANSDUCER_H_
30 #include "../../HfstExceptionDefs.h"
31 #include "../../HfstFlagDiacritics.h"
32 #include "../../HfstSymbolDefs.h"
36 using hfst::FdOperation;
43 typedef unsigned short SymbolNumber;
44 typedef unsigned int TransitionTableIndex;
45 typedef unsigned int TransitionNumber;
46 typedef unsigned int StateIdNumber;
47 typedef short ValueNumber;
49 typedef std::set<SymbolNumber> SymbolNumberSet;
50 typedef std::vector<SymbolNumber> SymbolNumberVector;
51 typedef std::set<TransitionTableIndex> TransitionTableIndexSet;
52 typedef std::vector<std::string> SymbolTable;
57 typedef std::vector<std::string> StringVector;
60 typedef std::vector<short> FlagDiacriticState;
61 typedef std::map<SymbolNumber, hfst::FdOperation> OperationMap;
62 typedef std::map<std::string, SymbolNumber> StringSymbolMap;
68 TransitionTableIndex index;
69 FlagDiacriticState flags;
70 TraversalState(TransitionTableIndex i, FlagDiacriticState f):
72 bool operator==(TraversalState & rhs);
74 typedef std::vector<std::vector<TraversalState> > PositionStates;
76 const SymbolNumber NO_SYMBOL_NUMBER = std::numeric_limits<SymbolNumber>::max();
77 const TransitionTableIndex NO_TABLE_INDEX =
78 std::numeric_limits<TransitionTableIndex>::max();
79 const Weight INFINITE_WEIGHT =
static_cast<float>(NO_TABLE_INDEX);
81 enum HeaderFlag {Weighted, Deterministic, Input_deterministic, Minimized,
82 Cyclic, Has_epsilon_epsilon_transitions,
83 Has_input_epsilon_transitions, Has_input_epsilon_cycles,
84 Has_unweighted_input_epsilon_cycles};
88 const TransitionTableIndex TRANSITION_TARGET_TABLE_START = 2147483648u;
89 const unsigned int MAX_IO_LEN = 10000;
90 const unsigned int MAX_RECURSION_DEPTH = 5000;
95 bool should_ascii_tokenize(
unsigned char c);
97 inline bool indexes_transition_table(
const TransitionTableIndex i)
99 return i >= TRANSITION_TARGET_TABLE_START;
101 inline bool indexes_transition_index_table(
const TransitionTableIndex i)
103 return i < TRANSITION_TARGET_TABLE_START;
106 class TransducerHeader
109 SymbolNumber number_of_input_symbols;
110 SymbolNumber number_of_symbols;
111 TransitionTableIndex size_of_transition_index_table;
112 TransitionTableIndex size_of_transition_target_table;
114 StateIdNumber number_of_states;
115 TransitionNumber number_of_transitions;
119 bool input_deterministic;
122 bool has_epsilon_epsilon_transitions;
123 bool has_input_epsilon_transitions;
124 bool has_input_epsilon_cycles;
125 bool has_unweighted_input_epsilon_cycles;
127 static void header_error()
133 static T read_property(std::istream& is)
136 is.read(reinterpret_cast<char*>(&p),
sizeof(T));
140 static void write_property(T prop, std::ostream& os)
141 { os.write(reinterpret_cast<const char*>(&prop),
sizeof(prop)); }
142 static bool read_bool_property(std::istream& is)
145 is.read(reinterpret_cast<char*>(&prop),
sizeof(
unsigned int));
153 static void write_bool_property(
bool value, std::ostream& os)
155 unsigned int prop = (value?1:0);
156 os.write(reinterpret_cast<char*>(&prop),
sizeof(prop));
159 TransducerHeader(
bool weights):
160 number_of_input_symbols(0),
161 number_of_symbols(1),
162 size_of_transition_index_table(1),
163 size_of_transition_target_table(0),
165 number_of_transitions(0),
168 input_deterministic(true),
171 has_epsilon_epsilon_transitions(false),
172 has_input_epsilon_transitions(false),
173 has_input_epsilon_cycles(false),
174 has_unweighted_input_epsilon_cycles(false)
180 SymbolNumber input_symbols,
181 SymbolNumber symbols,
182 TransitionTableIndex transition_index_table,
183 TransitionTableIndex transition_table,
185 number_of_input_symbols(input_symbols),
186 number_of_symbols(symbols),
187 size_of_transition_index_table(transition_index_table),
188 size_of_transition_target_table(transition_table),
190 number_of_transitions(0),
193 input_deterministic(true),
196 has_epsilon_epsilon_transitions(false),
197 has_input_epsilon_transitions(false),
198 has_input_epsilon_cycles(false),
199 has_unweighted_input_epsilon_cycles(false)
203 TransducerHeader(std::istream& is):
204 number_of_input_symbols(read_property<SymbolNumber>(is)),
205 number_of_symbols(read_property<SymbolNumber>(is)),
206 size_of_transition_index_table(
207 read_property<TransitionTableIndex>(is)),
208 size_of_transition_target_table(
209 read_property<TransitionTableIndex>(is)),
210 number_of_states(read_property<StateIdNumber>(is)),
211 number_of_transitions(read_property<TransitionNumber>(is)),
212 weighted(read_bool_property(is)),
213 deterministic(read_bool_property(is)),
214 input_deterministic(read_bool_property(is)),
215 minimized(read_bool_property(is)),
216 cyclic(read_bool_property(is)),
217 has_epsilon_epsilon_transitions(read_bool_property(is)),
218 has_input_epsilon_transitions(read_bool_property(is)),
219 has_input_epsilon_cycles(read_bool_property(is)),
220 has_unweighted_input_epsilon_cycles(read_bool_property(is))
227 SymbolNumber symbol_count(
void)
const {
return number_of_symbols; }
228 SymbolNumber input_symbol_count(
void)
const {
229 return number_of_input_symbols;
231 void increment_symbol_count(
void)
232 {++number_of_symbols; ++number_of_input_symbols;}
234 TransitionTableIndex index_table_size(
void)
const
235 {
return size_of_transition_index_table; }
236 TransitionTableIndex target_table_size(
void)
const
237 {
return size_of_transition_target_table; }
239 bool probe_flag(HeaderFlag flag)
const
245 return deterministic;
246 case Input_deterministic:
247 return input_deterministic;
252 case Has_epsilon_epsilon_transitions:
253 return has_epsilon_epsilon_transitions;
254 case Has_input_epsilon_transitions:
255 return has_input_epsilon_transitions;
256 case Has_input_epsilon_cycles:
257 return has_input_epsilon_cycles;
258 case Has_unweighted_input_epsilon_cycles:
259 return has_unweighted_input_epsilon_cycles;
264 void set_flag(HeaderFlag flag,
bool value)
271 deterministic =
true;
273 case Input_deterministic:
274 input_deterministic =
true;
282 case Has_epsilon_epsilon_transitions:
283 has_epsilon_epsilon_transitions =
true;
285 case Has_input_epsilon_transitions:
286 has_input_epsilon_transitions =
true;
288 case Has_input_epsilon_cycles:
289 has_input_epsilon_cycles =
true;
291 case Has_unweighted_input_epsilon_cycles:
292 has_unweighted_input_epsilon_cycles =
true;
300 std::cout <<
"Transducer properties:" << std::endl
301 <<
" number_of_symbols: "
302 << number_of_symbols << std::endl
303 <<
" number_of_input_symbols: "
304 << number_of_input_symbols << std::endl
305 <<
" size_of_transition_index_table: "
306 << size_of_transition_index_table << std::endl
307 <<
" size_of_transition_target_table: "
308 << size_of_transition_target_table << std::endl
309 <<
" number_of_states: "
310 << number_of_states << std::endl
311 <<
" number_of_transitions: "
312 << number_of_transitions << std::endl
314 << weighted << std::endl
315 <<
" deterministic: "
316 << deterministic << std::endl
317 <<
" input_deterministic: "
318 << input_deterministic << std::endl
320 << minimized << std::endl
322 << cyclic << std::endl
323 <<
" has_epsilon_epsilon_transitions: "
324 << has_epsilon_epsilon_transitions << std::endl
325 <<
" has_input_epsilon_transitions: "
326 << has_input_epsilon_transitions << std::endl
327 <<
" has_input_epsilon_cycles: "
328 << has_input_epsilon_cycles << std::endl
329 <<
" has_unweighted_input_epsilon_cycles: "
330 << has_unweighted_input_epsilon_cycles << std::endl;
333 void write(std::ostream& os)
const
335 write_property(number_of_input_symbols, os);
336 write_property(number_of_symbols, os);
337 write_property(size_of_transition_index_table, os);
338 write_property(size_of_transition_target_table, os);
339 write_property(number_of_states, os);
340 write_property(number_of_transitions, os);
341 write_bool_property(weighted, os);
342 write_bool_property(deterministic, os);
343 write_bool_property(input_deterministic, os);
344 write_bool_property(minimized, os);
345 write_bool_property(cyclic, os);
346 write_bool_property(has_epsilon_epsilon_transitions, os);
347 write_bool_property(has_input_epsilon_transitions, os);
348 write_bool_property(has_input_epsilon_cycles, os);
349 write_bool_property(has_unweighted_input_epsilon_cycles, os);
352 friend class ConvertTransducerHeader;
355 class TransducerAlphabet
358 SymbolTable symbol_table;
359 hfst::FdTable<SymbolNumber> fd_table;
360 SymbolNumber unknown_symbol;
361 SymbolNumber default_symbol;
362 SymbolNumber identity_symbol;
368 symbol_table.push_back(
"@_EPSILON_SYMBOL_@");
369 unknown_symbol = NO_SYMBOL_NUMBER;
370 default_symbol = NO_SYMBOL_NUMBER;
371 identity_symbol = NO_SYMBOL_NUMBER;
373 TransducerAlphabet(std::istream& is, SymbolNumber symbol_count);
374 TransducerAlphabet(
const SymbolTable& st);
376 void display()
const;
378 void write(std::ostream& os)
const
380 for(SymbolTable::const_iterator i = symbol_table.begin();
381 i != symbol_table.end(); i++)
388 bool has_flag_diacritics()
const
389 {
return fd_table.num_features() > 0; }
390 bool is_flag_diacritic(SymbolNumber symbol)
const
391 {
return fd_table.is_diacritic(symbol); }
392 bool is_like_epsilon(SymbolNumber symbol)
const;
394 const SymbolTable& get_symbol_table()
const
395 {
return symbol_table; }
397 const std::string string_from_symbol(
const SymbolNumber symbol)
const
399 {
return (symbol == 0) ?
"" : symbol_table[symbol]; }
401 SymbolNumber symbol_from_string(
const std::string symbol_string)
const;
402 StringSymbolMap build_string_symbol_map(
void)
const;
403 const hfst::FdTable<SymbolNumber>& get_fd_table()
const
405 const hfst::FdOperation * get_operation(SymbolNumber symbol)
const
407 return fd_table.get_operation(symbol);
409 SymbolNumber get_unknown_symbol(
void)
const
410 {
return unknown_symbol; }
411 SymbolNumber get_default_symbol(
void)
const
412 {
return default_symbol; }
413 SymbolNumber get_identity_symbol(
void)
const
414 {
return identity_symbol; }
415 void add_symbol(
char * symbol);
419 class TransitionIndex
422 SymbolNumber input_symbol;
423 TransitionTableIndex first_transition_index;
425 static const size_t size =
426 sizeof(SymbolNumber) +
sizeof(TransitionTableIndex);
427 TransitionIndex(): input_symbol(NO_SYMBOL_NUMBER),
428 first_transition_index(NO_TABLE_INDEX) {}
429 TransitionIndex(SymbolNumber input,
430 TransitionTableIndex first_transition):
431 input_symbol(input), first_transition_index(first_transition) {}
433 TransitionIndex(std::istream& is):
434 input_symbol(NO_SYMBOL_NUMBER), first_transition_index(0)
436 is.read(reinterpret_cast<char*>(&input_symbol),
437 sizeof(SymbolNumber));
438 is.read(reinterpret_cast<char*>(&first_transition_index),
439 sizeof(TransitionTableIndex));
442 TransitionIndex(
char * p):
443 input_symbol(*((SymbolNumber*) p)),
444 first_transition_index((*(TransitionTableIndex*)
445 (p + sizeof(SymbolNumber)))) {}
446 virtual ~TransitionIndex() {}
448 void write(std::ostream& os,
bool weighted)
const
450 os.write(reinterpret_cast<const char*>(&input_symbol),
451 sizeof(input_symbol));
452 if(!weighted and input_symbol == NO_SYMBOL_NUMBER and
453 first_transition_index != NO_TABLE_INDEX) {
455 unsigned int unweighted_final_index = 1;
456 os.write(reinterpret_cast<const char*>(&unweighted_final_index),
457 sizeof(first_transition_index));
459 os.write(reinterpret_cast<const char*>(
460 &first_transition_index),
461 sizeof(first_transition_index));
465 void display()
const;
467 TransitionTableIndex get_target(
void)
const
468 {
return first_transition_index; }
469 SymbolNumber get_input_symbol(
void)
const
470 {
return input_symbol; }
472 bool matches(
const SymbolNumber s)
const;
473 virtual bool final(void)
const;
474 virtual Weight final_weight(
void)
const {
return 0.0; }
476 static TransitionIndex create_final()
477 {
return TransitionIndex(NO_SYMBOL_NUMBER, 1); }
480 class TransitionWIndex :
public TransitionIndex
483 TransitionWIndex(): TransitionIndex() {}
484 TransitionWIndex(SymbolNumber input,
485 TransitionTableIndex first_transition):
486 TransitionIndex(input, first_transition) {}
487 TransitionWIndex(std::istream& is):
488 TransitionIndex(is) {}
489 TransitionWIndex(
char * p):
490 TransitionIndex(p) {}
492 Weight final_weight(
void)
const;
494 static TransitionWIndex create_final()
495 {
return TransitionWIndex(NO_SYMBOL_NUMBER, 0); }
497 static TransitionWIndex create_final(Weight w)
501 TransitionTableIndex i;
505 return TransitionWIndex(NO_SYMBOL_NUMBER, weight.i);
512 SymbolNumber input_symbol;
513 SymbolNumber output_symbol;
514 TransitionTableIndex target_index;
516 static const size_t size = 2 *
sizeof(SymbolNumber) +
517 sizeof(TransitionTableIndex);
518 Transition(SymbolNumber input, SymbolNumber output,
519 TransitionTableIndex target, Weight bogus=0.0f):
520 input_symbol(input), output_symbol(output), target_index(target)
521 {(void)bogus; bogus=0.0f;}
522 Transition(
bool final, Weight bogus=0.0f):
523 input_symbol(NO_SYMBOL_NUMBER), output_symbol(NO_SYMBOL_NUMBER),
524 target_index(final?1:NO_TABLE_INDEX) {(void)bogus; bogus=0.0f;}
525 Transition(std::istream& is):
526 input_symbol(NO_SYMBOL_NUMBER), output_symbol(NO_SYMBOL_NUMBER),
529 is.read(reinterpret_cast<char*>(&input_symbol),
530 sizeof(SymbolNumber));
531 is.read(reinterpret_cast<char*>(&output_symbol),
532 sizeof(SymbolNumber));
533 is.read(reinterpret_cast<char*>(&target_index),
534 sizeof(target_index));
537 Transition(
char * p):
538 input_symbol(*(SymbolNumber*) p),
539 output_symbol(*(SymbolNumber*) (p + sizeof(SymbolNumber))),
540 target_index(*(TransitionTableIndex*) (p + 2 * sizeof(SymbolNumber)))
543 virtual ~Transition() {}
545 virtual void write(std::ostream& os,
bool weighted)
const
547 os.write(reinterpret_cast<const char*>(&input_symbol),
548 sizeof(input_symbol));
549 os.write(reinterpret_cast<const char*>(&output_symbol),
550 sizeof(output_symbol));
551 os.write(reinterpret_cast<const char*>(&target_index),
552 sizeof(target_index));
558 virtual void display()
const;
560 TransitionTableIndex get_target(
void)
const {
return target_index;}
561 SymbolNumber get_output_symbol(
void)
const {
return output_symbol;}
562 SymbolNumber get_input_symbol(
void)
const {
return input_symbol;}
564 bool matches(
const SymbolNumber s)
const;
565 virtual bool final(void)
const;
566 virtual Weight get_weight(
void)
const {
return 0.0; }
569 class TransitionW :
public Transition
572 Weight transition_weight;
574 static const size_t size = 2 *
sizeof(SymbolNumber) +
575 sizeof(TransitionTableIndex) +
sizeof(Weight);
577 TransitionW(SymbolNumber input, SymbolNumber output,
578 TransitionTableIndex target, Weight w):
579 Transition(input, output, target), transition_weight(w) {}
580 TransitionW(
bool final, Weight w):
581 Transition(final), transition_weight(w) {}
582 TransitionW(std::istream& is): Transition(is), transition_weight(0.0f)
583 {is.read(reinterpret_cast<char*>(&transition_weight),
sizeof(Weight));}
584 TransitionW(
char * p):
586 transition_weight(*((Weight*)
587 (p + 2 * sizeof(SymbolNumber)
588 + sizeof(TransitionTableIndex))))
591 void write(std::ostream& os,
bool weighted)
const
593 Transition::write(os,
false);
595 os.write(reinterpret_cast<const char*>(&transition_weight),
596 sizeof(transition_weight));
600 void display()
const;
602 Weight get_weight(
void)
const {
return transition_weight; }
607 class TransducerTable
610 std::vector<T> table;
612 TransducerTable(): table() {}
613 TransducerTable(
size_t size,
const T& entry): table(size, entry) {}
615 std::istream& is, TransitionTableIndex index_count): table()
617 char * p = (
char*) malloc(T::size * index_count);
618 is.read(p, T::size * index_count);
621 table.push_back(T(p));
627 TransducerTable(
const TransducerTable& t): table(t.table) {}
629 void append(
const T& v) {table.push_back(v);}
630 void set(
size_t index,
const T& v) {table[index] = v;}
632 const T& operator[](TransitionTableIndex i)
const
634 return (i < TRANSITION_TARGET_TABLE_START) ?
635 table[i] : table[i-TRANSITION_TARGET_TABLE_START];
638 void display(
bool transition_table)
const
640 for(
size_t i=0;i<table.size();i++)
644 std::cout <<
"/" << i+TRANSITION_TARGET_TABLE_START;
650 unsigned int size()
const {
return table.size();}
653 class TransducerTablesInterface
656 virtual ~TransducerTablesInterface() {}
658 virtual const TransitionIndex& get_index(
659 TransitionTableIndex i)
const = 0;
660 virtual const Transition& get_transition(
661 TransitionTableIndex i)
const = 0;
662 virtual Weight get_weight(
663 TransitionTableIndex i)
const = 0;
664 virtual SymbolNumber get_transition_input(
665 TransitionTableIndex i)
const = 0;
666 virtual SymbolNumber get_transition_output(
667 TransitionTableIndex i)
const = 0;
668 virtual TransitionTableIndex get_transition_target(
669 TransitionTableIndex i)
const = 0;
670 virtual bool get_transition_finality(
671 TransitionTableIndex i)
const = 0;
672 virtual SymbolNumber get_index_input(
673 TransitionTableIndex i)
const = 0;
674 virtual TransitionTableIndex get_index_target(
675 TransitionTableIndex i)
const = 0;
676 virtual bool get_index_finality(
677 TransitionTableIndex i)
const = 0;
678 virtual Weight get_final_weight(
679 TransitionTableIndex i)
const = 0;
681 virtual void display()
const {}
684 template <
class T1,
class T2>
685 class TransducerTables :
public TransducerTablesInterface
688 TransducerTable<T1> index_table;
689 TransducerTable<T2> transition_table;
691 TransducerTables(std::istream& is, TransitionTableIndex index_table_size,
692 TransitionTableIndex transition_table_size):
694 is, index_table_size),
695 transition_table(is, transition_table_size) { }
697 TransducerTables(): index_table(1, T1::create_final()),
698 transition_table() {}
699 TransducerTables(
const TransducerTable<T1>& index_table,
700 const TransducerTable<T2>& transition_table):
701 index_table(index_table), transition_table(transition_table) {}
703 const TransitionIndex& get_index(TransitionTableIndex i)
const
704 {
return index_table[i];}
705 const Transition& get_transition(TransitionTableIndex i)
const
706 {
return transition_table[i];}
707 Weight get_weight(TransitionTableIndex i)
const
708 {
return transition_table[i].get_weight(); }
709 SymbolNumber get_transition_input(TransitionTableIndex i)
const
710 {
return transition_table[i].get_input_symbol(); }
711 SymbolNumber get_transition_output(TransitionTableIndex i)
const
712 {
return transition_table[i].get_output_symbol(); }
713 TransitionTableIndex get_transition_target(TransitionTableIndex i)
const
714 {
return transition_table[i].get_target(); }
715 bool get_transition_finality(TransitionTableIndex i)
const
716 {
return transition_table[i].final(); }
717 SymbolNumber get_index_input(TransitionTableIndex i)
const
718 {
return index_table[i].get_input_symbol(); }
719 TransitionTableIndex get_index_target(TransitionTableIndex i)
const
720 {
return index_table[i].get_target(); }
721 bool get_index_finality(TransitionTableIndex i)
const
722 {
return index_table[i].final(); }
723 Weight get_final_weight(TransitionTableIndex i)
const
724 {
return index_table[i].final_weight(); }
729 std::cout <<
"Transition index table:" << std::endl;
730 index_table.display(
false);
731 std::cout <<
"Transition table:" << std::endl;
732 transition_table.display(
true);
740 typedef std::vector<OlLetterTrie*> OlLetterTrieVector;
745 OlLetterTrieVector letters;
746 SymbolNumberVector symbols;
750 letters(UCHAR_MAX, static_cast<OlLetterTrie*>(NULL)),
751 symbols(UCHAR_MAX,NO_SYMBOL_NUMBER)
754 void add_string(
const char * p,SymbolNumber symbol_key);
756 SymbolNumber find_key(
char ** p);
763 SymbolNumber number_of_input_symbols;
764 OlLetterTrie letters;
765 SymbolNumberVector ascii_symbols;
767 void read_input_symbols(
const SymbolTable & kt);
768 void read_input_symbol(
const char * symbol,
const int symbol_number);
771 Encoder(
const SymbolTable & st, SymbolNumber input_symbol_count):
772 number_of_input_symbols(input_symbol_count),
773 ascii_symbols(UCHAR_MAX,NO_SYMBOL_NUMBER)
775 read_input_symbols(st);
778 SymbolNumber find_key(
char ** p);
780 friend class Transducer;
781 friend class PmatchContainer;
786 class Tape:
public SymbolNumberVector
789 void write(
unsigned int i, SymbolNumber s)
791 if (this->size() > i) {
792 this->operator[](i) = s;
794 while (this->size() <= i) {
795 this->push_back(NO_SYMBOL_NUMBER);
797 this->operator[](i) = s;
807 TransducerHeader* header;
808 TransducerAlphabet* alphabet;
810 TransducerTablesInterface* tables;
813 void load_tables(std::istream& is);
816 Weight current_weight;
817 HfstOneLevelPaths * lookup_paths;
821 hfst::FdState<SymbolNumber> flag_state;
823 bool found_transition;
826 unsigned int recursion_depth_left;
828 void try_epsilon_transitions(
unsigned int input_tape_pos,
829 unsigned int output_tape_pos,
830 TransitionTableIndex i);
832 void try_epsilon_indices(
unsigned int input_tape_pos,
833 unsigned int output_tape_pos,
834 TransitionTableIndex i);
836 void find_transitions(SymbolNumber input,
837 unsigned int input_tape_pos,
838 unsigned int output_tape_pos,
839 TransitionTableIndex i);
841 void find_index(SymbolNumber input,
842 unsigned int input_tape_pos,
843 unsigned int output_tape_pos,
844 TransitionTableIndex i);
846 void get_analyses(
unsigned int input_tape_pos,
847 unsigned int output_tape_pos,
848 TransitionTableIndex i);
857 const TransducerAlphabet& alphabet,
858 const TransducerTable<TransitionIndex>& index_table,
859 const TransducerTable<Transition>& transition_table);
861 const TransducerAlphabet& alphabet,
862 const TransducerTable<TransitionWIndex>& index_table,
863 const TransducerTable<TransitionW>& transition_table);
866 void write(std::ostream& os)
const;
868 void display()
const;
870 const TransducerHeader& get_header()
const
872 const TransducerAlphabet& get_alphabet()
const
873 {
return *alphabet; }
874 const Encoder& get_encoder(
void)
const
876 const hfst::FdTable<SymbolNumber>& get_fd_table()
const
877 {
return alphabet->get_fd_table(); }
878 const SymbolTable& get_symbol_table()
const
879 {
return alphabet->get_symbol_table(); }
882 const TransitionIndex& get_index(TransitionTableIndex i)
const
883 {
return tables->get_index(i); }
884 const Transition& get_transition(TransitionTableIndex i)
const
885 {
return tables->get_transition(i); }
886 bool final_index(TransitionTableIndex i)
const
888 if (indexes_transition_table(i)) {
889 return tables->get_transition_finality(i);
891 return tables->get_index_finality(i);
894 bool is_infinitely_ambiguous(
void)
const
896 return header->probe_flag(Has_input_epsilon_cycles);
899 bool is_lookup_infinitely_ambiguous(
const StringVector & s);
900 bool is_lookup_infinitely_ambiguous(
const std::string & input);
901 void find_loop_epsilon_transitions(
unsigned int input_pos,
902 TransitionTableIndex i,
903 PositionStates position_states);
904 void find_loop_epsilon_indices(
unsigned int input_pos,
905 TransitionTableIndex i,
906 PositionStates position_states);
907 void find_loop_transitions(SymbolNumber input,
908 unsigned int input_pos,
909 TransitionTableIndex i,
910 PositionStates position_states);
911 void find_loop_index(SymbolNumber input,
912 unsigned int input_pos,
913 TransitionTableIndex i,
914 PositionStates position_states);
915 void find_loop(
unsigned int input_pos,
916 TransitionTableIndex i,
917 PositionStates position_states);
919 TransducerTable<TransitionWIndex> & copy_windex_table();
920 TransducerTable<TransitionW> & copy_transitionw_table();
921 TransducerTable<TransitionIndex> & copy_index_table();
922 TransducerTable<Transition> & copy_transition_table();
930 TransitionTableIndexSet get_transitions_from_state(
931 TransitionTableIndex state_index)
const;
934 bool initialize_input(
const char * input_str);
935 HfstOneLevelPaths * lookup_fd(
const StringVector & s, ssize_t limit = -1);
940 HfstOneLevelPaths * lookup_fd(
const std::string & s, ssize_t limit = -1);
941 HfstOneLevelPaths * lookup_fd(
const char * s, ssize_t limit = -1);
942 void note_analysis(
void);
945 SymbolNumber get_unknown_symbol(
void)
const
946 {
return alphabet->get_unknown_symbol(); }
947 StringSymbolMap get_string_symbol_map(
void)
const
948 {
return alphabet->build_string_symbol_map(); }
949 STransition take_epsilons(
const TransitionTableIndex i)
const;
950 STransition take_epsilons_and_flags(
const TransitionTableIndex i);
951 STransition take_non_epsilons(
const TransitionTableIndex i,
952 const SymbolNumber symbol)
const;
953 TransitionTableIndex next(
const TransitionTableIndex i,
954 const SymbolNumber symbol)
const;
955 TransitionTableIndex next_e(
const TransitionTableIndex i)
const;
956 bool has_transitions(
const TransitionTableIndex i,
957 const SymbolNumber symbol)
const;
958 bool has_epsilons_or_flags(
const TransitionTableIndex i);
959 Weight final_weight(
const TransitionTableIndex i)
const;
960 bool is_flag(
const SymbolNumber symbol)
961 {
return alphabet->is_flag_diacritic(symbol); }
962 bool is_weighted(
void)
963 {
return header->probe_flag(Weighted);}
966 friend class ConvertTransducer;
971 TransitionTableIndex index;
975 STransition(TransitionTableIndex i,
982 STransition(TransitionTableIndex i,
992 typedef std::pair<std::string, Weight> StringWeightPair;
994 class StringWeightComparison
1001 StringWeightComparison(
bool reverse_result=
false):
1002 reverse(reverse_result)
1005 bool operator() (StringWeightPair lhs, StringWeightPair rhs)
1008 return (lhs.second < rhs.second);
1010 return (lhs.second > rhs.second);
1015 typedef std::priority_queue<StringWeightPair,
1016 std::vector<StringWeightPair>,
1017 StringWeightComparison> CorrectionQueue;
1018 typedef std::priority_queue<StringWeightPair,
1019 std::vector<StringWeightPair>,
1020 StringWeightComparison> AnalysisQueue;
1021 typedef std::priority_queue<StringWeightPair,
1022 std::vector<StringWeightPair>,
1023 StringWeightComparison> HyphenationQueue;
1108 SymbolNumberVector string;
1109 unsigned int input_state;
1110 TransitionTableIndex mutator_state;
1111 TransitionTableIndex lexicon_state;
1112 hfst::FdState<SymbolNumber> flag_state;
1115 TreeNode(SymbolNumberVector prev_string,
1117 TransitionTableIndex mutator,
1118 TransitionTableIndex lexicon,
1119 hfst::FdState<SymbolNumber> state,
1121 string(prev_string),
1123 mutator_state(mutator),
1124 lexicon_state(lexicon),
1129 TreeNode(hfst::FdState<SymbolNumber> start_state):
1130 string(SymbolNumberVector()),
1134 flag_state(start_state),
1138 TreeNode update_lexicon(SymbolNumber next_symbol,
1139 TransitionTableIndex next_lexicon,
1142 TreeNode update_mutator(SymbolNumber next_symbol,
1143 TransitionTableIndex next_mutator,
1146 void increment_mutator(
void);
1148 TreeNode update(SymbolNumber next_symbol,
1149 unsigned int next_input,
1150 TransitionTableIndex next_mutator,
1151 TransitionTableIndex next_lexicon,
1154 TreeNode update(SymbolNumber next_symbol,
1155 TransitionTableIndex next_mutator,
1156 TransitionTableIndex next_lexicon,
1162 typedef std::deque<TreeNode> TreeNodeQueue;
1164 int nByte_utf8(
unsigned char c);
1170 SymbolNumberVector s;
1174 s(SymbolNumberVector())
1177 bool initialize(
const Encoder & encoder,
char * input, SymbolNumber other);
1179 unsigned int len(
void)
1184 SymbolNumber operator[](
unsigned int i)
1191 class AlphabetTranslationException:
public std::runtime_error
1195 AlphabetTranslationException(
const std::string what):
1196 std::runtime_error(what)
1209 TreeNodeQueue queue;
1210 SymbolNumberVector alphabet_translator;
1212 std::vector<std::string> symbol_table;
1215 mutator(mutator_ptr),
1216 lexicon(lexicon_ptr),
1217 input(InputString()),
1218 queue(TreeNodeQueue()),
1219 alphabet_translator(SymbolNumberVector()),
1221 symbol_table(lexicon->get_symbol_table())
1223 build_alphabet_translator();
1226 bool init_input(
char * str,
const Encoder & encoder, SymbolNumber other);
1228 void build_alphabet_translator(
void);
1229 void lexicon_epsilons(
void);
1230 void mutator_epsilons(
void);
1231 void consume_input(
void);
1232 void lexicon_consume(
void);
1235 bool check(
char * line);
1238 CorrectionQueue
correct(
char * line);
1239 std::string stringify(SymbolNumberVector symbol_vector);
A compiled transducer format, suitable for fast lookup operations.
Definition: 3831_transducer.h:804
#define HFST_THROW(E)
Macro to throw an exception of type E. Use THROW instead of regular throw with subclasses of HfstExce...
Definition: HfstExceptionDefs.h:28
A spellchecker, constructed from two optimized-lookup transducer instances. An alphabet translator is...
Definition: 3831_transducer.h:1203
std::set< HfstOneLevelPath > HfstOneLevelPaths
A set of simple paths.
Definition: HfstDataTypes.h:101
std::pair< float, StringVector > HfstOneLevelPath
A path of one level of arcs with collected weight.
Definition: HfstDataTypes.h:97
CorrectionQueue correct(char *line)
Definition: ospell.cc:282
bool check(char *line)
Definition: ospell.cc:326
Transducer has wrong type.
Definition: HfstExceptionDefs.h:388