1 #ifndef _HFST_OL_TRANSDUCER_PMATCH_H_
2 #define _HFST_OL_TRANSDUCER_PMATCH_H_
7 #include "transducer.h"
// Forward declarations: both classes are referred to below (RtnMap, friend
// declarations, pointer members) before their definitions appear.
11 class PmatchTransducer;
12 class PmatchContainer;
// Value that PmatchContainer::reset_recursion() assigns to
// recursion_depth_left, i.e. the full recursion budget.
14 const unsigned int PMATCH_MAX_RECURSION_DEPTH = 5000;
// Lookup table from a symbol number to a PmatchTransducer pointer.
// NOTE(review): presumably one entry per named sub-network (RTN); ownership
// of the pointed-to transducers is not visible here -- confirm.
16 typedef std::map<SymbolNumber, PmatchTransducer *> RtnMap;
17 enum SpecialSymbol{entry,
// Alphabet type for the pmatch runtime: a TransducerAlphabet extended with
// lookup tables for special symbols, end tags and named sub-networks (RTNs).
// NOTE(review): access specifiers and the closing brace of this class are not
// visible in this excerpt.
30 class PmatchAlphabet:
public TransducerAlphabet {
// SpecialSymbol enumerator -> symbol number assigned to it.
33 std::map<SpecialSymbol, SymbolNumber> special_symbols;
// Symbol number -> string associated with that end-tag symbol.
34 std::map<SymbolNumber, std::string> end_tag_map;
// RTN name -> symbol number.
35 std::map<std::string, SymbolNumber> rtn_names;
// Symbol-number overload of is_end_tag (a static string overload is declared
// further down).
36 bool is_end_tag(
const SymbolNumber symbol)
const;
// Returns the end-tag / start-tag text for `symbol`.
// NOTE(review): exact output format is defined in the implementation file.
37 std::string end_tag(
const SymbolNumber symbol);
38 std::string start_tag(
const SymbolNumber symbol);
// Constructs the alphabet from stream `is`; `symbol_count` is the number of
// symbols expected (presumably the count stored in the transducer header --
// confirm).
42 PmatchAlphabet(std::istream& is, SymbolNumber symbol_count);
44 ~PmatchAlphabet(
void);
// Static predicates / helpers that classify or decode a symbol by its
// string form, without needing an alphabet instance.
45 static bool is_end_tag(
const std::string & symbol);
46 static bool is_insertion(
const std::string & symbol);
47 static std::string name_from_insertion(
48 const std::string & symbol);
// Registers `symbol_number` for the symbol string `str`.
// NOTE(review): presumably fills special_symbols / end_tag_map / rtn_names
// depending on the form of `str` -- confirm in the implementation.
49 void add_special_symbol(
const std::string & str, SymbolNumber symbol_number);
// Associates the transducer `rtn` with the RTN called `name`.
50 void add_rtn(PmatchTransducer * rtn, std::string
const & name);
// RTN presence queries, by name or by symbol number.
51 bool has_rtn(std::string
const & name)
const;
52 bool has_rtn(SymbolNumber symbol)
const;
// Returns the transducer registered for `symbol`.
// NOTE(review): behaviour for an unregistered symbol is not visible here.
53 PmatchTransducer * get_rtn(SymbolNumber symbol);
// Returns the symbol number assigned to one special symbol / to all of them.
54 SymbolNumber get_special(SpecialSymbol special)
const;
55 SymbolNumberVector get_specials(
void)
const;
// Render a symbol-number sequence as a string; two output flavours are
// declared (stringify and locatefy).
56 std::string stringify(
const SymbolNumberVector & str);
57 std::string locatefy(
const SymbolNumberVector & str);
// Both runtime classes are granted access to the non-public members.
59 friend class PmatchTransducer;
60 friend class PmatchContainer;
// Members of the pmatch container (the class header line is not visible in
// this excerpt; the constructor/destructor below name it PmatchContainer).
// It holds the alphabet, the top-level transducer, the input/output tapes
// and the recursion budget.
66 PmatchAlphabet alphabet;
// Two symbol counts are kept: an "orig" one and a current one.
// NOTE(review): presumably the current count can grow when unknown input
// symbols are added -- confirm.
68 SymbolNumber orig_symbol_count;
69 SymbolNumber symbol_count;
70 PmatchTransducer * toplevel;
// Tape cursors plus "orig_" pointers; input_pos() measures offsets from
// orig_input_tape.
// NOTE(review): allocation and ownership of these raw buffers are not
// visible here.
72 SymbolNumber * input_tape;
73 SymbolNumber * orig_input_tape;
74 SymbolNumber * output_tape;
75 SymbolNumber * orig_output_tape;
76 SymbolNumberVector output;
// NOTE(review): presumably a per-symbol flag table (indexed by symbol
// number) marking symbols that can start a match -- confirm.
77 std::vector<char> possible_first_symbols;
// Remaining recursion budget; see try_recurse / unrecurse / reset_recursion.
79 unsigned int recursion_depth_left;
// Builds the container from stream `is`. Both flags default to false.
83 PmatchContainer(std::istream & is,
bool _verbose =
false,
84 bool extract_tags =
false);
85 ~PmatchContainer(
void);
// Fixed array of 80 unsigned counters.
// NOTE(review): its use is not visible in this header.
86 unsigned int input_histogram_buffer[80];
// Prepares the input tape from the C string `input`.
90 void initialize_input(
const char * input);
// Query for RTNs that are referenced but not available, and the name of one.
// NOTE(review): semantics inferred from the names -- confirm.
91 bool has_unsatisfied_rtns(
void)
const;
92 std::string get_unsatisfied_rtn_name(
void)
const;
// Entry points taking the input by non-const reference.
// NOTE(review): whether `input` is modified is not visible here.
93 std::string match(std::string & input);
94 std::string locate(std::string & input);
95 bool has_queued_input(
void);
96 void copy_to_output(
const SymbolNumberVector & best_result);
97 std::string stringify_output(
void);
98 std::string locatefy_output(
void);
99 static std::string parse_name_from_hfst3_header(std::istream & f);
// Offset of `tape_pos` from the start of the input tape, in SymbolNumber
// elements. The pointer difference is converted to unsigned int.
100 unsigned int input_pos(SymbolNumber * tape_pos) {
return tape_pos - orig_input_tape; }
// Verbosity switch; the `verbose` member's declaration is not visible in
// this excerpt.
101 void be_verbose(
void) { verbose =
true; }
102 bool is_verbose(
void) {
return verbose; }
// Takes one unit from the recursion budget when any is left.
// NOTE(review): the return statements and closing braces are missing from
// this excerpt; presumably returns true on success and false otherwise.
103 bool try_recurse(
void)
105 if (recursion_depth_left > 0) {
106 --recursion_depth_left;
// Gives one unit back to the recursion budget.
112 void unrecurse(
void) { ++recursion_depth_left; }
// Restores the full budget (PMATCH_MAX_RECURSION_DEPTH).
113 void reset_recursion(
void) { recursion_depth_left = PMATCH_MAX_RECURSION_DEPTH; }
// Transition record with an input symbol, an output symbol and a target
// index (see the constructor's initializer list).
// NOTE(review): the declarations of `input` / `output`, the constructor
// name, and the end of final() are missing from this excerpt.
116 struct SimpleTransition
120 TransitionTableIndex target;
// Sum of the sizes of the three fields (two SymbolNumbers and one
// TransitionTableIndex), without any padding.
// NOTE(review): presumably the on-disk record size -- confirm.
121 static const size_t SIZE =
122 sizeof(SymbolNumber) +
123 sizeof(SymbolNumber) +
124 sizeof(TransitionTableIndex);
// Constructor arguments: i = input symbol, o = output symbol, t = target.
126 SymbolNumber i, SymbolNumber o, TransitionTableIndex t):
127 input(i), output(o), target(t) {}
// Finality test: requires input and output to both equal NO_SYMBOL_NUMBER;
// the remaining condition is cut off in this excerpt.
128 bool final(void)
const {
129 return input == NO_SYMBOL_NUMBER &&
130 output == NO_SYMBOL_NUMBER &&
// Tail of a second record type holding an input symbol and a target index.
// NOTE(review): the struct header, the `input` declaration and the
// constructor name are missing from this excerpt.
138 TransitionTableIndex target;
// Sum of the sizes of the two fields (one SymbolNumber and one
// TransitionTableIndex), without any padding.
139 static const size_t SIZE =
140 sizeof(SymbolNumber) +
141 sizeof(TransitionTableIndex);
// Constructor arguments: i = input symbol, t = target.
143 SymbolNumber i, TransitionTableIndex t):
144 input(i), target(t) {}
// True when input is NO_SYMBOL_NUMBER and target is a real index
// (i.e. not NO_TABLE_INDEX).
145 bool final(void)
const {
146 return input == NO_SYMBOL_NUMBER &&
147 target != NO_TABLE_INDEX;
// Small value type that stores a single boolean `polarity`, set at
// construction.
// NOTE(review): the `polarity` member declaration is missing from this
// excerpt; how the type is used (e.g. as a thrown signal) is not visible
// here -- confirm.
151 struct ContextMatchedTrap
154 ContextMatchedTrap(
bool p): polarity(p) {}
// One pmatch transducer: owns its transition/index tables and the stacks of
// per-call state, and shares the alphabet and container with its siblings.
// NOTE(review): braces, access specifiers and the header of the
// RtnVariables struct are missing from this excerpt.
157 class PmatchTransducer
// Which kind of context check is in progress, if any.
// NOTE(review): presumably LC/RC = left/right context and NLC/NRC their
// negated forms -- confirm.
160 enum ContextChecking{none, LC, NLC, RC, NRC};
// State kept on local_stack (one LocalVariables per stack entry).
166 struct LocalVariables
168 hfst::FdState<SymbolNumber> flag_state;
170 SymbolNumber * context_placeholder;
171 ContextChecking context;
172 bool default_symbol_trap;
173 bool negative_context_success;
174 bool pending_passthrough;
175 Weight running_weight;
// The next three fields are read through rtn_stack.top() by the accessors
// further down (get_best_result, get_candidate_input_pos), so they belong
// to the RtnVariables entries kept on rtn_stack.
180 SymbolNumber * candidate_input_pos;
181 SymbolNumber * output_tape_head;
182 SymbolNumberVector best_result;
186 std::stack<LocalVariables> local_stack;
187 std::stack<RtnVariables> rtn_stack;
// The two tables that final_index() consults.
189 std::vector<TransitionW> transition_table;
190 std::vector<TransitionWIndex> index_table;
// Non-owning references to shared state: the alphabet is held by reference,
// the container by raw pointer.
192 PmatchAlphabet & alphabet;
193 SymbolNumber orig_symbol_count;
194 PmatchContainer * container;
// Traversal helpers. Each takes the current input/output tape positions and
// a table index `i`; the find_* variants also take the input symbol being
// matched.
198 void try_epsilon_transitions(SymbolNumber * input_tape,
199 SymbolNumber * output_tape,
200 TransitionTableIndex i);
202 void try_epsilon_indices(SymbolNumber * input_tape,
203 SymbolNumber * output_tape,
204 TransitionTableIndex i);
206 void find_transitions(SymbolNumber input,
207 SymbolNumber * input_tape,
208 SymbolNumber * output_tape,
209 TransitionTableIndex i);
211 void find_index(SymbolNumber input,
212 SymbolNumber * input_tape,
213 SymbolNumber * output_tape,
214 TransitionTableIndex i);
216 void get_analyses(SymbolNumber * input_tape,
217 SymbolNumber * output_tape,
218 TransitionTableIndex index);
// Context-check helpers.
220 bool checking_context(
void)
const;
221 bool try_entering_context(SymbolNumber symbol);
222 bool try_exiting_context(SymbolNumber symbol);
223 void exit_context(
void);
// collect_first* family: all share the same parameter list -- a table index,
// a list of input symbols, and a set of already-seen indices passed by
// non-const reference.
// NOTE(review): presumably `seen_indices` guards against revisiting states
// while gathering possible_first_symbols -- confirm.
225 void collect_first_epsilon(TransitionTableIndex i,
226 SymbolNumberVector
const& input_symbols,
227 std::set<TransitionTableIndex> & seen_indices);
228 void collect_first_epsilon_index(TransitionTableIndex i,
229 SymbolNumberVector
const& input_symbols,
230 std::set<TransitionTableIndex> & seen_indices);
231 void collect_first_transition(TransitionTableIndex i,
232 SymbolNumberVector
const& input_symbols,
233 std::set<TransitionTableIndex> & seen_indices);
234 void collect_first_index(TransitionTableIndex i,
235 SymbolNumberVector
const& input_symbols,
236 std::set<TransitionTableIndex> & seen_indices);
237 void collect_first(TransitionTableIndex i,
238 SymbolNumberVector
const& input_symbols,
239 std::set<TransitionTableIndex> & seen_indices);
// Reads the transducer from `is`, given the sizes of both tables, and binds
// it to the shared alphabet and owning container.
243 PmatchTransducer(std::istream& is,
244 TransitionTableIndex index_table_size,
245 TransitionTableIndex transition_table_size,
246 PmatchAlphabet & alphabet,
247 PmatchContainer * container);
249 std::set<SymbolNumber> possible_first_symbols;
// Finality of table position `i`: dispatches to transition_table or
// index_table depending on indexes_transition_table(i).
// NOTE(review): in the transition-table branch `i` is used as the vector
// index without subtracting TRANSITION_TARGET_TABLE_START; verify that
// callers pass an already-adjusted index, otherwise this reads out of range.
255 bool final_index(TransitionTableIndex i)
const
257 if (indexes_transition_table(i)) {
258 return transition_table[i].final();
260 return index_table[i].final();
// True when `i` is at or above TRANSITION_TARGET_TABLE_START.
264 static bool indexes_transition_table(TransitionTableIndex i)
265 {
return i >= TRANSITION_TARGET_TABLE_START; }
// Accessors for the top rtn_stack entry. They call top() unconditionally,
// so rtn_stack must be non-empty when they are used.
267 const SymbolNumberVector & get_best_result(
void)
const
268 {
return rtn_stack.top().best_result; }
269 SymbolNumber * get_candidate_input_pos(
void)
const
270 {
return rtn_stack.top().candidate_input_pos; }
// match takes pointers to the tape cursors (pointer-to-pointer), so it can
// move the caller's cursors; rtn_call takes the cursors by value.
272 void match(SymbolNumber ** input_tape_entry, SymbolNumber ** output_tape_entry);
273 void rtn_call(SymbolNumber * input_tape_entry, SymbolNumber * output_tape_entry);
275 void note_analysis(SymbolNumber * input_tape, SymbolNumber * output_tape);
276 void collect_possible_first_symbols(
void);
282 #endif //_HFST_OL_TRANSDUCER_PMATCH_H_