HFST - Helsinki Finite-State Transducer Technology API  version 3.7.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
pmatch.h
1 #ifndef _HFST_OL_TRANSDUCER_PMATCH_H_
2 #define _HFST_OL_TRANSDUCER_PMATCH_H_
3 
4 #include <map>
5 #include <stack>
6 #include <sstream>
7 #include "transducer.h"
8 
9 namespace hfst_ol {
10 
11  class PmatchTransducer;
12  class PmatchContainer;
13 
14  const unsigned int PMATCH_MAX_RECURSION_DEPTH = 5000;
15 
16  typedef std::map<SymbolNumber, PmatchTransducer *> RtnMap;
17  enum SpecialSymbol{entry,
18  exit,
19  LC_entry,
20  LC_exit,
21  RC_entry,
22  RC_exit,
23  NLC_entry,
24  NLC_exit,
25  NRC_entry,
26  NRC_exit,
27  Pmatch_passthrough,
28  boundary};
29 
30  class PmatchAlphabet: public TransducerAlphabet {
31  protected:
32  RtnMap rtns;
33  std::map<SpecialSymbol, SymbolNumber> special_symbols;
34  std::map<SymbolNumber, std::string> end_tag_map;
35  std::map<std::string, SymbolNumber> rtn_names;
36  bool is_end_tag(const SymbolNumber symbol) const;
37  std::string end_tag(const SymbolNumber symbol);
38  std::string start_tag(const SymbolNumber symbol);
39  bool extract_tags;
40 
41  public:
42  PmatchAlphabet(std::istream& is, SymbolNumber symbol_count);
43  PmatchAlphabet(void);
44  ~PmatchAlphabet(void);
45  static bool is_end_tag(const std::string & symbol);
46  static bool is_insertion(const std::string & symbol);
47  static std::string name_from_insertion(
48  const std::string & symbol);
49  void add_special_symbol(const std::string & str, SymbolNumber symbol_number);
50  void add_rtn(PmatchTransducer * rtn, std::string const & name);
51  bool has_rtn(std::string const & name) const;
52  bool has_rtn(SymbolNumber symbol) const;
53  PmatchTransducer * get_rtn(SymbolNumber symbol);
54  SymbolNumber get_special(SpecialSymbol special) const;
55  SymbolNumberVector get_specials(void) const;
56  std::string stringify(const SymbolNumberVector & str);
57  std::string locatefy(const SymbolNumberVector & str);
58 
59  friend class PmatchTransducer;
60  friend class PmatchContainer;
61  };
62 
63  class PmatchContainer
64  {
65  protected:
66  PmatchAlphabet alphabet;
67  Encoder * encoder;
68  SymbolNumber orig_symbol_count;
69  SymbolNumber symbol_count;
70  PmatchTransducer * toplevel;
71  size_t io_size;
72  SymbolNumber * input_tape;
73  SymbolNumber * orig_input_tape;
74  SymbolNumber * output_tape;
75  SymbolNumber * orig_output_tape;
76  SymbolNumberVector output;
77  std::vector<char> possible_first_symbols;
78  bool verbose;
79  unsigned int recursion_depth_left;
80 
81  public:
82 
83  PmatchContainer(std::istream & is, bool _verbose = false,
84  bool extract_tags = false);
85  ~PmatchContainer(void);
86  unsigned int input_histogram_buffer[80];
87 
88  long line_number;
89 
90  void initialize_input(const char * input);
91  bool has_unsatisfied_rtns(void) const;
92  std::string get_unsatisfied_rtn_name(void) const;
93  std::string match(std::string & input);
94  std::string locate(std::string & input);
95  bool has_queued_input(void);
96  void copy_to_output(const SymbolNumberVector & best_result);
97  std::string stringify_output(void);
98  std::string locatefy_output(void);
99  static std::string parse_name_from_hfst3_header(std::istream & f);
100  unsigned int input_pos(SymbolNumber * tape_pos) { return tape_pos - orig_input_tape; }
101  void be_verbose(void) { verbose = true; }
102  bool is_verbose(void) { return verbose; }
103  bool try_recurse(void)
104  {
105  if (recursion_depth_left > 0) {
106  --recursion_depth_left;
107  return true;
108  } else {
109  return false;
110  }
111  }
112  void unrecurse(void) { ++recursion_depth_left; }
113  void reset_recursion(void) { recursion_depth_left = PMATCH_MAX_RECURSION_DEPTH; }
114  };
115 
116  struct SimpleTransition
117  {
118  SymbolNumber input;
119  SymbolNumber output;
120  TransitionTableIndex target;
121  static const size_t SIZE =
122  sizeof(SymbolNumber) +
123  sizeof(SymbolNumber) +
124  sizeof(TransitionTableIndex);
125  SimpleTransition(
126  SymbolNumber i, SymbolNumber o, TransitionTableIndex t):
127  input(i), output(o), target(t) {}
128  bool final(void) const {
129  return input == NO_SYMBOL_NUMBER &&
130  output == NO_SYMBOL_NUMBER &&
131  target == 1;
132  }
133  };
134 
135  struct SimpleIndex
136  {
137  SymbolNumber input;
138  TransitionTableIndex target;
139  static const size_t SIZE =
140  sizeof(SymbolNumber) +
141  sizeof(TransitionTableIndex);
142  SimpleIndex(
143  SymbolNumber i, TransitionTableIndex t):
144  input(i), target(t) {}
145  bool final(void) const {
146  return input == NO_SYMBOL_NUMBER &&
147  target != NO_TABLE_INDEX;
148  }
149  };
150 
151  struct ContextMatchedTrap
152  {
153  bool polarity;
154  ContextMatchedTrap(bool p): polarity(p) {}
155  };
156 
157  class PmatchTransducer
158  {
159  protected:
160  enum ContextChecking{none, LC, NLC, RC, NRC};
161 
162 // Transducers have static data, ie. tables for describing the states and
163 // transitions, and dynamic data, which is altered during lookup.
164 // In pmatch several instances of the same transducer may be operating
165 // in a stack, so this dynamic data is put in a class of its own.
166  struct LocalVariables
167  {
168  hfst::FdState<SymbolNumber> flag_state;
169  char tape_step;
170  SymbolNumber * context_placeholder;
171  ContextChecking context;
172  bool default_symbol_trap;
173  bool negative_context_success;
174  bool pending_passthrough;
175  Weight running_weight;
176  };
177 
178  struct RtnVariables
179  {
180  SymbolNumber * candidate_input_pos;
181  SymbolNumber * output_tape_head;
182  SymbolNumberVector best_result;
183  Weight best_weight;
184  };
185 
186  std::stack<LocalVariables> local_stack;
187  std::stack<RtnVariables> rtn_stack;
188 
189  std::vector<TransitionW> transition_table;
190  std::vector<TransitionWIndex> index_table;
191 
192  PmatchAlphabet & alphabet;
193  SymbolNumber orig_symbol_count;
194  PmatchContainer * container;
195 
196  // The mutually recursive lookup-handling functions
197 
198  void try_epsilon_transitions(SymbolNumber * input_tape,
199  SymbolNumber * output_tape,
200  TransitionTableIndex i);
201 
202  void try_epsilon_indices(SymbolNumber * input_tape,
203  SymbolNumber * output_tape,
204  TransitionTableIndex i);
205 
206  void find_transitions(SymbolNumber input,
207  SymbolNumber * input_tape,
208  SymbolNumber * output_tape,
209  TransitionTableIndex i);
210 
211  void find_index(SymbolNumber input,
212  SymbolNumber * input_tape,
213  SymbolNumber * output_tape,
214  TransitionTableIndex i);
215 
216  void get_analyses(SymbolNumber * input_tape,
217  SymbolNumber * output_tape,
218  TransitionTableIndex index);
219 
220  bool checking_context(void) const;
221  bool try_entering_context(SymbolNumber symbol);
222  bool try_exiting_context(SymbolNumber symbol);
223  void exit_context(void);
224 
225  void collect_first_epsilon(TransitionTableIndex i,
226  SymbolNumberVector const& input_symbols,
227  std::set<TransitionTableIndex> & seen_indices);
228  void collect_first_epsilon_index(TransitionTableIndex i,
229  SymbolNumberVector const& input_symbols,
230  std::set<TransitionTableIndex> & seen_indices);
231  void collect_first_transition(TransitionTableIndex i,
232  SymbolNumberVector const& input_symbols,
233  std::set<TransitionTableIndex> & seen_indices);
234  void collect_first_index(TransitionTableIndex i,
235  SymbolNumberVector const& input_symbols,
236  std::set<TransitionTableIndex> & seen_indices);
237  void collect_first(TransitionTableIndex i,
238  SymbolNumberVector const& input_symbols,
239  std::set<TransitionTableIndex> & seen_indices);
240 
241 
242  public:
243  PmatchTransducer(std::istream& is,
244  TransitionTableIndex index_table_size,
245  TransitionTableIndex transition_table_size,
246  PmatchAlphabet & alphabet,
247  PmatchContainer * container);
248 
249  std::set<SymbolNumber> possible_first_symbols;
250 
251  // const SimpleIndex& get_index(TransitionTableIndex i) const
252  // { return index_table[i] }
253  // const Transition& get_transition(TransitionTableIndex i) const
254  // { return tables->get_transition(i); }
255  bool final_index(TransitionTableIndex i) const
256  {
257  if (indexes_transition_table(i)) {
258  return transition_table[i].final();
259  } else {
260  return index_table[i].final();
261  }
262  }
263 
264  static bool indexes_transition_table(TransitionTableIndex i)
265  { return i >= TRANSITION_TARGET_TABLE_START; }
266 
267  const SymbolNumberVector & get_best_result(void) const
268  { return rtn_stack.top().best_result; }
269  SymbolNumber * get_candidate_input_pos(void) const
270  { return rtn_stack.top().candidate_input_pos; }
271 
272  void match(SymbolNumber ** input_tape_entry, SymbolNumber ** output_tape_entry);
273  void rtn_call(SymbolNumber * input_tape_entry, SymbolNumber * output_tape_entry);
274  void rtn_exit(void);
275  void note_analysis(SymbolNumber * input_tape, SymbolNumber * output_tape);
276  void collect_possible_first_symbols(void);
277 
278  };
279 
280 }
281 
282 #endif //_HFST_OL_TRANSDUCER_PMATCH_H_