HFST - Helsinki Finite-State Transducer Technology API  version 3.7.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TropicalWeightTransducer.h
Go to the documentation of this file.
1 // This program is free software: you can redistribute it and/or modify
2 // it under the terms of the GNU General Public License as published by
3 // the Free Software Foundation, version 3 of the License.
4 //
5 // This program is distributed in the hope that it will be useful,
6 // but WITHOUT ANY WARRANTY; without even the implied warranty of
7 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8 // GNU General Public License for more details.
9 //
10 // You should have received a copy of the GNU General Public License
11 // along with this program. If not, see <http://www.gnu.org/licenses/>.
12 #ifndef _TROPICAL_WEIGHT_TRANSDUCER_H_
13 #define _TROPICAL_WEIGHT_TRANSDUCER_H_
14 
15 #include "HfstSymbolDefs.h"
16 #include "HfstExceptionDefs.h"
17 #include "HfstFlagDiacritics.h"
18 
19 #ifdef WINDOWS
20 #include "back-ends/openfstwin/src/include/fst/fstlib.h"
21 #else
22 #include "../../../config.h"
23 #include "back-ends/openfst/src/include/fst/fstlib.h"
24 #endif // WINDOWS
25 
26 #include "HfstExtractStrings.h"
27 #include <cstdio>
28 #include <string>
29 #include <sstream>
30 #include <iostream>
31 //#include "HfstAlphabet.h"
32 
37 namespace hfst {
38 namespace implementations
39 {
40  using namespace fst; // NOTE(review): exposes every fst:: name inside hfst::implementations to all includers of this header; kept because the declarations below use unqualified names such as StdArc and StdVectorFst
41 
42  typedef StdArc::StateId StateId; // state identifier type, taken from StdArc
43 
44  typedef std::vector<StdArc> StdArcVector; // a vector of StdArc values
45  struct StdArcLessThan { // comparison functor over two StdArc values; the ordering criterion is defined outside this header
46  bool operator() (const StdArc &arc1,const StdArc &arc2) const; };
47 
48  using std::ostream;
49  using std::ostringstream;
50  using std::stringstream;
51 
52  void openfst_tropical_set_hopcroft(bool value); // declared only; NOTE(review): presumably toggles Hopcroft minimization for this backend - confirm in the implementation file
53 
54  class TropicalWeightInputStream // input stream from which StdVectorFst transducers are read (see read_transducer)
55  {
56  private:
57  std::string filename;
58  ifstream i_stream;
59  istream &input_stream; // reference member, so the class has no usable default copy assignment; NOTE(review): presumably bound by the constructors to i_stream or a standard stream - confirm
60  void skip_identifier_version_3_0(void);
61  void skip_hfst_header(void);
62  public:
63  TropicalWeightInputStream(void); // NOTE(review): presumably reads from standard input when no filename is given - confirm
64  TropicalWeightInputStream(const std::string &filename);
65  void close(void);
66  bool is_eof(void) const;
67  bool is_bad(void) const;
68  bool is_good(void) const;
69  bool is_fst(void) const;
70  bool operator() (void) const;
71  void ignore(unsigned int);
72  StdVectorFst * read_transducer(); // returns a raw pointer; ownership is not expressed in the type - NOTE(review): presumably the caller owns the result, confirm
73 
74  char stream_get();
75  short stream_get_short();
76  void stream_unget(char c);
77 
78  static bool is_fst(FILE * f); // static overloads: test a caller-supplied FILE* or istream instead of this object's stream
79  static bool is_fst(istream &s);
80  };
81 
82  class TropicalWeightOutputStream // output stream to which StdVectorFst transducers are written (see write_transducer)
83  {
84  private:
85  std::string filename;
86  ofstream o_stream;
87  ostream &output_stream; // reference member, so the class has no usable default copy assignment
88  bool hfst_format;
89  //void write_3_0_library_header(std::ostream &out);
90  public:
91  TropicalWeightOutputStream(bool hfst_format=true); // hfst_format defaults to true here ...
92  TropicalWeightOutputStream
93  (const std::string &filename, bool hfst_format=false); // ... but to false here; NOTE(review): confirm the differing defaults are intentional
94  void close(void);
95  void write(const char &c);
96  void write_transducer(StdVectorFst * transducer);
97  };
98 
99  class TropicalWeightTransitionIterator; // forward declaration; the class is defined further down in this header
100 
101  typedef StateId TropicalWeightState; // alias of StateId, used as the value type of TropicalWeightStateIterator
102 
103  class TropicalWeightStateIterator // next()/done()/value() style iterator over the states of a StdVectorFst
104  {
105  protected:
106  StateIterator<StdVectorFst> * iterator; // raw pointer member; a destructor is declared but no copy constructor or copy assignment - NOTE(review): if the destructor deletes this pointer, copying an instance would double-delete; confirm in the implementation
107  public:
108  TropicalWeightStateIterator(StdVectorFst * t);
109  ~TropicalWeightStateIterator(void);
110  void next(void);
111  bool done(void);
112  TropicalWeightState value(void);
113  };
114 
115 
116  class TropicalWeightTransition // one transition: a copy of a StdArc plus a pointer to the transducer it was taken from
117  {
118  protected:
119  StdArc arc; // stored by value
120  StdVectorFst * t; // raw pointer; NOTE(review): presumably non-owning and needed to map labels to symbol strings - confirm
121  public:
122  TropicalWeightTransition(const StdArc &arc, StdVectorFst *t);
123  ~TropicalWeightTransition(void);
124  std::string get_input_symbol(void) const; // symbols are returned as strings, by value
125  std::string get_output_symbol(void) const;
126  TropicalWeightState get_target_state(void) const;
127  TropicalWeight get_weight(void) const;
128  };
129 
130 
131  class TropicalWeightTransitionIterator // next()/done()/value() style iterator; constructed from a transducer and one of its states
132  {
133  protected:
134  ArcIterator<StdVectorFst> * arc_iterator; // raw pointer member; a destructor is declared but no copy constructor or copy assignment - NOTE(review): if the destructor deletes this pointer, copying an instance would double-delete; confirm in the implementation
135  StdVectorFst * t;
136  public:
137  TropicalWeightTransitionIterator(StdVectorFst * t, StateId state);
138  ~TropicalWeightTransitionIterator(void);
139  void next(void);
140  bool done(void);
141  TropicalWeightTransition value(void); // returns a TropicalWeightTransition object by value
142  };
143 
144 
145  class TropicalWeightTransducer // collection of operations on StdVectorFst transducers; every member function declared here is static except get_profile_seconds()
146  {
147  public:
148  static StdVectorFst * create_empty_transducer(void); // results are raw StdVectorFst pointers throughout this class; ownership is not expressed in the types - NOTE(review): presumably the caller owns each result, confirm in the implementation
149  static StdVectorFst * create_epsilon_transducer(void);
150 
151  // string versions
152  static StdVectorFst * define_transducer(const std::string &symbol);
153  static StdVectorFst * define_transducer
154  (const std::string &isymbol, const std::string &osymbol);
155  static StdVectorFst * define_transducer
156  (const hfst::StringPairVector &spv);
157  static StdVectorFst * define_transducer
158  (const hfst::StringPairSet &sps, bool cyclic=false); // cyclic defaults to false
159  static StdVectorFst * define_transducer
160  (const std::vector<StringPairSet> &spsv);
161 
162  // number versions
163  static StdVectorFst * define_transducer(unsigned int number);
164  static StdVectorFst * define_transducer
165  (unsigned int inumber, unsigned int onumber);
166  static StdVectorFst * define_transducer
167  (const hfst::NumberPairVector &npv);
168  static StdVectorFst * define_transducer
169  (const hfst::NumberPairSet &nps, bool cyclic=false); // cyclic defaults to false
170  static StdVectorFst * define_transducer
171  (const std::vector<NumberPairSet> &npsv);
172 
173  static StdVectorFst * copy(StdVectorFst * t); // unary operations: one transducer pointer in, one transducer pointer out; NOTE(review): whether the argument is modified in place is not visible here
174  static StdVectorFst * determinize(StdVectorFst * t);
175  static StdVectorFst * minimize(StdVectorFst * t);
176  static StdVectorFst * remove_epsilons(StdVectorFst * t);
177  static StdVectorFst * n_best(StdVectorFst * t, unsigned int n);
178  static StdVectorFst * prune(StdVectorFst * t);
179  static StdVectorFst * repeat_star(StdVectorFst * t);
180  static StdVectorFst * repeat_plus(StdVectorFst * t);
181  static StdVectorFst * repeat_n(StdVectorFst * t, unsigned int n);
182  static StdVectorFst * repeat_le_n(StdVectorFst * t, unsigned int n);
183  static StdVectorFst * optionalize(StdVectorFst * t);
184  static StdVectorFst * invert(StdVectorFst * t);
185  static StdVectorFst * reverse(StdVectorFst * transducer);
186  static StdVectorFst * extract_input_language(StdVectorFst * t);
187  static StdVectorFst * extract_output_language(StdVectorFst * t);
188  static void extract_paths
189  (StdVectorFst * t, hfst::ExtractStringsCb& callback,
190  int cycles=-1, FdTable<int64>* fd=NULL, bool filter_fd=false // defaults: cycles=-1, fd=NULL, filter_fd=false; NOTE(review): cycles=-1 presumably means "no limit" - confirm
191  /*bool include_spv=false*/);
192 
193  static void extract_random_paths
194  (StdVectorFst *t, HfstTwoLevelPaths &results, int max_num); // paths are written to the results out-parameter
195 
196  static void extract_random_paths_fd
197  (StdVectorFst *t, HfstTwoLevelPaths &results, int max_num, bool filter_fd);
198 
199  static StdVectorFst * compose(StdVectorFst * t1, // binary operations: two transducer pointers in, one out
200  StdVectorFst * t2);
201  static StdVectorFst * concatenate(StdVectorFst * t1,
202  StdVectorFst * t2);
203  static StdVectorFst * disjunct(StdVectorFst * t1,
204  StdVectorFst * t2);
205 
206  static StdVectorFst * disjunct
207  (StdVectorFst * t, const StringPairVector &spv);
208  static StdVectorFst * disjunct
209  (StdVectorFst * t, const NumberPairVector &npv);
210 
211  static StdVectorFst * disjunct_as_tries(StdVectorFst * t1,
212  const StdVectorFst * t2); // t2 is pointer-to-const here, unlike the binary operations above
213 
214  static StdVectorFst * intersect(StdVectorFst * t1,
215  StdVectorFst * t2);
216  static StdVectorFst * subtract(StdVectorFst * t1,
217  StdVectorFst * t2);
218  static StdVectorFst * set_weight(StdVectorFst * t,float f);
219  static StdVectorFst * set_final_weights(StdVectorFst * t, float weight, bool increment=false); // increment defaults to false
220  static StdVectorFst * transform_weights
221  (StdVectorFst * t,float (*func)(float f)); // func is a plain function pointer float -> float
222  static StdVectorFst * push_weights
223  (StdVectorFst * t, bool to_initial_state);
224 
225  static std::pair<StdVectorFst*, StdVectorFst*> harmonize
226  (StdVectorFst *t1, StdVectorFst *t2, bool unknown_symbols_in_use=true); // returns a pair of transducer pointers; unknown_symbols_in_use defaults to true
227 
228  static void write_in_att_format(StdVectorFst * t, FILE *ofile); // FILE* overloads; std::ostream overloads follow below
229  static void write_in_att_format_number(StdVectorFst * t, FILE *ofile);
230 
231  //static void test_minimize(void);
232 
233  static void write_in_att_format(StdVectorFst * t, std::ostream &os);
234  static void write_in_att_format_number
235  (StdVectorFst * t, std::ostream &os);
236 
237  static StdVectorFst * read_in_att_format(FILE *ifile);
238 
239  static bool are_equivalent(StdVectorFst *one, StdVectorFst *another);
240  static bool is_cyclic(StdVectorFst * t);
241  static bool is_automaton(StdVectorFst * t);
242 
243  static FdTable<int64>* get_flag_diacritics(StdVectorFst * t); // returns a raw FdTable pointer; NOTE(review): ownership of the result is not visible here - confirm
244 
245  static void print_alphabet(const StdVectorFst *t);
246 
247  // string versions
248  static StdVectorFst * insert_freely
249  (StdVectorFst * t, const StringPair &symbol_pair);
250  static StdVectorFst * substitute
251  (StdVectorFst * t, std::string old_symbol, std::string new_symbol); // both symbols are passed by value
252  static StdVectorFst * substitute(StdVectorFst * t,
253  StringPair old_symbol_pair,
254  StringPair new_symbol_pair);
255  static StdVectorFst * substitute(StdVectorFst * t,
256  StringPair old_symbol_pair,
257  StringPairSet new_symbol_pair_set);
258  static StdVectorFst * substitute(StdVectorFst * t,
259  const StringPair old_symbol_pair,
260  StdVectorFst *transducer);
261 
262  // number versions
263  static StdVectorFst * insert_freely
264  (StdVectorFst * t, const NumberPair &number_pair);
265  static StdVectorFst * substitute
266  (StdVectorFst * t, unsigned int, unsigned int); // parameters are unnamed; NOTE(review): presumably (old number, new number), mirroring the string overload - confirm
267  static StdVectorFst * substitute(StdVectorFst * t,
268  NumberPair old_number_pair,
269  NumberPair new_number_pair);
270  static StdVectorFst * substitute(StdVectorFst * t,
271  const NumberPair old_number_pair,
272  StdVectorFst *transducer);
273 
274  static void insert_to_alphabet
275  (StdVectorFst *t, const std::string &symbol);
276  static void remove_from_alphabet
277  (StdVectorFst *t, const std::string &symbol);
278  static StringSet get_alphabet(StdVectorFst *t);
279  static void get_first_input_symbols
280  (StdVectorFst *t, StateId s, std::set<StateId> & visited_states, StringSet & symbols); // visited_states and symbols are non-const references that the call can modify
281  static StringSet get_first_input_symbols(StdVectorFst *t);
282  static unsigned int get_symbol_number(StdVectorFst *t,
283  const std::string &symbol);
284  static unsigned int get_biggest_symbol_number(StdVectorFst *t);
285  static StringVector get_symbol_vector(StdVectorFst *t);
286 
287  static NumberNumberMap create_mapping
288  (StdVectorFst * t1, StdVectorFst * t2);
289  static void recode_symbol_numbers
290  (StdVectorFst * t, hfst::NumberNumberMap &km);
291  static StdVectorFst * expand_arcs
292  (StdVectorFst * t, hfst::StringSet &unknown,
293  bool unknown_symbols_in_use);
294 
295 #ifdef FOO
296  static StdVectorFst * compose_intersect(StdVectorFst * t, // only declared when the FOO macro is defined
297  Grammar * grammar);
298 #endif
299 
300  float get_profile_seconds(); // the only non-static member function in this class; NOTE(review): confirm this is intentional, since the class declares no data members
301 
302  static unsigned int number_of_states(const StdVectorFst * t);
303  static unsigned int number_of_arcs(const StdVectorFst * t);
304 
305  // for HFST version 2 transducer handling
306  static void set_symbol_table
307  (StdVectorFst * t,
308  std::vector<std::pair<unsigned short, std::string> > symbol_mappings); // the vector is passed by value, so it is copied on every call
309 
310  private:
311  static fst::SymbolTable create_symbol_table(std::string name); // returns the SymbolTable by value
312  static void initialize_symbol_tables(StdVectorFst *t);
313  static void remove_symbol_table(StdVectorFst *t);
314 
315  /* Maps state numbers in AT&T text format to state ids used by
316  OpenFst transducers. */
317  typedef std::map<int, StateId> StateMap;
318  static StateId add_and_map_state(StdVectorFst *t, int state_number,
319  StateMap &state_map); // state_map is a non-const reference that the call can modify
320 
321  static int has_arc(StdVectorFst &t, // returns int rather than bool; the meaning of the value is defined in the implementation
322  StdArc::StateId sourcestate,
323  StdArc::Label ilabel,
324  StdArc::Label olabel);
325  static void disjunct_as_tries(StdVectorFst &t1, // private overload that takes explicit state ids; NOTE(review): presumably the per-state worker behind the public disjunct_as_tries - confirm
326  StateId t1_state,
327  const StdVectorFst * t2,
328  StateId t2_state);
329  static void add_sub_trie(StdVectorFst &t1,
330  StateId t1_state,
331  const StdVectorFst * t2,
332  StateId t2_state);
333 
334  public:
335  static StateId add_state(StdVectorFst *t); // low-level construction helpers that act on individual states and transitions
336  static void set_final_weight(StdVectorFst *t, StateId s, float w);
337  static void add_transition
338  (StdVectorFst *t, StateId source,
339  std::string &isymbol, std::string &osymbol, float w, StateId target); // isymbol and osymbol are non-const references, so temporaries and string literals cannot be passed directly
340  static float get_final_weight(StdVectorFst *t, StateId s);
341  static float is_final(StdVectorFst *t, StateId s); // NOTE(review): declared to return float although the name reads as a predicate; check the definition before changing it to bool
342  static StateId get_initial_state(StdVectorFst *t);
343  static void represent_empty_transducer_as_having_one_state
344  (StdVectorFst *t);
345 
346  };
347 
348 } }
349 #endif
std::pair< String, String > StringPair
A symbol pair in a transition.
Definition: HfstSymbolDefs.h:71
std::vector< std::pair< std::string, std::string > > StringPairVector
A vector of string pairs.
Definition: HfstDataTypes.h:106
A file for exceptions.
Class declarations for flag diacritic handling.
std::set< HfstTwoLevelPath > HfstTwoLevelPaths
A set of two-level weighted paths.
Definition: HfstDataTypes.h:110
std::set< StringPair > StringPairSet
A set of symbol pairs used in substituting symbol pairs and in rule functions.
Definition: HfstSymbolDefs.h:83
Typedefs and functions for symbols, symbol pairs and sets of symbols.