HFST - Helsinki Finite-State Transducer Technology API  version 3.7.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
LogWeightTransducer.h
Go to the documentation of this file.
1 // This program is free software: you can redistribute it and/or modify
2 // it under the terms of the GNU General Public License as published by
3 // the Free Software Foundation, version 3 of the License.
4 //
5 // This program is distributed in the hope that it will be useful,
6 // but WITHOUT ANY WARRANTY; without even the implied warranty of
7 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8 // GNU General Public License for more details.
9 //
10 // You should have received a copy of the GNU General Public License
11 // along with this program. If not, see <http://www.gnu.org/licenses/>.
12 #ifndef _LOG_WEIGHT_TRANSDUCER_H_
13 #define _LOG_WEIGHT_TRANSDUCER_H_
14 
15 #include "HfstSymbolDefs.h"
16 #include "HfstExceptionDefs.h"
17 #include "HfstFlagDiacritics.h"
18 
19 #ifdef WINDOWS
20 #include "back-ends/openfstwin/src/include/fst/fstlib.h"
21 #else
22 #include "../../../config.h"
23 #include "back-ends/openfst/src/include/fst/fstlib.h"
24 #endif // WINDOWS
25 
26 #include "HfstExtractStrings.h"
27 #include <cstdio>
28 #include <string>
29 #include <sstream>
30 #include <iostream>
31 //#include "HfstAlphabet.h"
32 
37 namespace hfst {
38 namespace implementations
39 {
40  using namespace fst; // NOTE(review): using-directive inside a header; every file that includes this header sees fst's names within hfst::implementations
41  ; // stray empty declaration (no effect)
42  typedef LogArc::StateId StateId; // state identifier type, taken from LogArc
43  typedef VectorFst<LogArc> LogFst; // transducer type used by all declarations below
44 
45  typedef std::vector<LogArc> LogArcVector; // vector of LogArc values
46  struct LogArcLessThan { // binary predicate over two LogArc values; only declared here, ordering criterion not visible
47  bool operator() (const LogArc &arc1,const LogArc &arc2) const; };
48 
49  using std::ostream;
50  using std::ostringstream;
51  using std::stringstream;
52 
53  void openfst_log_set_hopcroft(bool value); // presumably toggles a Hopcroft-minimization setting for this back-end -- TODO confirm against the definition
54 
55  class LogWeightInputStream // Input-side stream wrapper that yields LogFst transducers via read_transducer(); only declarations are visible in this header
56  {
57  private:
58  std::string filename; // presumably the name passed to the filename constructor -- TODO confirm
59  ifstream i_stream; // file stream owned by this object
60  istream &input_stream; // reference member: must be bound by each constructor; presumably i_stream or a standard stream -- TODO confirm
61  void skip_identifier_version_3_0(void);
62  void skip_hfst_header(void);
63  public:
64  LogWeightInputStream(void); // default construction; which stream is used is decided in the definition (not shown)
65  LogWeightInputStream(const std::string &filename); // NOTE(review): single-argument constructor is not explicit
66  void close(void);
67  bool is_eof(void) const; // stream-state queries (eof / bad / good)
68  bool is_bad(void) const;
69  bool is_good(void) const;
70  bool is_fst(void) const; // presumably checks whether the upcoming data is an OpenFst transducer -- TODO confirm
71  bool operator() (void) const;
72  void ignore(unsigned int); // parameter is unnamed; presumably a count of characters to skip -- TODO confirm
73  LogFst * read_transducer(); // returns a raw LogFst pointer; ownership is not expressed in the type -- confirm who deletes it
74 
75  char stream_get(); // low-level character/short access to the underlying stream
76  short stream_get_short();
77  void stream_unget(char c);
78 
79  static bool is_fst(FILE * f); // static overloads of is_fst taking a C FILE or a C++ istream
80  static bool is_fst(istream &s);
81  };
82 
83  class LogWeightOutputStream // Output-side counterpart: writes single characters and LogFst transducers; only declarations are visible in this header
84  {
85  private:
86  std::string filename; // presumably the name passed to the filename constructor -- TODO confirm
87  ofstream o_stream; // file stream owned by this object
88  ostream &output_stream; // reference member: must be bound by each constructor; presumably o_stream or a standard stream -- TODO confirm
89  //void write_3_0_library_header(std::ostream &out);
90  public:
91  LogWeightOutputStream(void); // default construction; target stream is decided in the definition (not shown)
92  LogWeightOutputStream(const std::string &filename); // NOTE(review): single-argument constructor is not explicit
93  void close(void);
94  void write(const char &c); // takes a single char by const reference
95  void write_transducer(LogFst * transducer); // takes a non-const pointer; whether the transducer is modified is not visible here
96  };
97 
98  class LogWeightTransitionIterator; // forward declaration
99 
100  typedef StateId LogWeightState; // states are exposed as plain StateId values
101 
102  class LogWeightStateIterator // next()/done()/value() iteration over the states of a LogFst, wrapping an OpenFst StateIterator
103  {
104  protected:
105  StateIterator<LogFst> * iterator; // NOTE(review): raw pointer member with a user-declared destructor but no copy constructor/assignment declared -- confirm instances are never copied
106  public:
107  LogWeightStateIterator(LogFst * t); // t: transducer whose states are iterated
108  ~LogWeightStateIterator(void);
109  void next(void); // advance to the next state
110  bool done(void); // presumably true once all states have been visited -- TODO confirm
111  LogWeightState value(void); // current state, as a LogWeightState (StateId)
112  };
113 
114 
115  class LogWeightTransition // One transition: a LogArc stored by value plus a pointer to the LogFst it came from
116  {
117  protected:
118  LogArc arc; // the arc, held by value
119  LogFst * t; // transducer pointer; presumably used to look up symbol strings, not owned -- TODO confirm
120  public:
121  LogWeightTransition(const LogArc &arc, LogFst *t);
122  ~LogWeightTransition(void);
123  std::string get_input_symbol(void) const; // input/output symbols are returned as strings by value
124  std::string get_output_symbol(void) const;
125  LogWeightState get_target_state(void) const; // target state as a LogWeightState (StateId)
126  LogWeight get_weight(void) const; // weight returned as an OpenFst LogWeight
127  };
128 
129 
130  class LogWeightTransitionIterator // next()/done()/value() iteration over the transitions leaving one state of a LogFst, wrapping an OpenFst ArcIterator
131  {
132  protected:
133  ArcIterator<LogFst> * arc_iterator; // NOTE(review): raw pointer member with a user-declared destructor but no copy control declared -- confirm instances are never copied
134  LogFst * t; // transducer being iterated
135  public:
136  LogWeightTransitionIterator(LogFst * t, StateId state); // state: the source state whose arcs are iterated
137  ~LogWeightTransitionIterator(void);
138  void next(void); // advance to the next transition
139  bool done(void); // presumably true once all transitions of the state have been visited -- TODO confirm
140  LogWeightTransition value(void); // current transition, returned by value
141  };
142 
143 
144  class LogWeightTransducer // Collection of (almost entirely static) operations on LogFst transducers; only declarations are visible in this header
145  {
146  public:
147  static LogFst * create_empty_transducer(void); // factory functions return raw LogFst pointers; ownership is not expressed in the type -- confirm who deletes them
148  static LogFst * create_epsilon_transducer(void);
149 
150  // string versions
151  static LogFst * define_transducer(const std::string &symbol); // build from one symbol
152  static LogFst * define_transducer
153  (const std::string &isymbol, const std::string &osymbol); // build from an input:output symbol pair
154  static LogFst * define_transducer
155  (const hfst::StringPairVector &spv); // build from a vector of string pairs
156  static LogFst * define_transducer
157  (const hfst::StringPairSet &sps, bool cyclic=false); // build from a set of string pairs; cyclic defaults to false
158  static LogFst * define_transducer(const std::vector<StringPairSet> &spsv); // build from a vector of string-pair sets
159 
160  // number versions
161  static LogFst * define_transducer(unsigned int number); // same overload family as above, taking symbol numbers
162  static LogFst * define_transducer
163  (unsigned int inumber, unsigned int onumber);
164  static LogFst * define_transducer(const hfst::NumberPairVector &npv);
165  static LogFst * define_transducer
166  (const hfst::NumberPairSet &nps, bool cyclic=false);
167  static LogFst * define_transducer
168  (const std::vector<NumberPairSet> &npsv);
169 
170  static LogFst * copy(LogFst * t); // unary operations: take one transducer, return a LogFst pointer; whether the argument is modified is not visible here
171  static LogFst * determinize(LogFst * t);
172  static LogFst * minimize(LogFst * t);
173  static LogFst * remove_epsilons(LogFst * t);
174  static LogFst * n_best(LogFst * t, unsigned int n);
175  static LogFst * repeat_star(LogFst * t);
176  static LogFst * repeat_plus(LogFst * t);
177  static LogFst * repeat_n(LogFst * t, unsigned int n);
178  static LogFst * repeat_le_n(LogFst * t, unsigned int n);
179  static LogFst * optionalize(LogFst * t);
180  static LogFst * invert(LogFst * t);
181  static LogFst * reverse(LogFst * transducer);
182  static LogFst * extract_input_language(LogFst * t);
183  static LogFst * extract_output_language(LogFst * t);
184  static void extract_paths // results are delivered through the callback object, not a return value
185  (LogFst * t, hfst::ExtractStringsCb& callback,
186  int cycles=-1, FdTable<int64>* fd=NULL, bool filter_fd=false
187  /*bool include_spv=false*/);
188  static void extract_random_paths // results are written into the HfstTwoLevelPaths out-parameter
189  (const LogFst *t, HfstTwoLevelPaths &results, int max_num);
190  static LogFst * compose(LogFst * t1, // binary operations: take two transducers, return a LogFst pointer
191  LogFst * t2);
192  static LogFst * concatenate(LogFst * t1,
193  LogFst * t2);
194  static LogFst * disjunct(LogFst * t1,
195  LogFst * t2);
196 
197  static LogFst * disjunct(LogFst * t, const StringPairVector &spv); // disjunct overloads taking a single path as a pair vector
198  static LogFst * disjunct(LogFst * t, const NumberPairVector &npv);
199 
200  static LogFst * intersect(LogFst * t1,
201  LogFst * t2);
202  static LogFst * subtract(LogFst * t1,
203  LogFst * t2);
204  static LogFst * set_weight(LogFst * t,float f); // weight operations take plain float values
205  static LogFst * set_final_weights(LogFst * t, float weight);
206  static LogFst * transform_weights(LogFst * t,float (*func)(float f)); // func: plain function pointer mapping a float to a float
207  static LogFst * push_weights(LogFst * t, bool to_initial_state);
208 
209  static std::pair<LogFst*, LogFst*> harmonize // returns a pair of transducer pointers; unknown_symbols_in_use defaults to true
210  (LogFst *t1, LogFst *t2, bool unknown_symbols_in_use=true);
211 
212  static void write_in_att_format(LogFst * t, FILE *ofile); // AT&T-format writers to a C FILE
213  static void write_in_att_format_number(LogFst * t, FILE *ofile);
214 
215  static void test_minimize(void);
216 
217  static void write_in_att_format(LogFst * t, std::ostream &os); // AT&T-format writers to a C++ ostream
218  static void write_in_att_format_number(LogFst * t, std::ostream &os);
219 
220  static LogFst * read_in_att_format(FILE *ifile);
221 
222  static bool are_equivalent(LogFst *one, LogFst *another); // boolean queries
223  static bool is_cyclic(LogFst * t);
224  static bool is_automaton(LogFst * t);
225 
226  static FdTable<int64>* get_flag_diacritics(LogFst * t); // returns a raw FdTable pointer; ownership not expressed in the type -- confirm
227 
228  // string versions
229  static LogFst * insert_freely(LogFst * t, const StringPair &symbol_pair);
230  static LogFst * substitute
231  (LogFst * t, std::string old_symbol, std::string new_symbol); // symbols are taken by value
232  static LogFst * substitute(LogFst * t,
233  StringPair old_symbol_pair,
234  StringPair new_symbol_pair);
235  static LogFst * substitute(LogFst * t,
236  StringPair old_symbol_pair,
237  StringPairSet new_symbol_pair_set);
238  static LogFst * substitute(LogFst * t,
239  const StringPair old_symbol_pair,
240  LogFst *transducer);
241 
242  // number versions
243  static LogFst * insert_freely(LogFst * t, const NumberPair &number_pair);
244  static LogFst * substitute(LogFst * t, unsigned int, unsigned int); // unnamed parameters; presumably old and new symbol numbers, in that order -- TODO confirm
245  static LogFst * substitute(LogFst * t,
246  NumberPair old_number_pair,
247  NumberPair new_number_pair);
248  static LogFst * substitute(LogFst * t,
249  const NumberPair old_number_pair,
250  LogFst *transducer);
251 
252  static void insert_to_alphabet // alphabet maintenance on a transducer
253  (LogFst *t, const std::string &symbol);
254  static void remove_from_alphabet
255  (LogFst *t, const std::string &symbol);
256  static StringSet get_alphabet(LogFst *t); // alphabet returned by value as a StringSet
257  static unsigned int get_symbol_number(LogFst *t,
258  const std::string &symbol);
259 
260  static NumberNumberMap create_mapping(LogFst * t1, LogFst * t2); // returns a number-to-number map by value
261  static void recode_symbol_numbers(LogFst * t, hfst::NumberNumberMap &km); // km is taken by non-const reference
262 
263  static LogFst * expand_arcs
264  (LogFst * t, hfst::StringSet &unknown, bool unknown_symbols_in_use); // unknown is taken by non-const reference
265 
266  float get_profile_seconds(); // NOTE(review): the only non-static member function in this class -- confirm that is intentional
267 
268  static unsigned int number_of_states(const LogFst * t);
269 
270  private:
271  static fst::SymbolTable create_symbol_table(std::string name); // returns a SymbolTable by value
272  static void initialize_symbol_tables(LogFst *t);
273  static void remove_symbol_table(LogFst *t);
274 
275  /* Maps state numbers in AT&T text format to state ids used by
276  OpenFst transducers. */
277  typedef std::map<int, StateId> StateMap;
278 
279  static StateId add_and_map_state
280  (LogFst *t, int state_number, StateMap &state_map); // state_map is taken by non-const reference, so it can be updated by the call
281 
282  static int has_arc(LogFst &t, // returns int rather than bool; meaning of the value is not visible here -- TODO confirm
283  LogArc::StateId sourcestate,
284  LogArc::Label ilabel,
285  LogArc::Label olabel);
286  static void disjunct_as_tries(LogFst &t1, // t1 is taken by non-const reference, t2 by pointer-to-const
287  StateId t1_state,
288  const LogFst * t2,
289  StateId t2_state);
290  static void add_sub_trie(LogFst &t1, // same parameter shape as disjunct_as_tries
291  StateId t1_state,
292  const LogFst * t2,
293  StateId t2_state);
294 
295  public:
296  static StateId add_state(LogFst *t); // low-level construction helpers operating on an existing transducer
297  static void set_final_weight(LogFst *t, StateId s, float w);
298  static void add_transition
299  (LogFst *t, StateId source,
300  std::string &isymbol, std::string &osymbol, float w, StateId target); // NOTE(review): symbols are non-const references, so const strings and temporaries cannot be passed
301  static float get_final_weight(LogFst *t, StateId s);
302  static float is_final(LogFst *t, StateId s); // NOTE(review): predicate-style name but float return type -- confirm whether bool was intended
303  static StateId get_initial_state(LogFst *t);
304  static void represent_empty_transducer_as_having_one_state(LogFst *t);
305 
306  };
307 
308 } }
309 #endif
std::pair< String, String > StringPair
A symbol pair in a transition.
Definition: HfstSymbolDefs.h:71
std::vector< std::pair< std::string, std::string > > StringPairVector
A vector of string pairs.
Definition: HfstDataTypes.h:106
A file for exceptions.
Class declarations for flag diacritic handling.
std::set< HfstTwoLevelPath > HfstTwoLevelPaths
A set of two-level weighted paths.
Definition: HfstDataTypes.h:110
std::set< StringPair > StringPairSet
A set of symbol pairs used in substituting symbol pairs and in rule functions.
Definition: HfstSymbolDefs.h:83
Typedefs and functions for symbols, symbol pairs and sets of symbols.