HFST - Helsinki Finite-State Transducer Technology API  version 3.7.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SfstTransducer.h
Go to the documentation of this file.
1 // This program is free software: you can redistribute it and/or modify
2 // it under the terms of the GNU General Public License as published by
3 // the Free Software Foundation, version 3 of the License.
4 //
5 // This program is distributed in the hope that it will be useful,
6 // but WITHOUT ANY WARRANTY; without even the implied warranty of
7 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8 // GNU General Public License for more details.
9 //
10 // You should have received a copy of the GNU General Public License
11 // along with this program. If not, see <http://www.gnu.org/licenses/>.
12 
13 #ifndef _SFST_TRANSDUCER_H_
14 #define _SFST_TRANSDUCER_H_
15 
16 #include "HfstExceptionDefs.h"
17 #include "HfstFlagDiacritics.h"
18 #include "HfstSymbolDefs.h"
19 #include "HfstExtractStrings.h"
20 #include "back-ends/sfst/interface.h"
21 #include "back-ends/sfst/fst.h"
22 #include <cstdio>
23 #include <string>
24 #include <sstream>
25 #include <iostream>
26 
31 namespace hfst {
32 namespace implementations
33 {
34  //using namespace SFST;
    // Short local name for the SFST back-end's transducer type.
35  typedef SFST::Transducer Transducer;
    // NOTE(review): the lone semicolon below is an empty declaration, probably
    // a leftover from an earlier edit.
36  ;
37  using std::ostream;
38  using std::ostringstream;
39 
    // Declared here, defined elsewhere. NOTE(review): presumably switches the
    // use of Hopcroft's minimization algorithm on or off -- confirm in the
    // implementation file.
40  void sfst_set_hopcroft(bool);
41 
// Input-side stream class of the SFST back-end. It stores a file name and a C
// stdio handle, and declares read_transducer(), which returns an
// SFST::Transducer pointer. Only declarations are visible in this header, so
// the notes below describe the declared interface; anything about run-time
// behavior is marked as an assumption to confirm in the implementation file.
42  class SfstInputStream
43  {
44  private:
45  std::string filename;
46  FILE * input_file;
47  bool is_minimal; // whether the next transducer in the stream is minimal;
48  // the stream header can state this
    // Private helper taking a string-to-number map and an alphabet by
    // non-const reference, plus one SFST character.
    // NOTE(review): presumably registers the symbol of `c` -- confirm.
49  void add_symbol(StringNumberMap &string_number_map,
50  SFST::Character c,
51  SFST::Alphabet &alphabet);
52 
53  public:
    // Default constructor and constructor from a file name. The one-argument
    // form is not `explicit`, so a std::string converts implicitly.
54  SfstInputStream(void);
55  SfstInputStream(const std::string &filename);
56  void close(void);
    // State queries, each returning bool. None of them is declared const.
57  bool is_eof(void);
58  bool is_bad(void);
59  bool is_good(void);
60  bool is_fst(void);
    // NOTE(review): presumably skips the given number of characters -- confirm.
61  void ignore(unsigned int);
62 
    // Character-level access: fetch a char, fetch a short, push a char back.
63  char stream_get();
64  short stream_get_short();
65  void stream_unget(char c);
66 
    // Takes header data as string pairs together with an index and returns
    // bool; what the return value signals is not visible in this header.
67  bool set_implementation_specific_header_data
68  (StringPairVector &data, unsigned int index);
    // Returns a raw pointer. NOTE(review): ownership presumably passes to the
    // caller -- confirm.
69  SFST::Transducer * read_transducer();
70 
    // Static overloads of is_fst() for a C FILE handle and for a std::istream.
71  static bool is_fst(FILE * f);
72  static bool is_fst(std::istream &s);
73  };
74 
// Output-side stream class of the SFST back-end. It stores a file name and a C
// stdio handle, and declares write_transducer() for an SFST::Transducer
// pointer. Only declarations are visible in this header; run-time behavior
// must be confirmed in the implementation file.
75  class SfstOutputStream
76  {
77  private:
78  std::string filename;
79  FILE *ofile;
80  //void write_3_0_library_header(FILE *file, bool is_minimal);
81  public:
    // Default constructor and constructor from a file name. The one-argument
    // form is not `explicit`, so a std::string converts implicitly.
82  SfstOutputStream(void);
83  SfstOutputStream(const std::string &filename);
84  void close(void);
    // Takes a single char by const reference.
85  void write(const char &c);
    // Receives the header as a vector of chars by non-const reference, so it
    // can modify it. NOTE(review): presumably appends SFST-specific header
    // fields derived from `t` -- confirm.
86  void append_implementation_specific_header_data
87  (std::vector<char> &header, SFST::Transducer *t);
88  void write_transducer(SFST::Transducer * transducer);
89  };
90 
// Associates SFST::Node pointers with int values. Keys are hashed and
// compared by pointer value, not by node contents.
91  class HfstNode2Int {
92 
    // Hash functor: the node's address, converted to size_t, is the hash.
93  struct hashf {
94  size_t operator()(const SFST::Node *node) const {
95  return reinterpret_cast<size_t>(node);
96  }
97  };
    // Equality functor: two keys match only when they are the same pointer.
98  struct equalf {
99  int operator()(const SFST::Node *n1, const SFST::Node *n2) const {
100  return n1 == n2;
101  }
102  };
103  typedef SFST::hash_map<SFST::Node*, int, hashf, equalf> NL;
104 
105  private:
106  NL number; // node pointer -> associated int
107 
108  public:
     // Returns a reference to the int stored for `node`, inserting an entry
     // with value 0 first when the node has none yet.
109  int &operator[]( SFST::Node *node ) {
110  NL::iterator pos = number.find(node);
111  if (pos != number.end())
112  return pos->second;
113  return number.insert(NL::value_type(node, 0)).first->second;
114  }
115  };
116 
117 
// Collection of operations on SFST::Transducer objects. Apart from
// get_profile_seconds(), every member is static, and transducers are passed
// and returned as raw pointers. Only declarations are visible in this header:
// whether an argument is modified in place, consumed, or left untouched, and
// who owns a returned pointer, must be confirmed in the implementation file.
118  class SfstTransducer
119  {
120  public:
     // Basic transducers. NOTE(review): names suggest the empty transducer
     // and the epsilon transducer -- confirm.
121  static SFST::Transducer * create_empty_transducer(void);
122  static SFST::Transducer * create_epsilon_transducer(void);
123 
     // define_transducer overloads taking symbol numbers: a single number,
     // or an input number and an output number.
124  static SFST::Transducer * define_transducer(unsigned int number);
125  static SFST::Transducer * define_transducer
126  (unsigned int inumber, unsigned int onumber);
127 
     // define_transducer overloads taking symbol strings: a single symbol, an
     // input/output symbol pair, a vector of string pairs, a set of string
     // pairs (`cyclic` defaults to false), or a vector of such sets.
128  static SFST::Transducer * define_transducer(const std::string &symbol);
129  static SFST::Transducer * define_transducer
130  (const std::string &isymbol, const std::string &osymbol);
131  static SFST::Transducer * define_transducer
132  (const StringPairVector &spv);
133  static SFST::Transducer * define_transducer
134  (const StringPairSet &sps, bool cyclic=false);
135  static SFST::Transducer * define_transducer
136  (const std::vector<StringPairSet> &spsv);
     // Operations on one transducer, each returning a transducer pointer.
     // repeat_n and repeat_le_n take an additional unsigned count `n`.
137  static SFST::Transducer * copy(SFST::Transducer * t);
138  static SFST::Transducer * determinize(SFST::Transducer * t);
139  static SFST::Transducer * minimize(SFST::Transducer * t);
140  static SFST::Transducer * remove_epsilons(SFST::Transducer * t);
141  static SFST::Transducer * repeat_star(SFST::Transducer * t);
142  static SFST::Transducer * repeat_plus(SFST::Transducer * t);
143  static SFST::Transducer * repeat_n(SFST::Transducer * t,unsigned int n);
144  static SFST::Transducer * repeat_le_n(SFST::Transducer * t,unsigned int n);
145  static SFST::Transducer * optionalize(SFST::Transducer * t);
146  static SFST::Transducer * invert(SFST::Transducer * t);
147  static SFST::Transducer * reverse(SFST::Transducer * transducer);
148  static SFST::Transducer * extract_input_language(SFST::Transducer * t);
149  static SFST::Transducer * extract_output_language(SFST::Transducer * t);
     // Returns a vector of transducer pointers, returned by value.
150  static std::vector<SFST::Transducer*> extract_path_transducers
151  (SFST::Transducer *t);
     // Path extraction through a callback object. Defaults: cycles = -1,
     // fd = NULL, filter_fd = false.
     // NOTE(review): -1 presumably means "no limit on cycles" -- confirm.
152  static void extract_paths
153  (SFST::Transducer * t, hfst::ExtractStringsCb& callback, int cycles=-1,
154  FdTable<SFST::Character>* fd=NULL, bool filter_fd=false);
155 
     // Takes `results` by non-const reference (an output parameter) and an
     // int `max_num`.
156  static void extract_random_paths
157  (SFST::Transducer *t, HfstTwoLevelPaths &results, int max_num);
158 
     // Symbol-level edits. The two-string substitute overload takes its
     // String arguments by value; the other overload takes a symbol pair and
     // a second transducer `tr`.
159  static SFST::Transducer * insert_freely
160  (SFST::Transducer *t , const StringPair &symbol_pair);
161  static SFST::Transducer * substitute
162  (SFST::Transducer * t, String old_symbol, String new_symbol);
163  static SFST::Transducer * substitute
164  (SFST::Transducer *t, const StringPair &symbol_pair, SFST::Transducer *tr);
165 
     // Operations on two transducers, each returning a transducer pointer.
166  static SFST::Transducer * compose(SFST::Transducer * t1,
167  SFST::Transducer * t2);
168  static SFST::Transducer * concatenate(SFST::Transducer * t1,
169  SFST::Transducer * t2);
170  static SFST::Transducer * disjunct(SFST::Transducer * t1,
171  SFST::Transducer * t2);
172  static SFST::Transducer * intersect(SFST::Transducer * t1,
173  SFST::Transducer * t2);
174  static SFST::Transducer * subtract(SFST::Transducer * t1,
175  SFST::Transducer * t2);
     // Returns a pair of transducer pointers; `unknown_symbols_in_use`
     // defaults to true.
176  static std::pair<SFST::Transducer*, SFST::Transducer*> harmonize
177  (SFST::Transducer *t1, SFST::Transducer *t2, bool unknown_symbols_in_use=true);
178 
     // Boolean queries.
179  static bool are_equivalent(SFST::Transducer * t1, SFST::Transducer * t2);
180  static bool is_cyclic(SFST::Transducer * t);
181  static bool is_automaton(SFST::Transducer * t);
182 
     // Returns a raw pointer to a flag-diacritic table keyed on
     // SFST::Character. NOTE(review): ownership of the table is not visible
     // here -- confirm.
183  static FdTable<SFST::Character>* get_flag_diacritics(SFST::Transducer * t);
184 
     // NOTE(review): presumably debugging output helpers -- confirm.
185  static void print_test(SFST::Transducer *t);
186  static void print_alphabet(SFST::Transducer *t);
187 
     // Symbol queries: largest symbol number, symbols as a vector, and a map
     // from symbol string to unsigned number.
188  static unsigned int get_biggest_symbol_number(SFST::Transducer * t);
189 
190  static StringVector get_symbol_vector(SFST::Transducer * t);
191 
192  static std::map<std::string, unsigned int> get_symbol_map(SFST::Transducer * t);
193 
     // Overload of disjunct taking a vector of string pairs instead of a
     // second transducer.
194  static SFST::Transducer * disjunct(SFST::Transducer * t, const StringPairVector &spv);
195 
196  static StringPairSet get_symbol_pairs(SFST::Transducer *t);
197 
     // NOTE(review): this is the only member declared without `static`, so
     // it needs an SfstTransducer instance to be called -- confirm that this
     // is intended.
198  float get_profile_seconds();
199  static unsigned int number_of_states(SFST::Transducer *t);
200  static unsigned int number_of_arcs(SFST::Transducer *t);
201 
     // Alphabet access: read it as a set of strings, add or remove a symbol,
     // and look up a symbol's number.
202  static StringSet get_alphabet(SFST::Transducer *t);
203  static void insert_to_alphabet(SFST::Transducer *t, const std::string &symbol);
204  static void remove_from_alphabet
205  (SFST::Transducer *t, const std::string &symbol);
206  static unsigned int get_symbol_number(SFST::Transducer *t,
207  const std::string &symbol);
208 
209  protected:
     // Helpers available to derived classes only. The expand* functions take
     // a string set by non-const reference; expand2 additionally carries a
     // set of already visited nodes.
     // NOTE(review): expand2 presumably walks the transducer recursively --
     // confirm.
210  static void initialize_alphabet(SFST::Transducer *t);
211  static SFST::Transducer * expand_arcs(SFST::Transducer * t, StringSet &unknown);
212 
213  static void expand_node( SFST::Transducer *t, SFST::Node *origin, SFST::Label &l,
214  SFST::Node *target, hfst::StringSet &s );
215  static void expand2
216  ( SFST::Transducer *t, SFST::Node *node,
217  hfst::StringSet &new_symbols, std::set<SFST::Node*> &visited_nodes );
218  static void expand(SFST::Transducer *t, hfst::StringSet &new_symbols);
219 
220  };
221 } }
222 #endif
std::pair< String, String > StringPair
A symbol pair in a transition.
Definition: HfstSymbolDefs.h:71
std::string String
A UTF-8 symbol in a transition.
Definition: HfstSymbolDefs.h:60
std::vector< std::pair< std::string, std::string > > StringPairVector
A vector of string pairs.
Definition: HfstDataTypes.h:106
A file for exceptions.
Class declarations for flag diacritic handling.
std::set< HfstTwoLevelPath > HfstTwoLevelPaths
A set of two-level weighted paths.
Definition: HfstDataTypes.h:110
std::set< StringPair > StringPairSet
A set of symbol pairs used in substituting symbol pairs and in rule functions.
Definition: HfstSymbolDefs.h:83
Typedefs and functions for symbols, symbol pairs and sets of symbols.