HFST - Helsinki Finite-State Transducer Technology API  version 3.7.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FomaTransducer.h
Go to the documentation of this file.
1 // This program is free software: you can redistribute it and/or modify
2 // it under the terms of the GNU General Public License as published by
3 // the Free Software Foundation, version 3 of the License.
4 //
5 // This program is distributed in the hope that it will be useful,
6 // but WITHOUT ANY WARRANTY; without even the implied warranty of
7 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8 // GNU General Public License for more details.
9 //
10 // You should have received a copy of the GNU General Public License
11 // along with this program. If not, see <http://www.gnu.org/licenses/>.
12 
13 #ifndef _FOMA_TRANSDUCER_H_
14 #define _FOMA_TRANSDUCER_H_
15 
16 #include "HfstSymbolDefs.h"
17 #include "HfstExceptionDefs.h"
18 #include "HfstExtractStrings.h"
19 #include "HfstFlagDiacritics.h"
20 #include <stdbool.h> // foma uses _Bool
21 #include <stdlib.h>
22 
23 #ifndef _FOMALIB_H_
24 #define _FOMALIB_H_
25 #include "back-ends/foma/fomalib.h"
26 #endif
27 
28 #ifndef WINDOWS
29  #include <zlib.h>
30 #endif
31 #include <cstdio>
32 #include <string>
33 #include <sstream>
34 #include <iostream>
35 
36 #include "../FormatSpecifiers.h"
37 
42 namespace hfst {
43  namespace implementations
44 {
45  ;
46  using std::ostream;
47  using std::ostringstream;
48 
// Input-side stream class: stores a file name and a C FILE handle and
// declares read_transducer(), which returns a pointer to a foma `fsm`.
// NOTE(review): only declarations are visible in this header; the notes on
// individual members below are inferred from their names and signatures and
// should be confirmed against the implementation file.
49  class FomaInputStream
50  {
51  private:
52  std::string filename;
53  FILE * input_file;
// Private helpers; presumably they consume header data that precedes the
// network itself -- TODO confirm against the definitions.
54  void skip_identifier_version_3_0(void);
55  void skip_hfst_header(void);
56  public:
// Two constructors: one without arguments and one taking a file name.
// NOTE(review): which source the argument-less form reads from is not
// visible here -- confirm.
57  FomaInputStream(void);
58  FomaInputStream(const std::string &filename);
59  void close(void);
// Stream state queries, each returning bool.
60  bool is_eof(void);
61  bool is_bad(void);
62  bool is_good(void);
63  bool is_fst(void);
// NOTE(review): the unsigned argument is unnamed; presumably a count of
// bytes to skip -- confirm.
64  void ignore(unsigned int);
// Returns a raw `fsm *`. NOTE(review): the signature does not express who
// owns the returned object -- confirm who is expected to free it.
65  fsm * read_transducer();
66 
// Low-level access: get one char, get one short, push one char back.
67  char stream_get();
68  short stream_get_short();
69  void stream_unget(char c);
70 
// Static overloads of is_fst() that take an externally supplied FILE handle
// or std::istream instead of using this object's own handle.
71  static bool is_fst(FILE * f);
72  static bool is_fst(std::istream &s);
73  };
74 
// Output-side stream class: stores a file name and a C FILE handle and
// declares write_transducer(), which takes a pointer to a foma `fsm`.
// NOTE(review): only declarations are visible in this header; behavior
// should be confirmed against the implementation file.
75  class FomaOutputStream
76  {
77  private:
78  std::string filename;
79  FILE *ofile;
// Disabled declaration kept from the original source.
80  //void write_3_0_library_header(FILE *file);
81  public:
// Two constructors: one without arguments and one taking a file name.
// NOTE(review): the destination used by the argument-less form is not
// visible here -- confirm.
82  FomaOutputStream(void);
83  FomaOutputStream(const std::string &filename);
84  void close(void);
// Takes a single char by const reference.
85  void write(const char &c);
// NOTE(review): whether the transducer is modified or freed by this call
// cannot be determined from the declaration -- confirm.
86  void write_transducer(fsm * transducer);
87  };
88 
// Collection of static functions operating on foma `fsm` pointers; the class
// declares no data members and no non-static members.
// NOTE(review): only declarations are visible in this header. For every
// function that takes and returns `fsm *`, it cannot be told from here
// whether the argument is modified in place, consumed, or left untouched,
// nor who owns the result -- confirm against the implementation before
// relying on any of these.
89  class FomaTransducer
90  {
91  public:
// Construction of new networks from nothing, from symbol pairs, from sets of
// symbol pairs, or from one or two symbol strings.
92  static fsm * create_empty_transducer(void);
93  static fsm * create_epsilon_transducer(void);
94  static fsm * define_transducer(const hfst::StringPairVector &spv);
// `cyclic` defaults to false.
95  static fsm * define_transducer
96  (const hfst::StringPairSet &sps, bool cyclic=false);
97  static fsm * define_transducer(const std::vector<StringPairSet> &spsv);
98  static fsm * define_transducer
99  (const std::string &symbol);
100  static fsm * define_transducer
101  (const std::string &isymbol, const std::string &osymbol);
// Unary operations: one `fsm *` in, one `fsm *` out.
102  static fsm * copy(fsm * t);
103  static fsm * determinize(fsm * t);
104  static fsm * minimize(fsm * t);
105  static fsm * remove_epsilons(fsm * t);
106  static fsm * repeat_star(fsm * t);
107  static fsm * repeat_plus(fsm * t);
108  static fsm * repeat_n(fsm * t, unsigned int n);
109  static fsm * repeat_le_n(fsm * t, unsigned int n);
110  static fsm * optionalize(fsm * t);
111  static fsm * invert(fsm * t);
112  static fsm * reverse(fsm * t);
113  static fsm * extract_input_language(fsm * t);
114  static fsm * extract_output_language(fsm * t);
115 
116  static fsm * insert_freely(fsm * t, const StringPair &symbol_pair);
117 
// Predicates returning bool.
118  static bool are_equivalent(fsm *t1, fsm *t2);
119  static bool is_cyclic(fsm * t);
120 
// Both symbols are taken by value.
121  static fsm * substitute
122  (fsm * t,hfst::String old_symbol,hfst::String new_symbol);
123 
// Binary operations: two `fsm *` in, one `fsm *` out.
124  static fsm * compose(fsm * t1,
125  fsm * t2);
126  static fsm * concatenate(fsm * t1,
127  fsm * t2);
128  static fsm * disjunct(fsm * t1,
129  fsm * t2);
130  static fsm * intersect(fsm * t1,
131  fsm * t2);
132  static fsm * subtract(fsm * t1,
133  fsm * t2);
134 
// Path extraction. Results are delivered through `callback`; defaults are
// cycles=-1, fd=NULL, filter_fd=false.
// NOTE(review): cycles=-1 presumably means "no limit" -- confirm.
135  static void extract_paths(fsm * t, hfst::ExtractStringsCb& callback,
136  int cycles=-1, FdTable<int>* fd=NULL,
137  bool filter_fd=false);
// Results are written into the `results` out-parameter.
138  static void extract_random_paths
139  (const fsm *t, HfstTwoLevelPaths &results, int max_num);
140 
// Returns a raw FdTable<int> pointer.
// NOTE(review): ownership of the returned table is not visible -- confirm.
141  static FdTable<int>* get_flag_diacritics(fsm * t);
142 
// Symbol and alphabet queries and updates.
143  static unsigned int get_biggest_symbol_number(fsm * t);
144  static StringVector get_symbol_vector(fsm * t);
145  static std::map<std::string, unsigned int> get_symbol_map(fsm * t);
146 
147  static void insert_to_alphabet(fsm *t, const std::string &symbol);
148  static void remove_from_alphabet(fsm *t, const std::string &symbol);
149  static StringSet get_alphabet(fsm *t);
150  static unsigned int get_symbol_number(fsm *t,
151  const std::string &symbol);
152 
// Takes two non-const networks and returns nothing, so any effect must be
// applied to the arguments themselves.
153  static void harmonize(fsm *net1, fsm *net2);
154 
// Reading from and writing to a caller-supplied FILE handle.
// NOTE(review): the meaning of write_net's int return value is not visible
// here -- confirm.
155  static fsm * read_net(FILE * file);
156  static int write_net(fsm * net, FILE * file);
157 
// NOTE(review): presumably releases a network -- confirm.
158  static void delete_foma(fsm * net);
159  static void print_test(fsm * t);
160 
161  static fsm * read_lexc(const std::string &filename, bool verbose);
162 
// Size queries.
163  static unsigned int number_of_states(fsm * net);
164  static unsigned int number_of_arcs(fsm * net);
165 
// Flag elimination: all flags, or the single flag named by `flag`.
166  static fsm * eliminate_flags(fsm * t);
167  static fsm * eliminate_flag(fsm * t, const std::string & flag);
168 
169  };
170 
171 } }
172 #endif
std::pair< String, String > StringPair
A symbol pair in a transition.
Definition: HfstSymbolDefs.h:71
std::string String
A UTF-8 symbol in a transition.
Definition: HfstSymbolDefs.h:60
std::vector< std::pair< std::string, std::string > > StringPairVector
A vector of string pairs.
Definition: HfstDataTypes.h:106
HfstExceptionDefs.h
A file for exceptions.
HfstFlagDiacritics.h
Class declarations for flag diacritic handling.
std::set< HfstTwoLevelPath > HfstTwoLevelPaths
A set of two-level weighted paths.
Definition: HfstDataTypes.h:110
std::set< StringPair > StringPairSet
A set of symbol pairs used in substituting symbol pairs and in rule functions.
Definition: HfstSymbolDefs.h:83
HfstSymbolDefs.h
Typedefs and functions for symbols, symbol pairs and sets of symbols.