HFST - Helsinki Finite-State Transducer Technology API  version 3.7.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HfstTropicalTransducerTransitionData.h
#include <cassert>
#include <cstdio>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#include "../HfstExceptionDefs.h"
9 
10 namespace hfst {
11 
12  namespace implementations {
13 
/* Ordering functor for symbol strings: str1 sorts before str2 when it is
   lexicographically smaller. Used as the comparator of Symbol2NumberMap. */
struct string_comparison {
    bool operator() (const std::string &str1, const std::string &str2) const {
        return str1 < str2;
    }
};
19 
35  public:
37  typedef std::string SymbolType;
39  typedef float WeightType;
41  typedef std::set<SymbolType> SymbolTypeSet;
42 
43  typedef std::vector<SymbolType>
44  Number2SymbolVector;
45  typedef std::map<SymbolType, unsigned int, string_comparison>
46  Symbol2NumberMap;
47 
48  static SymbolType get_epsilon()
49  {
50  return SymbolType("@_EPSILON_SYMBOL_@");
51  }
52 
53  static SymbolType get_unknown()
54  {
55  return SymbolType("@_UNKNOWN_SYMBOL_@");
56  }
57 
58  static SymbolType get_identity()
59  {
60  return SymbolType("@_IDENTITY_SYMBOL_@");
61  }
62 
63  public: /* FIXME: Should be private. */
64  /* Maps that contain information of the mappings between strings
65  and numbers */
66  static Number2SymbolVector number2symbol_map;
67  static Symbol2NumberMap symbol2number_map;
68  /* The biggest number in use. */
69  static unsigned int max_number;
70 
71  /* Get the biggest number used to represent a symbol. */
72  static unsigned int get_max_number() {
73  return max_number;
74  }
75 
76  /*
77  Get a vector that defines how numbers of a transducer must
78  be changed, i.e. harmonized, so that it follows the same
79  number-to-string encoding as all transducers that use the datatype
80  HfstTropicalTransducerTransitionData.
81 
82  \a symbols defines how numbers are mapped to strings in the
83  original transducer so that each index in \a symbols
84  is the number that corresponds to the string at that index.
85  An empty string at an index means that the index is not
86  used in the original transducer.
87 
88  The result is a vector whose each index is the number that
89  must be replaced by the number at that index when a
90  transducer is harmonized. If an index is not used in the
91  transducer, the result will contain a zero at that index.
92  */
93  static std::vector<unsigned int> get_harmonization_vector
94  (const std::vector<SymbolType> &symbols)
95  {
96  std::vector<unsigned int> harmv;
97  harmv.reserve(symbols.size());
98  harmv.resize(symbols.size(), 0);
99  for (unsigned int i=0; i<symbols.size(); i++)
100  {
101  if (symbols.at(i) != "")
102  harmv.at(i) = get_number(symbols.at(i));
103  }
104  return harmv;
105  }
106 
107  static std::vector<unsigned int> get_reverse_harmonization_vector
108  (const std::map<SymbolType, unsigned int> &symbols)
109  {
110  std::vector<unsigned int> harmv;
111  harmv.reserve(max_number+1);
112  harmv.resize(max_number+1, 0);
113  for (unsigned int i=0; i<harmv.size(); i++)
114  {
115  std::map<SymbolType, unsigned int>::const_iterator it
116  = symbols.find(get_symbol(i));
117  if (it != symbols.end())
118  harmv.at(i) = it->second;
119  }
120  return harmv;
121  }
122 
123  protected:
124  /* Get the symbol that is mapped as \a number */
125  static const std::string &get_symbol(unsigned int number)
126  {
127  if (number >= number2symbol_map.size()) {
128  std::string message("HfstTropicalTransducerTransitionData: "
129  "number ");
130  std::ostringstream oss;
131  oss << number;
132  message.append(oss.str());
133  message.append(" is not mapped to any symbol");
135  (HfstFatalException, message);
136  }
137  return number2symbol_map[number];
138  }
139 
140  /* Get the number that is used to represent \a symbol */
141  static unsigned int get_number(const std::string &symbol)
142  {
143  if(symbol == "") { // FAIL
144  Symbol2NumberMap::iterator it = symbol2number_map.find(symbol);
145  if (it == symbol2number_map.end()) {
146  std::cerr << "ERROR: No number for the empty symbol\n"
147  << std::endl;
148  }
149  else {
150  std::cerr << "ERROR: The empty symbol corresdponds to number "
151  << it->second << std::endl;
152  }
153  assert(false);
154  }
155 
156  Symbol2NumberMap::iterator it = symbol2number_map.find(symbol);
157  if (it == symbol2number_map.end())
158  {
159  max_number++;
160  symbol2number_map[symbol] = max_number;
161  number2symbol_map.push_back(symbol);
162  return max_number;
163  }
164  return it->second;
165  }
166 
167  //private: TEST
168  public:
169  /* The actual transition data */
170  unsigned int input_number;
171  unsigned int output_number;
172  WeightType weight;
173 
174  public:
175  void print_transition_data()
176  {
177  fprintf(stderr, "%i:%i %f\n",
178  input_number, output_number, weight);
179  }
180 
181  public:
182 
186  input_number(0), output_number(0), weight(0) {}
187 
192  input_number = data.input_number;
193  output_number = data.output_number;
194  weight = data.weight;
195  }
196 
201  SymbolType osymbol,
202  WeightType weight) {
203  if (isymbol == "" || osymbol == "")
205  (EmptyStringException,
206  "HfstTropicalTransducerTransitionData"
207  "(SymbolType, SymbolType, WeightType)");
208 
209  input_number = get_number(isymbol);
210  output_number = get_number(osymbol);
211  this->weight = weight;
212  }
213 
215  (unsigned int inumber,
216  unsigned int onumber,
217  WeightType weight) {
218  input_number = inumber;
219  output_number = onumber;
220  this->weight = weight;
221  }
222 
224  const SymbolType &get_input_symbol() const {
225  return get_symbol(input_number);
226  }
227 
229  const SymbolType &get_output_symbol() const {
230  return get_symbol(output_number);
231  }
232 
233  unsigned int get_input_number() const {
234  return input_number;
235  }
236 
237  unsigned int get_output_number() const {
238  return output_number;
239  }
240 
243  return weight;
244  }
245 
246  /* Are these needed? */
247  static bool is_epsilon(const SymbolType &symbol) {
248  return (symbol.compare("@_EPSILON_SYMBOL_@") == 0);
249  }
250  static bool is_unknown(const SymbolType &symbol) {
251  return (symbol.compare("@_UNKNOWN_SYMBOL_@") == 0);
252  }
253  static bool is_identity(const SymbolType &symbol) {
254  return (symbol.compare("@_IDENTITY_SYMBOL_@") == 0);
255  }
256  static bool is_valid_symbol(const SymbolType &symbol) {
257  if (symbol == "")
258  return false;
259  return true;
260  }
261 
262  static SymbolType get_marker(const SymbolTypeSet &sts) {
263  (void)sts;
264  return SymbolType("@_MARKER_SYMBOL_@");
265  }
266 
273  const {
274  if (input_number < another.input_number )
275  return true;
276  if (input_number > another.input_number)
277  return false;
278  if (output_number < another.output_number)
279  return true;
280  if (output_number > another.output_number)
281  return false;
282  return (weight < another.weight);
283  }
284 
285  void operator=(const HfstTropicalTransducerTransitionData &another)
286  {
287  input_number = another.input_number;
288  output_number = another.output_number;
289  weight = another.weight;
290  }
291 
292  friend class Number2SymbolVectorInitializer;
293  friend class Symbol2NumberMapInitializer;
294 
295  friend class ComposeIntersectFst;
296  friend class ComposeIntersectLexicon;
297  friend class ComposeIntersectRule;
298  friend class ComposeIntersectRulePair;
299  template <class C> friend class HfstTransitionGraph;
300 
301  };
302 
303  // Initialization of static members in class
304  // HfstTropicalTransducerTransitionData..
305  class Number2SymbolVectorInitializer {
306  public:
307  Number2SymbolVectorInitializer
308  (HfstTropicalTransducerTransitionData::Number2SymbolVector &vect) {
309  vect.push_back(std::string("@_EPSILON_SYMBOL_@"));
310  vect.push_back(std::string("@_UNKNOWN_SYMBOL_@"));
311  vect.push_back(std::string("@_IDENTITY_SYMBOL_@"));
312  }
313  };
314 
315  class Symbol2NumberMapInitializer {
316  public:
317  Symbol2NumberMapInitializer
318  (HfstTropicalTransducerTransitionData::Symbol2NumberMap &map) {
319  map["@_EPSILON_SYMBOL_@"] = 0;
320  map["@_UNKNOWN_SYMBOL_@"] = 1;
321  map["@_IDENTITY_SYMBOL_@"] = 2;
322  }
323  };
324 
325  } // namespace implementations
326 
327 } // namespace hfst
bool operator<(const HfstTropicalTransducerTransitionData &another) const
Whether this transition is less than the transition another.
Definition: HfstTropicalTransducerTransitionData.h:272
HfstTropicalTransducerTransitionData()
Create a HfstTropicalTransducerTransitionData with epsilon input and output strings and weight zero...
Definition: HfstTropicalTransducerTransitionData.h:185
std::set< SymbolType > SymbolTypeSet
A set of symbols.
Definition: HfstTropicalTransducerTransitionData.h:41
float WeightType
The weight type.
Definition: HfstTropicalTransducerTransitionData.h:39
HfstTropicalTransducerTransitionData(SymbolType isymbol, SymbolType osymbol, WeightType weight)
Create a HfstTropicalTransducerTransitionData with input symbol isymbol, output symbol osymbol and weight weight.
Definition: HfstTropicalTransducerTransitionData.h:200
std::string SymbolType
The input and output symbol type.
Definition: HfstTropicalTransducerTransitionData.h:37
An error happened, probably due to a bug in the HFST code.
Definition: HfstExceptionDefs.h:378
#define HFST_THROW_MESSAGE(E, M)
Macro to throw an exception of type E with message M. Use THROW instead of regular throw with subclas...
Definition: HfstExceptionDefs.h:35
const SymbolType & get_input_symbol() const
Get the input symbol.
Definition: HfstTropicalTransducerTransitionData.h:224
One implementation of template class C in HfstTransition.
Definition: HfstTropicalTransducerTransitionData.h:34
const SymbolType & get_output_symbol() const
Get the output symbol.
Definition: HfstTropicalTransducerTransitionData.h:229
WeightType get_weight() const
Get the weight.
Definition: HfstTropicalTransducerTransitionData.h:242