HFST - Helsinki Finite-State Transducer Technology API  version 3.7.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HfstFlagDiacritics.h
Go to the documentation of this file.
1 #ifndef _FLAG_DIACRITICS_H_
2 #define _FLAG_DIACRITICS_H_
3 
#include <cassert>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
10 
14 namespace hfst {
15 
// Kinds of flag-diacritic operators. The enumerators keep their implicit
// values 0..5 in this order; the letter is the character that
// FdOperation::char_to_operator maps to each one.
enum FdOperator
{
  Pop, // 'P': positive set
  Nop, // 'N': negative set
  Rop, // 'R': require
  Dop, // 'D': disallow
  Cop, // 'C': clear
  Uop  // 'U': unification
};

// Numeric id that stands in for a feature string.
typedef unsigned short FdFeature;

// Numeric id that stands in for a value string. Signed, because a
// negative set is stored as the negated value id.
typedef short FdValue;
20 
21 class FdOperation
22 {
23 private:
24  FdOperator op;
25  FdFeature feature;
26  FdValue value;
27  std::string name;
28 public:
29  FdOperation
30  (FdOperator op, FdFeature feat, FdValue val, const std::string& str):
31  op(op), feature(feat), value(val), name(str) {}
32 
33  FdOperator Operator(void) const { return op; }
34  FdFeature Feature(void) const { return feature; }
35  FdValue Value(void) const { return value; }
36  std::string Name(void) const { return name; }
37 
38  static FdOperator char_to_operator(char c)
39  {
40  switch (c) {
41  case 'P': return Pop;
42  case 'N': return Nop;
43  case 'R': return Rop;
44  case 'D': return Dop;
45  case 'C': return Cop;
46  case 'U': return Uop;
47  default:
48  throw;
49  }
50  }
51 
52  static bool is_diacritic(const std::string& diacritic_str);
53  static std::string::size_type find_diacritic
54  (const std::string& diacritic_str,
55  std::string::size_type& length);
56 
57  static std::string get_operator(const std::string& diacritic);
58  static std::string get_feature(const std::string& diacritic);
59  static std::string get_value(const std::string& diacritic);
60  static bool has_value(const std::string& diacritic);
61 };
62 
63 template<class T> class FdState;
64 
68 template<class T>
69 class FdTable
70 {
71 private:
72  // Used for generating IDs that stand in for feature and value strings
73  std::map<std::string, FdFeature> feature_map;
74  std::map<std::string, FdValue> value_map;
75 
76  std::map<T, FdOperation> operations;
77  std::map<std::string, T> symbol_map;
78 public:
79  FdTable(): feature_map(), value_map()
80  { value_map[std::string()] = 0; } // empty value = neutral
81 
82  void define_diacritic(T symbol, const std::string& str)
83  {
84  if(!FdOperation::is_diacritic(str))
85  throw;
86 
87  FdOperator op = FdOperation::char_to_operator(str.at(1));
88  std::string feat;
89  std::string val;
90 
91  // Third character is always the first fullstop.
92  size_t first_full_stop_pos = 2;
93  // Find the second full stop, if there is one.
94  size_t second_full_stop_pos = str.find('.',first_full_stop_pos+1);
95  size_t last_char_pos = str.size() - 1;
96  if(second_full_stop_pos == std::string::npos)
97  {
98  assert(op == Cop || op == Dop || op == Rop);
99  feat = str.substr(first_full_stop_pos+1,
100  last_char_pos-first_full_stop_pos-1);
101  }
102  else
103  {
104  feat = str.substr(first_full_stop_pos+1,
105  second_full_stop_pos-first_full_stop_pos-1);
106  val = str.substr(second_full_stop_pos+1,
107  last_char_pos-second_full_stop_pos-1);
108  }
109 
110  if(feature_map.count(feat) == 0)
111  {
112  FdFeature next = feature_map.size();
113  feature_map[feat] = next;
114  }
115  if(value_map.count(val) == 0)
116  {
117  FdValue next = value_map.size()+1;
118  value_map[val] = next;
119  }
120 
121  operations.insert
122  (std::pair<T,FdOperation>
123  (symbol,
124  FdOperation(op, feature_map[feat], value_map[val], str)));
125  symbol_map.insert(std::pair<std::string,T>(str, symbol));
126  }
127 
128  FdFeature num_features() const { return feature_map.size(); }
129  bool is_diacritic(T symbol) const
130  { return operations.find(symbol) != operations.end(); }
131 
132  const FdOperation* get_operation(T symbol) const
133  {
134  // for some reason this fails to compile???
135  //std::map<T,FdOperation>::const_iterator i
136  // = operations.find(symbol);
137  //return (i==operations.end()) ? NULL : &(i->second);
138 
139  return (operations.find(symbol)==operations.end()) ? NULL :
140  &(operations.find(symbol)->second);
141  }
142  const FdOperation* get_operation(const std::string& symbol) const
143  {
144  return (symbol_map.find(symbol)==symbol_map.end()) ? NULL :
145  get_operation(symbol_map.find(symbol)->second);
146  }
147 
148  bool is_valid_string(const std::vector<T>& symbols) const
149  {
150  FdState<T> state(*this);
151 
152  for(size_t i=0; i<symbols.size(); i++)
153  {
154  if(!state.apply_operation(symbols[i]))
155  break;
156  }
157  return !state.fails();
158  }
159 
160  bool is_valid_string(const std::string& str) const
161  {
162  FdState<T> state(*this);
163  std::string remaining(str);
164  std::string::size_type length;
165 
166  while(true)
167  {
168  std::string::size_type next_diacritic_pos
169  = FdOperation::find_diacritic(remaining, length);
170  if(next_diacritic_pos == std::string::npos) // fixed
171  break;
172 
173  std::string diacritic = remaining.substr(0, length);
174  if(!state.apply_operation(diacritic))
175  break;
176  remaining = remaining.substr(length);
177  }
178  return !state.fails();
179  }
180 };
181 
185 template<class T>
186 class FdState
187 {
188 private:
189  const FdTable<T>* table;
190 
191  // This is indexed with values of type FdFeature
192  typename std::vector<FdValue> values;
193  T num_features;
194 
195  bool error_flag;
196 public:
197  FdState(const FdTable<T>& t):
198  table(&t), values(table->num_features()),
199  num_features(table->num_features()), error_flag(false)
200  {}
201 
202  FdState():
203  table(NULL), values(), num_features(0), error_flag(false)
204  {}
205 
206  const FdTable<T>& get_table() const {return *table;}
207 
208  const std::vector<FdValue> & get_values(void) const
209  { return values; }
210 
211  void assign_values(std::vector<FdValue> const & vals)
212  {
213  values = vals;
214  if (values.size() != num_features) {
215  error_flag = true;
216  }
217  }
218 
219  bool apply_operation(T symbol)
220  {
221  const FdOperation* op = table->get_operation(symbol);
222  if(op)
223  return apply_operation(*op);
224  return true; // if the symbol isn't a diacritic
225  }
226  bool apply_operation(const FdOperation& op)
227  {
228  switch(op.Operator()) {
229  case Pop: // positive set
230  values[op.Feature()] = op.Value();
231  return true;
232 
233  case Nop: // negative set (literally, in this implementation)
234  values[op.Feature()] = -1*op.Value();
235  return true;
236 
237  case Rop: // require
238  if (op.Value() == 0) // empty require
239  return (values[op.Feature()] != 0);
240  else // nonempty require
241  return (values[op.Feature()] == op.Value());
242 
243  case Dop: // disallow
244  if (op.Value() == 0) // empty disallow
245  return (values[op.Feature()] == 0);
246  else // nonempty disallow
247  return (values[op.Feature()] != op.Value());
248 
249  case Cop: // clear
250  values[op.Feature()] = 0;
251  return true;
252 
253  case Uop: // unification
254  if(values[op.Feature()] == 0 || /* if the feature is unset or */
255  values[op.Feature()] == op.Value() || /* the feature is at
256  this value already
257  or */
258  (values[op.Feature()] < 0 &&
259  (values[op.Feature()]*(-1) != op.Value())) /* the feature is
260  negatively set
261  to something
262  else */
263  )
264  {
265  values[op.Feature()] = op.Value();
266  return true;
267  }
268  return false;
269  }
270  throw; // for the compiler's peace of mind
271  }
272  bool apply_operation(const std::string& symbol)
273  {
274  const FdOperation* op = table->get_operation(symbol);
275  if(op)
276  return apply_operation(*op);
277  return true;
278  }
279 
280  bool fails() const {return error_flag;}
281  void reset()
282  {
283  error_flag = false;
284  values.clear();
285  values.insert(values.begin(), table->num_features(), 0);
286  }
287 };
288 
289 }
290 #endif