HFST - Helsinki Finite-State Transducer Technology API  version 3.7.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HfstXeroxRules.h
Go to the documentation of this file.
1 // This program is free software: you can redistribute it and/or modify
2 // it under the terms of the GNU General Public License as published by
3 // the Free Software Foundation, version 3 of the License.
4 //
5 // This program is distributed in the hope that it will be useful,
6 // but WITHOUT ANY WARRANTY; without even the implied warranty of
7 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8 // GNU General Public License for more details.
9 //
10 // You should have received a copy of the GNU General Public License
11 // along with this program. If not, see <http://www.gnu.org/licenses/>.
12 
13 
14 //#include "HfstDataTypes.h"
15 //#include "HfstSymbolDefs.h"
16 #include "HfstTransducer.h"
17 
22 namespace hfst
23 {
25  namespace xeroxRules
26  {
// Direction of a replace rule. Class Rule stores one value of this type and
// describes it as the direction (up, left, down or right) of its context.
27  enum ReplaceType {REPL_UP, REPL_DOWN, REPL_RIGHT, REPL_LEFT};
28 
29 
30 
31 
32 
33 
34  // This enum is used in xre_parse.yy for the regex2pfst tool.
35  // It lives here rather than in xre_parse.yy because we could not make it work there.
// NOTE(review): the enumerator names suggest one value per replace-arrow
// variant (plain/optional, left/right, mark-up, longest/shortest match in
// either direction) -- confirm against xre_parse.yy.
36  enum ReplaceArrow { E_REPLACE_RIGHT,
37  E_OPTIONAL_REPLACE_RIGHT,
38  E_REPLACE_LEFT,
39  E_OPTIONAL_REPLACE_LEFT,
40  E_REPLACE_RIGHT_MARKUP,
41  E_RTL_LONGEST_MATCH,
42  E_RTL_SHORTEST_MATCH,
43  E_LTR_LONGEST_MATCH,
44  E_LTR_SHORTEST_MATCH
45  };
// A rule that bundles a mapping, a context and a replace type (if any),
// i.e. the parts of a rule written as  A -> B || L _ R.
// NOTE(review): listing lines 54 and 56 (the members described by the first
// two comments below) are missing from this rendering; presumably they are
// the HfstTransducerPairVector members returned by get_mapping() and
// get_context() -- confirm against the real header.
51  class Rule
52  {
53  /* cross product of mapping transducers */
55  /* context */
57  /* if there is a context, it needs to have a direction (up, left, down or right) */
58  ReplaceType replType;
59 
60  public:
61  // Rule ( const HfstTransducer& ); // mapping
62  // Rule ( const HfstTransducer&, const HfstTransducerPairVector&, ReplaceType); // mapping, context
63  Rule ( const HfstTransducerPairVector& ); // mapping only (cf. the commented-out overload above)
64  Rule ( const HfstTransducerPairVector&, const HfstTransducerPairVector&, ReplaceType ); // mapping, context, direction of the context
65 
66  HfstTransducerPairVector get_mapping() const; // accessor for the mapping; returns a copy (by value)
67  HfstTransducerPairVector get_context() const; // accessor for the context; returns a copy (by value)
68  ReplaceType get_replType() const; // accessor for replType
69  };
70 
// Mark-up rule: a Rule that additionally carries a pair of marks.
// A mark-up rule has two markers on the right side of the mapping.
// NOTE(review): listing lines 84-85 are missing from this rendering;
// presumably they declare the constructors that replaced the commented-out
// ones below -- confirm against the real header.
75  class MarkUpRule : public Rule
76  {
77  StringPair marks; // the two marks of the mark-up rule
78 
79  public:
80  // for mark up replace
81 
82  // MarkUpRule ( const HfstTransducer&, StringPair ); // mapping
83  // MarkUpRule ( const HfstTransducer&, const HfstTransducerPairVector&, ReplaceType, StringPair); // mapping, context
86  StringPair get_marks() const; // accessor for marks; returns a copy (by value)
87  };
88 
89  // Disjunct all transducers in trVector and return the resulting transducer.
90  HfstTransducer disjunctVectorMembers( const HfstTransducerVector &trVector );
91 
96 
97  // Used for changing weights to zero (in the constraintComposition function).
// NOTE(review): presumably maps every weight f to 0 -- confirm against the definition.
98  float zero_weight(float f);
99 
// Generalized Lenient Composition (by Anssi Yli-Jyrä) of t and Constraint.
104  HfstTransducer constraintComposition( const HfstTransducer &t, const HfstTransducer &Constraint );
105 
// Freely inserts bracket symbols into t (t is passed by non-const reference
// and nothing is returned). When optional is true, further symbols are
// inserted in addition to those inserted when it is false.
// NOTE(review): the exact symbol names are garbled in the generated
// documentation -- confirm against the definition.
109  void insertFreelyAllTheBrackets( HfstTransducer &t, bool optional );
110 
// Used in bracketedReplace when the replace expression has a context.
// The first line of this declaration (return type, name and first parameter)
// had been dropped from the listing; it is restored here from the full
// signature given in the generated documentation.
116  HfstTransducer expandContextsWithMapping( const HfstTransducerPairVector &ContextVector,
117  const HfstTransducer &mappingWithBracketsAndTmpBoundary,
118  const HfstTransducer &identityExpanded,
119  ReplaceType replType,
120  bool optional);
121 
122 
// Bracketed replace for a single rule: unconditional replace, applied in
// (possibly multiple) contexts. optional selects the optional variant.
135  HfstTransducer bracketedReplace( const Rule &rule, bool optional);
136 
// Bracketed replace for parallel rules (all rules in ruleVector).
138  HfstTransducer parallelBracketedReplace( const std::vector<Rule> &ruleVector, bool optional);
139 
140 
141 
142 
143  //---------------------------------
144  // CONSTRAINTS
145  //---------------------------------
// Each comment below gives the regular expression of the constraint that the
// following function builds. All functions return a new HfstTransducer.
// NOTE(review): the parameter name "uncondidtionalTr" is presumably a
// misspelling of "unconditionalTr"; it is left untouched here.
146 
147  // Helper function.
148  // Returns: [ B:0 | 0:B | ?-B ]*
149  // which is used in some of the constraints below.
150  HfstTransducer constraintsRightPart( ImplementationType type );
151 
152  // .#. ?* <:0 0:> ?* .#.
153  // Filters out the empty string.
154  HfstTransducer oneBetterthanNoneConstraint( const HfstTransducer &uncondidtionalTr );
155 
156 
157  // ?* <:0 [B:0]* [I-B] [ B:0 | 0:B | ?-B ]*
158  HfstTransducer leftMostConstraint( const HfstTransducer &uncondidtionalTr );
159 
160  // [ B:0 | 0:B | ?-B ]* [I-B]+ >:0 [ ?-B ]*
161  HfstTransducer rightMostConstraint( const HfstTransducer &uncondidtionalTr );
162 
163  // Longest match.
164  // It should be composed to the left-most transducer.
165  // ?* < [?-B]+ 0:> [ ? | 0:< | <:0 | 0:> | B ] [ B:0 | 0:B | ?-B ]*
166  HfstTransducer longestMatchLeftMostConstraint( const HfstTransducer &uncondidtionalTr );
167 
168 
169  // Longest match, right-most variant.
170  HfstTransducer longestMatchRightMostConstraint(const HfstTransducer &uncondidtionalTr );
171 
172 
173  // Shortest match.
174  // It should be composed to the left-most transducer.
175  // ?* < [?-B]+ >:0
176  // [?-B] or [ ? | 0:< | <:0 | >:0 | B ][?-B]+
177  // [ B:0 | 0:B | ?-B ]*
178  HfstTransducer shortestMatchLeftMostConstraint( const HfstTransducer &uncondidtionalTr );
179 
180 
181  // Shortest match.
182  // It should be composed to the left-most transducer.
// NOTE(review): this declares the right-most variant, so "left-most" above
// may have been copied from the previous comment -- confirm.
183  //[ B:0 | 0:B | ?-B ]*
184  // [?-B] or [?-B]+ [ ? | 0:> | >:0 | <:0 | B ]
185  // <:0 [?-B]+ > ?*
186  HfstTransducer shortestMatchRightMostConstraint( const HfstTransducer &uncondidtionalTr );
187 
188 
189  // ?* [ BL:0 (?-B)+ BR:0 ?* ]+
190  HfstTransducer mostBracketsPlusConstraint( const HfstTransducer &uncondidtionalTr );
191 
192 
193  // ?* [ BL:0 (?-B)* BR:0 ?* ]+
194  HfstTransducer mostBracketsStarConstraint( const HfstTransducer &uncondidtionalTr );
195 
196  // ?* B2 ?*
197  HfstTransducer removeB2Constraint( const HfstTransducer &t );
198 
199  // Used to avoid repetition in an empty replace rule.
200  HfstTransducer noRepetitionConstraint( const HfstTransducer &t );
201 
214 
215 
216  //---------------------------------
217  // INTERFACE HELPING FUNCTIONS
218  //---------------------------------
219  // Used by the hfst-regexp parser.
// Three overloads that create the mapping for a mark-up replace. They differ
// in whether one mapping pair or a vector of pairs is given, and in whether
// the marks are given as a pair of transducers or as a pair of strings.
// Overload 1: single mapping pair, marks as a transducer pair.
220  HfstTransducerPair create_mapping_for_mark_up_replace( const HfstTransducerPair &mappingPair,
221  const HfstTransducerPair &marks );
// Overload 2: vector of mapping pairs, marks as a string pair.
222  HfstTransducerPairVector create_mapping_for_mark_up_replace( const HfstTransducerPairVector &mappingPairVector,
223  const StringPair &marks );
224 
// Overload 3: vector of mapping pairs, marks as a transducer pair.
225  HfstTransducerPairVector create_mapping_for_mark_up_replace( const HfstTransducerPairVector &mappingPairVector,
226  const HfstTransducerPair &marks );
227  //---------------------------------
228  // REPLACE FUNCTIONS - INTERFACE
229  //---------------------------------
// Every function comes in two overloads: one taking a single Rule and one
// taking a std::vector<Rule> for parallel rules. Where present, the
// "optional" flag selects the optional variant of the replacement.
230 
231  // Replace up, left, right or down (cf. ReplaceType), single rule.
232  HfstTransducer replace( const Rule &rule, bool optional);
233  // Same, for parallel rules.
234  HfstTransducer replace( const std::vector<Rule> &ruleVector, bool optional);
235  // Replace up, left, right or down, single rule.
// NOTE(review): presumably the left-arrow counterpart of replace() -- confirm against the definition.
236  HfstTransducer replace_left( const Rule &rule, bool optional);
237  // Same, for parallel rules.
238  HfstTransducer replace_left( const std::vector<Rule> &ruleVector, bool optional);
239  // Longest match, left to right, single rule.
240  HfstTransducer replace_leftmost_longest_match( const Rule &rule );
241  // Longest match, left to right, parallel rules.
242  HfstTransducer replace_leftmost_longest_match( const std::vector<Rule> &ruleVector );
243  // Longest match, right to left, single rule.
244  HfstTransducer replace_rightmost_longest_match( const Rule &rule );
245 
246  // Longest match, right to left, parallel rules.
247  HfstTransducer replace_rightmost_longest_match( const std::vector<Rule> &ruleVector );
248 
// Shortest-match counterparts of the four functions above.
249  HfstTransducer replace_leftmost_shortest_match( const Rule &rule);
250 
251  HfstTransducer replace_leftmost_shortest_match(const std::vector<Rule> &ruleVector );
252  HfstTransducer replace_rightmost_shortest_match( const Rule &rule );
253  HfstTransducer replace_rightmost_shortest_match( const std::vector<Rule> &ruleVector );
254 
255 
256 
// Mark-up replace for a single rule; the marks are given as a pair of strings.
257  HfstTransducer mark_up_replace( const Rule &rule,
258  const StringPair &marks,
259  bool optional);
260 
// Mark-up replace for a single rule; the marks are given as a pair of transducers.
261  HfstTransducer mark_up_replace(const Rule &rule,
262  const HfstTransducerPair &marks,
263  bool optional);
264 
// The overload below, taking a vector of MarkUpRule, is commented out in the header.
265 /*
266  HfstTransducer mark_up_replace( const std::vector<MarkUpRule> &markUpRuleVector,
267  bool optional);
268 */
269  // Epenthesis replace: up, left, right or down, single rule.
270  HfstTransducer replace_epenthesis( const Rule &rule, bool optional);
271  // Epenthesis replace: up, left, right or down, parallel rules.
272  HfstTransducer replace_epenthesis( const std::vector<Rule> &ruleVector, bool optional);
273 
274 
275  //---------------------------------
276  // RESTRICTION FUNCTIONS
277  //---------------------------------
278 
279  // Create marks for a given i (declaration currently commented out).
280  //static StringPair restrictionMarks( int i);
281 
282  /*
283  * define AA1a [ [. 0 .] -> LEFT_MARK || _ center ];
284  * define AA1b [ [. 0 .] -> RIGHT_MARK || center _ ];
285  * retval = AA1 .o. AA2
286  */
287  //static HfstTransducer surroundCenterWithBrackets( const HfstTransducer &center,
288  // const HfstTransducer &leftMark,
289  // const HfstTransducer &rightMark);
290  // Contexts
291  // define NOS1 [ %[ -> 0 || b / B _ ];
292  // define NOF1 [ %] -> 0 || _ c / B ];
// NOTE(review): this function is declared static at namespace scope in a
// header, so every translation unit that includes the header gets its own
// internal-linkage declaration -- confirm that this is intended.
293  static HfstTransducer removeBracketsInContext( const HfstTransducerPairVector &context,
294  const HfstTransducer &leftMark,
295  const HfstTransducer &rightMark,
296  int i);
297  // Restriction function "=>": restricts automata to the given contexts.
298  HfstTransducer restriction( const HfstTransducer &automata, const HfstTransducerPairVector &context);
// NOTE(review): before/after presumably build the "left before right" and
// "left after right" operators -- confirm against the definitions.
299  HfstTransducer before( const HfstTransducer &left, const HfstTransducer &right);
300  HfstTransducer after( const HfstTransducer &left, const HfstTransducer &right);
301  }
302 }
HfstTransducer expandContextsWithMapping(const HfstTransducerPairVector &ContextVector, const HfstTransducer &mappingWithBracketsAndTmpBoundary, const HfstTransducer &identityExpanded, ReplaceType replType, bool optional)
It is used in bracketedReplace, when the replace expression has context. Cr' = (Rc ...
Definition: HfstXeroxRules.cc:259
std::pair< String, String > StringPair
A symbol pair in a transition.
Definition: HfstSymbolDefs.h:71
Declarations of HFST API functions and datatypes.
Mark up rule has two markers on the right side of the mapping. Mapping is only left side of the mappi...
Definition: HfstXeroxRules.h:75
HfstTransducer removeMarkers(const HfstTransducer &tr)
Remove the markers used in replace functions from tr.
Definition: HfstXeroxRules.cc:133
HfstTransducer parallelBracketedReplace(const std::vector< Rule > &ruleVector, bool optional)
Bracketed replace for parallel rules.
Definition: HfstXeroxRules.cc:747
A synchronous finite-state transducer.
Definition: HfstTransducer.h:227
HfstTransducer applyBoundaryMark(const HfstTransducer &t)
It applies boundary marker from contexts (.#.) to t.
Definition: HfstXeroxRules.cc:2081
ImplementationType
The type of an HfstTransducer.
Definition: HfstDataTypes.h:43
std::vector< HfstTransducer > HfstTransducerVector
a vector of transducers for methods applying a cascade of automata
Definition: HfstDataTypes.h:35
A rule that contains mapping and context and replace type (if any). If rule is A -> B || L _ R ...
Definition: HfstXeroxRules.h:51
void insertFreelyAllTheBrackets(HfstTransducer &t, bool optional)
If optional is false, the function freely inserts in t @ and If it is true, it also inserts @ and ...
Definition: HfstXeroxRules.cc:227
std::pair< HfstTransducer, HfstTransducer > HfstTransducerPair
A pair of transducers.
Definition: HfstDataTypes.h:79
std::vector< HfstTransducerPair > HfstTransducerPairVector
A vector of transducer pairs.
Definition: HfstDataTypes.h:83
HfstTransducer constraintComposition(const HfstTransducer &t, const HfstTransducer &Constraint)
Generalized Lenient Composition (by Anssi Yli-Jyrä) of t and Constraint. More about this composit...
Definition: HfstXeroxRules.cc:196
HfstTransducer bracketedReplace(const Rule &rule, bool optional)
Unconditional replace, in multiple contexts first: (.* T<a:b>T .*) - [( .* L1 T<a:b>T R1 ...
Definition: HfstXeroxRules.cc:447