HFST - Helsinki Finite-State Transducer Technology API  version 3.7.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HarmonizeUnknownAndIdentitySymbols.h
1 // --- licensing stuff here?
2 
3 #ifndef HARMONIZE_UNKNOWN_AND_IDENTITY_SYMBOLS
4 #define HARMONIZE_UNKNOWN_AND_IDENTITY_SYMBOLS
5 
6 // ---
7 /* A more generalized class that takes a template <class C, class W>
8  HfstTransitionGraph as its constructor arguments. The functions could
9  take a HfstTransitionGraph::HfstTransitionGraphAlphabet as their
10  arguments and use functions C::get_unknown() etc.
11  */
12 
13 #include <set>
14 #include <string>
15 #include <iostream> // --- in the cc-file instead?
16 #include <algorithm> // --- "" ---
17 #include <cassert> // --- "" ---
18 
19 #include "HfstDataTypes.h"
20 #include "HfstSymbolDefs.h"
21 
22 namespace hfst
23 {
26 
27 #ifdef TEST_HARMONIZE_UNKNOWN_AND_IDENTITY_SYMBOLS
28 #define debug_harmonize 1
29 #else
30 #define debug_harmonize 0
31 #endif // TEST_HARMONIZE_UNKNOWN_AND_IDENTITY_SYMBOLS
32 
33 // --- a short documentation
34 class HarmonizeUnknownAndIdentitySymbols
35 {
36  // --- HfstSymbolDefs.h has members const std::string internal_identity
37  // and internal_unknown
38  public:
39  // --- const std::string instead
40  static const char * identity; // --- a short documentation
41  static const char * unknown; // --- "" ---
42 
43  // Constructor whose side effect is to harmonize the identity and unknown
44  // symbols of its arguments.
45  HarmonizeUnknownAndIdentitySymbols
47  protected:
48 
51  StringSet t1_symbol_set; // --- symbols known to t1?
52  StringSet t2_symbol_set; // --- symbols known to t2?
53 
54  // Collect the symbols from the transitions of the argument transducer
55  // and store them in the argument set. This is needed since the alphabet
56  // of HfstBasicTransducers can sometimes inexplicably be empty...
57  // --- the alphabet can also contain symbols that are not found in
58  // transitions...
59  void populate_symbol_set(const HfstBasicTransducer &,StringSet &);
60 
61  // Add all symbols in the StringSet to the alphabet of the transducer.
62  void add_symbols_to_alphabet(HfstBasicTransducer &, const StringSet &);
63 
64  // For every x in the set, add x:x transitions for every identity:identity
65  // transition in the argument transducer (the source and target states as
66  // well as the weights are the same as in the original identity transition).
67  void harmonize_identity_symbols
68  (HfstBasicTransducer &,const StringSet &);
69 
70  // For every x in the set
71  // 1. add x:c transitions for every unknown:c transition in the argument
72  // transducer.
73  // 2. add c:x transitions for every c:unknown transition in the argument
74  // transducer.
75  //
76  // For every x and y in the set with x != y, add x:y transitions for every
77  // unknown:unknown transition in the argument transducer.
78  //
79  // (the source and target states as well as the weights are the same as in
80  // the original transition)
81  void harmonize_unknown_symbols
82  (HfstBasicTransducer &,const StringSet &);
83 };
84 
85 void debug_harmonize_print(const StringSet &); // NOTE(review): presumably debug output controlled by debug_harmonize -- confirm in the .cc file
86 void debug_harmonize_print(const std::string &); // NOTE(review): overload of the above taking a single string
87 size_t max(size_t t1,size_t t2); // --- a short documentation; presumably returns the larger of t1 and t2 -- TODO confirm
88 
89 }
90 
91 
92 #endif // HARMONIZE_UNKNOWN_AND_IDENTITY_SYMBOLS --- good
Datatypes that are needed when using the HFST API.
HfstTransition< HfstTropicalTransducerTransitionData > HfstBasicTransition
An HfstTransition with transition data of type HfstTropicalTransducerTransitionData.
Definition: HfstDataTypes.h:122
HfstTransitionGraph< HfstTropicalTransducerTransitionData > HfstBasicTransducer
An HfstTransitionGraph with transitions of type HfstTropicalTransducerTransitionData and weight type ...
Definition: HfstDataTypes.h:114
Typedefs and functions for symbols, symbol pairs and sets of symbols.