-
Notifications
You must be signed in to change notification settings - Fork 1
/
w2l.h
95 lines (79 loc) · 3.01 KB
/
w2l.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#ifdef __cplusplus
extern "C" {
#endif
#include <stdlib.h>
#include <stdbool.h>
typedef struct w2l_engine w2l_engine;
typedef struct w2l_decoder w2l_decoder;
typedef struct w2l_decoderesult w2l_decoderesult;
typedef struct w2l_emission w2l_emission;
typedef struct {
int beamsize;
float beamthresh;
float lmweight;
float wordscore;
float unkweight;
bool logadd;
float silweight;
} w2l_decode_options;
extern w2l_decode_options w2l_decode_defaults;
w2l_engine *w2l_engine_new(const char *acoustic_model_path, const char *tokens_path);
w2l_emission *w2l_engine_process(w2l_engine *engine, float *samples, size_t sample_count);
bool w2l_engine_export(w2l_engine *engine, const char *path);
void w2l_engine_free(w2l_engine *engine);
char *w2l_emission_text(w2l_emission *emission);
float *w2l_emission_values(w2l_emission *emission, int *frames, int *tokens);
void w2l_emission_free(w2l_emission *emission);
w2l_decoder *w2l_decoder_new(w2l_engine *engine, const char *kenlm_model_path, const char *lexicon_path, const char *flattrie_path, const w2l_decode_options *opts);
w2l_decoderesult *w2l_decoder_decode(w2l_decoder *decoder, w2l_emission *emission);
char *w2l_decoder_result_words(w2l_decoder *decoder, w2l_decoderesult *decoderesult);
char *w2l_decoder_result_tokens(w2l_decoder *decoder, w2l_decoderesult *decoderesult);
void w2l_decoderesult_free(w2l_decoderesult *decoderesult);
void w2l_decoder_free(w2l_decoder *decoder);
bool w2l_make_flattrie(const char *tokens_path, const char *kenlm_model_path, const char *lexicon_path, const char *flattrie_path);
#pragma pack(1)
typedef struct {
uint8_t token;
int32_t offset;
} w2l_dfa_edge;
typedef struct {
uint8_t flags;
uint8_t nEdges;
w2l_dfa_edge edges[0];
} w2l_dfa_node;
#pragma pack()
typedef struct {
/** Score for successfully decoded commands.
*
* Competes with language wordscore.
*/
float command_score;
/** Threshold for rejection.
*
* The emission-transmission score of rejection_window_frame adjacent tokens
* divided by the score of the same area in the viterbi path. If the fraction
* is below this threshold the decode will be rejected.
*
* Values around 0.55 work ok.
*/
float rejection_threshold;
/** Window size for decode vs viterbi comparison.
*
* Values around 8 make sense.
*/
int rejection_window_frames;
/** Whether to print debug messages to stdout. */
bool debug;
} w2l_dfa_decode_options;
/** Decode emisssions according to dfa model, return decoded text.
*
* If the decode fails or no good paths exist the result will be NULL.
* If it is not null, the caller is responsible for free()ing the string.
*
* The dfa argument points to the first w2l_dfa_node. It is expected that
* its address and edge offsets can be used to traverse the full dfa.
*/
char *w2l_decoder_dfa(w2l_engine *engine, w2l_decoder *decoder, w2l_emission *emission, w2l_dfa_node *dfa, w2l_dfa_decode_options *opts);
#ifdef __cplusplus
} // extern "C"
#endif