File size: 4,571 Bytes
c0943fb
 
 
dabc473
 
c0943fb
 
 
 
 
 
 
 
 
 
 
 
 
 
dabc473
 
 
 
 
 
c0943fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6113fbd
 
 
 
 
 
c0943fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35a7fcb
c0943fb
dabc473
606a6bc
dabc473
 
9a513f5
 
c0943fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dabc473
 
c0943fb
 
 
dabc473
c0943fb
 
 
dabc473
 
 
 
 
 
 
c0943fb
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#ifndef WHISPER_H
#define WHISPER_H

#include <stdbool.h>
#include <stdint.h>

// WHISPER_API decorates every public declaration in this header.
//   - WHISPER_SHARED not defined      -> expands to nothing
//   - WHISPER_SHARED, not _WIN32      -> default symbol visibility attribute
//   - WHISPER_SHARED, _WIN32, BUILD   -> __declspec(dllexport)
//   - WHISPER_SHARED, _WIN32, !BUILD  -> __declspec(dllimport)
#if !defined(WHISPER_SHARED)
#    define WHISPER_API
#elif !defined(_WIN32)
#    define WHISPER_API __attribute__ ((visibility ("default")))
#elif defined(WHISPER_BUILD)
#    define WHISPER_API __declspec(dllexport)
#else
#    define WHISPER_API __declspec(dllimport)
#endif

// Fixed audio front-end constants exposed to callers.
// NOTE(review): units are not stated in this header. The names suggest Hz for
// the sample rate, samples for N_FFT / HOP_LENGTH and seconds for CHUNK_SIZE -
// confirm against the implementation.
// WHISPER_N_MEL matches the "n_mel must be 80" requirement on whisper_set_mel().
#define WHISPER_SAMPLE_RATE 16000
#define WHISPER_N_FFT       400
#define WHISPER_N_MEL       80
#define WHISPER_HOP_LENGTH  160
#define WHISPER_CHUNK_SIZE  30

#ifdef __cplusplus
extern "C" {
#endif

    //
    // C interface
    //

    // TODO: documentation will come soon

    // Opaque handle: only forward-declared here, so callers can hold pointers
    // to it but cannot access its members.
    struct whisper_context;

    // Token identifier type used throughout the API (a plain int).
    typedef int whisper_token;

    // whisper_init: takes a model path and returns a context pointer.
    // whisper_free: takes a context pointer and returns nothing.
    // NOTE(review): presumably whisper_init loads the model and the result must
    // be released with whisper_free; the failure return value (NULL?) is not
    // visible in this header - confirm against the implementation.
    WHISPER_API struct whisper_context * whisper_init(const char * path_model);
    WHISPER_API void whisper_free(struct whisper_context * ctx);

    // Takes n_samples read-only float samples plus a thread count.
    // NOTE(review): judging by the name, this converts PCM audio to a mel
    // spectrogram stored in ctx - confirm. The meaning of the int return value
    // is not documented in this header.
    WHISPER_API int whisper_pcm_to_mel(
            struct whisper_context * ctx,
            const float * samples,
            int n_samples,
            int n_threads);

    // Takes a read-only float buffer together with its two dimensions.
    // n_mel must be 80
    // NOTE(review): presumably installs a caller-computed mel spectrogram into
    // ctx; the memory layout of `data` (n_len x n_mel vs n_mel x n_len) and the
    // return-value convention are not visible here - confirm.
    WHISPER_API int whisper_set_mel(
            struct whisper_context * ctx,
            const float * data,
            int n_len,
            int n_mel);

    // Takes a context, an integer offset and a thread count.
    // NOTE(review): presumably runs the encoder starting at `offset`; the unit
    // of `offset` and the return-value convention are not stated in this
    // header - confirm against the implementation.
    WHISPER_API int whisper_encode(
            struct whisper_context * ctx,
            int offset,
            int n_threads);

    // Takes a read-only array of n_tokens tokens, a past count and a thread count.
    // NOTE(review): presumably runs the decoder on `tokens` given n_past
    // already-processed tokens; the return-value convention is not stated in
    // this header - confirm against the implementation.
    WHISPER_API int whisper_decode(
            struct whisper_context * ctx,
            const whisper_token * tokens,
            int n_tokens,
            int n_past,
            int n_threads);

    // Both functions return a single token chosen from the state held in ctx.
    // NOTE(review): presumably they sample from the most recent decode output,
    // and need_timestamp forces a timestamp token - confirm.
    WHISPER_API whisper_token whisper_sample_best(struct whisper_context * ctx, bool need_timestamp);
    WHISPER_API whisper_token whisper_sample_timestamp(struct whisper_context * ctx);

    // Returns the id of the specified language, or -1 if it is not found.
    // Does not take a context.
    // NOTE(review): the accepted spelling of `lang` (code vs full name) is not
    // stated in this header - confirm.
    WHISPER_API int whisper_lang_id(const char * lang);

    // Integer property getters on a context.
    // NOTE(review): whisper_is_multilingual returns int, presumably as a
    // boolean (0 / non-zero) - confirm.
    WHISPER_API int whisper_n_len          (struct whisper_context * ctx); // mel length
    WHISPER_API int whisper_n_vocab        (struct whisper_context * ctx);
    WHISPER_API int whisper_n_text_ctx     (struct whisper_context * ctx);
    WHISPER_API int whisper_is_multilingual(struct whisper_context * ctx);

    // Returns a mutable float pointer obtained from ctx.
    // NOTE(review): presumably per-token probabilities (whisper_n_vocab()
    // entries?) owned by ctx; length, ownership and lifetime are not stated in
    // this header - confirm before indexing or freeing.
    WHISPER_API float * whisper_get_probs(struct whisper_context * ctx);

    // Maps a token to a read-only C string.
    // NOTE(review): the string is presumably owned by ctx and must not be
    // freed; behaviour for an unknown token is not stated here - confirm.
    WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);

    // Special-token getters; each takes a context and returns one token.
    // NOTE(review): the abbreviations are not expanded anywhere in this header.
    // Presumably eot = end of text, sot = start of transcript, prev = previous
    // context, solm = start of LM, not = no timestamps, beg = first timestamp -
    // confirm against the implementation.
    WHISPER_API whisper_token whisper_token_eot (struct whisper_context * ctx);
    WHISPER_API whisper_token whisper_token_sot (struct whisper_context * ctx);
    WHISPER_API whisper_token whisper_token_prev(struct whisper_context * ctx);
    WHISPER_API whisper_token whisper_token_solm(struct whisper_context * ctx);
    WHISPER_API whisper_token whisper_token_not (struct whisper_context * ctx);
    WHISPER_API whisper_token whisper_token_beg (struct whisper_context * ctx);

    // Getters for the `translate` and `transcribe` tokens. Unlike the other
    // token getters they take no context argument.
    // Declared with (void) rather than (): in C before C23 an empty parameter
    // list is a non-prototype declaration, so calls with stray arguments would
    // not be diagnosed. In C++ the two spellings are identical.
    WHISPER_API whisper_token whisper_token_translate (void);
    WHISPER_API whisper_token whisper_token_transcribe(void);

    // Takes a context and returns nothing.
    // NOTE(review): presumably prints timing statistics collected in ctx; the
    // output stream is not stated in this header - confirm.
    WHISPER_API void whisper_print_timings(struct whisper_context * ctx);

    ////////////////////////////////////////////////////////////////////////////

    // Decoding strategies. Stored in whisper_full_params::strategy and passed
    // to whisper_full_default_params(). No explicit values are assigned, so
    // GREEDY is 0 and BEAM_SEARCH is 1.
    enum whisper_decode_strategy {
        WHISPER_DECODE_GREEDY,
        WHISPER_DECODE_BEAM_SEARCH,
    };

    // Options passed by value to whisper_full(). whisper_full_default_params()
    // returns an instance of this struct for a given strategy.
    // NOTE(review): the per-field semantics below are inferred from the field
    // names only - confirm against the implementation.
    struct whisper_full_params {
        // Presumably selects which member of the union below is meaningful.
        enum whisper_decode_strategy strategy;

        int n_threads;
        int offset_ms; // presumably a start offset in milliseconds - confirm

        bool translate;
        bool no_context;
        bool print_special_tokens;
        bool print_progress;
        bool print_realtime;
        bool print_timestamps;

        // Language string. NOTE(review): presumably a value accepted by
        // whisper_lang_id(); the struct stores only the pointer, so the
        // caller's string must stay valid while the params are in use.
        const char * language;

        // Strategy-specific settings. This is an anonymous union (C11), so
        // `greedy` and `beam_search` share storage and only one of them holds
        // valid data at a time. `n_past` is the first member of both.
        union {
            struct {
                int n_past;
            } greedy;

            struct {
                int n_past;
                int beam_width;
                int n_best;
            } beam_search;
        };
    };

    // Returns a whisper_full_params value (by value) for the given strategy.
    // NOTE(review): the actual default values are set in the implementation
    // and are not visible in this header.
    WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_decode_strategy strategy);

    // Full whisper run - encode + decode.
    // Takes the options struct by value and n_samples read-only float samples.
    // NOTE(review): the return-value convention is not stated in this header;
    // results are presumably read back through the whisper_full_* accessors
    // that follow - confirm.
    WHISPER_API int whisper_full(
            struct whisper_context * ctx,
            struct whisper_full_params params,
            const float * samples,
            int n_samples);

    // Segment accessors: a segment count, plus per-segment getters indexed by
    // i_segment.
    // NOTE(review): presumably valid after whisper_full(), with i_segment in
    // [0, whisper_full_n_segments(ctx)) - confirm.
    WHISPER_API int whisper_full_n_segments(struct whisper_context * ctx);

    // Segment bounds as 64-bit integers.
    // NOTE(review): presumably t0 is the start and t1 the end time; the time
    // unit is not stated in this header - confirm.
    WHISPER_API int64_t whisper_full_get_segment_t0(struct whisper_context * ctx, int i_segment);
    WHISPER_API int64_t whisper_full_get_segment_t1(struct whisper_context * ctx, int i_segment);

    // Segment text as a read-only C string.
    // NOTE(review): ownership and lifetime of the returned pointer are not
    // stated in this header - presumably owned by ctx; confirm.
    WHISPER_API const char * whisper_full_get_segment_text(struct whisper_context * ctx, int i_segment);

#ifdef __cplusplus
}
#endif

#endif