Spaces:
Running
Running
examples : expose language detection probabilities to server example (#3044)
Browse files
* feat: expose language detection probabilities to server.cpp
* feat: enhance language detection output in server.cpp
* Remove empty spaces.
- examples/server/server.cpp +14 -2
examples/server/server.cpp
CHANGED
|
@@ -926,14 +926,26 @@ int main(int argc, char ** argv) {
|
|
| 926 |
res.set_content(ss.str(), "text/vtt");
|
| 927 |
} else if (params.response_format == vjson_format) {
|
| 928 |
/* try to match openai/whisper's Python format */
|
| 929 |
-
std::string results = output_str(ctx, params, pcmf32s);
|
|
|
|
|
|
|
|
|
|
| 930 |
json jres = json{
|
| 931 |
{"task", params.translate ? "translate" : "transcribe"},
|
| 932 |
{"language", whisper_lang_str_full(whisper_full_lang_id(ctx))},
|
| 933 |
{"duration", float(pcmf32.size())/WHISPER_SAMPLE_RATE},
|
| 934 |
{"text", results},
|
| 935 |
-
{"segments", json::array()}
|
|
|
|
|
|
|
|
|
|
| 936 |
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 937 |
const int n_segments = whisper_full_n_segments(ctx);
|
| 938 |
for (int i = 0; i < n_segments; ++i)
|
| 939 |
{
|
|
|
|
| 926 |
res.set_content(ss.str(), "text/vtt");
|
| 927 |
} else if (params.response_format == vjson_format) {
|
| 928 |
/* try to match openai/whisper's Python format */
|
| 929 |
+
std::string results = output_str(ctx, params, pcmf32s);
|
| 930 |
+
// Get language probabilities
|
| 931 |
+
std::vector<float> lang_probs(whisper_lang_max_id() + 1, 0.0f);
|
| 932 |
+
const auto detected_lang_id = whisper_lang_auto_detect(ctx, 0, params.n_threads, lang_probs.data());
|
| 933 |
json jres = json{
|
| 934 |
{"task", params.translate ? "translate" : "transcribe"},
|
| 935 |
{"language", whisper_lang_str_full(whisper_full_lang_id(ctx))},
|
| 936 |
{"duration", float(pcmf32.size())/WHISPER_SAMPLE_RATE},
|
| 937 |
{"text", results},
|
| 938 |
+
{"segments", json::array()},
|
| 939 |
+
{"detected_language", whisper_lang_str_full(detected_lang_id)},
|
| 940 |
+
{"detected_language_probability", lang_probs[detected_lang_id]},
|
| 941 |
+
{"language_probabilities", json::object()}
|
| 942 |
};
|
| 943 |
+
// Add all language probabilities
|
| 944 |
+
for (int i = 0; i <= whisper_lang_max_id(); ++i) {
|
| 945 |
+
if (lang_probs[i] > 0.001f) { // Only include non-negligible probabilities
|
| 946 |
+
jres["language_probabilities"][whisper_lang_str(i)] = lang_probs[i];
|
| 947 |
+
}
|
| 948 |
+
}
|
| 949 |
const int n_segments = whisper_full_n_segments(ctx);
|
| 950 |
for (int i = 0; i < n_segments; ++i)
|
| 951 |
{
|