Spaces:
Running
Running
Commit
·
5ca6886
1
Parent(s):
f2b91fc
Building with MSVC
Browse files
- CMakeLists.txt +25 -17
- ggml.c +31 -5
- ggml.h +1 -0
- msvc_thread_atomic.h +31 -0
- whisper.cpp +4 -2
CMakeLists.txt
CHANGED
|
@@ -26,19 +26,21 @@ option(WHISPER_SUPPORT_SDL2 "whisper: support for libSDL2" OFF)
|
|
| 26 |
|
| 27 |
# sanitizers
|
| 28 |
|
| 29 |
-
if (
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
| 33 |
|
| 34 |
-
if (WHISPER_SANITIZE_ADDRESS)
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
endif()
|
| 38 |
|
| 39 |
-
if (WHISPER_SANITIZE_UNDEFINED)
|
| 40 |
-
|
| 41 |
-
|
|
|
|
| 42 |
endif()
|
| 43 |
|
| 44 |
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
|
|
@@ -47,7 +49,7 @@ endif()
|
|
| 47 |
# dependencies
|
| 48 |
|
| 49 |
set(CMAKE_C_STANDARD 11)
|
| 50 |
-
set(CMAKE_CXX_STANDARD
|
| 51 |
|
| 52 |
find_package(Threads REQUIRED)
|
| 53 |
|
|
@@ -69,7 +71,7 @@ if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
|
| 69 |
endif ()
|
| 70 |
|
| 71 |
if (WHISPER_ALL_WARNINGS)
|
| 72 |
-
if (
|
| 73 |
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \
|
| 74 |
-Wall \
|
| 75 |
-Wextra \
|
|
@@ -80,12 +82,14 @@ if (WHISPER_ALL_WARNINGS)
|
|
| 80 |
-Wpointer-arith \
|
| 81 |
")
|
| 82 |
else()
|
| 83 |
-
# todo :
|
| 84 |
endif()
|
| 85 |
endif()
|
| 86 |
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
|
| 89 |
|
| 90 |
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
|
| 91 |
|
|
@@ -93,7 +97,11 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES
|
|
| 93 |
message(STATUS "ARM detected")
|
| 94 |
else()
|
| 95 |
message(STATUS "x86 detected")
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
endif()
|
| 98 |
|
| 99 |
# whisper - this is the main library of the project
|
|
|
|
| 26 |
|
| 27 |
# sanitizers
|
| 28 |
|
| 29 |
+
if (NOT MSVC)
    # The sanitizer switches below are written in GCC/Clang command-line
    # spelling, so none of them is applied when the compiler is MSVC.

    # WHISPER_SANITIZE_THREAD: thread sanitizer for both C and C++ sources
    if (WHISPER_SANITIZE_THREAD)
        string(APPEND CMAKE_C_FLAGS   " -fsanitize=thread")
        string(APPEND CMAKE_CXX_FLAGS " -fsanitize=thread")
    endif()

    # WHISPER_SANITIZE_ADDRESS: address sanitizer, with frame pointers kept
    if (WHISPER_SANITIZE_ADDRESS)
        string(APPEND CMAKE_C_FLAGS   " -fsanitize=address -fno-omit-frame-pointer")
        string(APPEND CMAKE_CXX_FLAGS " -fsanitize=address -fno-omit-frame-pointer")
    endif()

    # WHISPER_SANITIZE_UNDEFINED: undefined-behaviour sanitizer
    if (WHISPER_SANITIZE_UNDEFINED)
        string(APPEND CMAKE_C_FLAGS   " -fsanitize=undefined")
        string(APPEND CMAKE_CXX_FLAGS " -fsanitize=undefined")
    endif()
endif()
|
| 45 |
|
| 46 |
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
|
|
|
|
| 49 |
# dependencies
|
| 50 |
|
| 51 |
set(CMAKE_C_STANDARD 11)
|
| 52 |
+
set(CMAKE_CXX_STANDARD 20)
|
| 53 |
|
| 54 |
find_package(Threads REQUIRED)
|
| 55 |
|
|
|
|
| 71 |
endif ()
|
| 72 |
|
| 73 |
if (WHISPER_ALL_WARNINGS)
|
| 74 |
+
if (NOT MSVC)
|
| 75 |
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \
|
| 76 |
-Wall \
|
| 77 |
-Wextra \
|
|
|
|
| 82 |
-Wpointer-arith \
|
| 83 |
")
|
| 84 |
else()
|
| 85 |
+
# todo : msvc
|
| 86 |
endif()
|
| 87 |
endif()
|
| 88 |
|
| 89 |
+
if (NOT MSVC)
    # Turn any use of a C variable-length array into a compile error.
    # The flag is in GCC/Clang spelling, hence the MSVC guard.
    string(APPEND CMAKE_C_FLAGS " -Werror=vla")
    #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -ffinite-math-only -funsafe-math-optimizations")
endif()
|
| 93 |
|
| 94 |
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
|
| 95 |
|
|
|
|
| 97 |
message(STATUS "ARM detected")
|
| 98 |
else()
|
| 99 |
message(STATUS "x86 detected")
|
| 100 |
+
# SIMD flags for C sources on the x86 path, spelled per compiler family.
if (NOT MSVC)
    # GCC/Clang: enable AVX, AVX2, FMA and F16C individually
    string(APPEND CMAKE_C_FLAGS " -mavx -mavx2 -mfma -mf16c")
else()
    # MSVC: AVX2 code generation, plus the define that silences the CRT
    # "secure" deprecation warnings
    string(APPEND CMAKE_C_FLAGS " /arch:AVX2 /D_CRT_SECURE_NO_WARNINGS=1")
endif()
|
| 105 |
endif()
|
| 106 |
|
| 107 |
# whisper - this is the main library of the project
|
ggml.c
CHANGED
|
@@ -13,9 +13,15 @@
|
|
| 13 |
#include <string.h>
|
| 14 |
#include <stdint.h>
|
| 15 |
#include <stdio.h>
|
| 16 |
-
#include <stdatomic.h>
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
#include <pthread.h>
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
#define GGML_DEBUG 0
|
| 21 |
|
|
@@ -149,6 +155,25 @@ static ggml_fp16_t table_exp_f16[1 << 16];
|
|
| 149 |
// timing
|
| 150 |
//
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
int64_t ggml_time_ms(void) {
|
| 153 |
struct timespec ts;
|
| 154 |
clock_gettime(CLOCK_MONOTONIC, &ts);
|
|
@@ -160,6 +185,7 @@ int64_t ggml_time_us(void) {
|
|
| 160 |
clock_gettime(CLOCK_MONOTONIC, &ts);
|
| 161 |
return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
|
| 162 |
}
|
|
|
|
| 163 |
|
| 164 |
int64_t ggml_cycles(void) {
|
| 165 |
return clock();
|
|
@@ -6412,7 +6438,7 @@ void * ggml_graph_compute_one(void * data) {
|
|
| 6412 |
return NULL;
|
| 6413 |
}
|
| 6414 |
|
| 6415 |
-
|
| 6416 |
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
|
| 6417 |
|
| 6418 |
const int n_threads = state->shared->n_threads;
|
|
@@ -6423,7 +6449,7 @@ void * ggml_graph_compute_thread(void * data) {
|
|
| 6423 |
} else {
|
| 6424 |
while (atomic_load(&state->shared->has_work)) {
|
| 6425 |
if (atomic_load(&state->shared->stop)) {
|
| 6426 |
-
return
|
| 6427 |
}
|
| 6428 |
ggml_lock_lock (&state->shared->spin);
|
| 6429 |
ggml_lock_unlock(&state->shared->spin);
|
|
@@ -6435,7 +6461,7 @@ void * ggml_graph_compute_thread(void * data) {
|
|
| 6435 |
// wait for work
|
| 6436 |
while (!atomic_load(&state->shared->has_work)) {
|
| 6437 |
if (atomic_load(&state->shared->stop)) {
|
| 6438 |
-
return
|
| 6439 |
}
|
| 6440 |
ggml_lock_lock (&state->shared->spin);
|
| 6441 |
ggml_lock_unlock(&state->shared->spin);
|
|
@@ -6454,7 +6480,7 @@ void * ggml_graph_compute_thread(void * data) {
|
|
| 6454 |
}
|
| 6455 |
}
|
| 6456 |
|
| 6457 |
-
return
|
| 6458 |
}
|
| 6459 |
|
| 6460 |
void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
|
|
|
|
| 13 |
#include <string.h>
|
| 14 |
#include <stdint.h>
|
| 15 |
#include <stdio.h>
|
|
|
|
| 16 |
|
| 17 |
+
|
| 18 |
+
#if defined _MSC_VER
|
| 19 |
+
#include "msvc_thread_atomic.h"
|
| 20 |
+
#else
|
| 21 |
#include <pthread.h>
|
| 22 |
+
#include <stdatomic.h>
|
| 23 |
+
typedef void* thread_ret_t;
|
| 24 |
+
#endif
|
| 25 |
|
| 26 |
#define GGML_DEBUG 0
|
| 27 |
|
|
|
|
| 155 |
// timing
|
| 156 |
//
|
| 157 |
|
| 158 |
+
#if defined(_MSC_VER)
|
| 159 |
+
static int64_t timer_freq;
|
| 160 |
+
void ggml_time_init(void) {
|
| 161 |
+
LARGE_INTEGER frequency;
|
| 162 |
+
QueryPerformanceFrequency(&frequency);
|
| 163 |
+
timer_freq = frequency.QuadPart;
|
| 164 |
+
}
|
| 165 |
+
int64_t ggml_time_ms(void) {
|
| 166 |
+
LARGE_INTEGER t;
|
| 167 |
+
QueryPerformanceCounter(&t);
|
| 168 |
+
return (t.QuadPart * 1000) / timer_freq;
|
| 169 |
+
}
|
| 170 |
+
int64_t ggml_time_us(void) {
|
| 171 |
+
LARGE_INTEGER t;
|
| 172 |
+
QueryPerformanceCounter(&t);
|
| 173 |
+
return (t.QuadPart * 1000000) / timer_freq;
|
| 174 |
+
}
|
| 175 |
+
#else
|
| 176 |
+
// No-op on non-MSVC builds: the clock_gettime()-based timers in this branch
// keep no state that would need initialising.
void ggml_time_init(void) {}
|
| 177 |
int64_t ggml_time_ms(void) {
|
| 178 |
struct timespec ts;
|
| 179 |
clock_gettime(CLOCK_MONOTONIC, &ts);
|
|
|
|
| 185 |
clock_gettime(CLOCK_MONOTONIC, &ts);
|
| 186 |
return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
|
| 187 |
}
|
| 188 |
+
#endif
|
| 189 |
|
| 190 |
int64_t ggml_cycles(void) {
|
| 191 |
return clock();
|
|
|
|
| 6438 |
return NULL;
|
| 6439 |
}
|
| 6440 |
|
| 6441 |
+
thread_ret_t ggml_graph_compute_thread(void * data) {
|
| 6442 |
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
|
| 6443 |
|
| 6444 |
const int n_threads = state->shared->n_threads;
|
|
|
|
| 6449 |
} else {
|
| 6450 |
while (atomic_load(&state->shared->has_work)) {
|
| 6451 |
if (atomic_load(&state->shared->stop)) {
|
| 6452 |
+
return 0;
|
| 6453 |
}
|
| 6454 |
ggml_lock_lock (&state->shared->spin);
|
| 6455 |
ggml_lock_unlock(&state->shared->spin);
|
|
|
|
| 6461 |
// wait for work
|
| 6462 |
while (!atomic_load(&state->shared->has_work)) {
|
| 6463 |
if (atomic_load(&state->shared->stop)) {
|
| 6464 |
+
return 0;
|
| 6465 |
}
|
| 6466 |
ggml_lock_lock (&state->shared->spin);
|
| 6467 |
ggml_lock_unlock(&state->shared->spin);
|
|
|
|
| 6480 |
}
|
| 6481 |
}
|
| 6482 |
|
| 6483 |
+
return 0;
|
| 6484 |
}
|
| 6485 |
|
| 6486 |
void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
|
ggml.h
CHANGED
|
@@ -136,6 +136,7 @@ struct ggml_init_params {
|
|
| 136 |
void * mem_buffer; // if NULL, memory will be allocated internally
|
| 137 |
};
|
| 138 |
|
|
|
|
| 139 |
int64_t ggml_time_ms(void);
|
| 140 |
int64_t ggml_time_us(void);
|
| 141 |
int64_t ggml_cycles(void);
|
|
|
|
| 136 |
void * mem_buffer; // if NULL, memory will be allocated internally
|
| 137 |
};
|
| 138 |
|
| 139 |
+
void ggml_time_init(void);
|
| 140 |
int64_t ggml_time_ms(void);
|
| 141 |
int64_t ggml_time_us(void);
|
| 142 |
int64_t ggml_cycles(void);
|
msvc_thread_atomic.h
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
#include <Windows.h>
#include <errno.h>
|
| 3 |
+
|
| 4 |
+
typedef volatile LONG atomic_int;
|
| 5 |
+
typedef atomic_int atomic_bool;
|
| 6 |
+
|
| 7 |
+
// Atomically writes `val` to `*ptr`.
// InterlockedExchange returns the previous value; it is deliberately ignored.
static void atomic_store(atomic_int* ptr, LONG val) {
    (void) InterlockedExchange(ptr, val);
}
|
| 10 |
+
// Atomically reads `*ptr`.
// A compare-exchange with comparand 0 and replacement 0 never changes the
// stored value (0 is only ever replaced by 0) and returns what was found there.
static LONG atomic_load(atomic_int* ptr) {
    const LONG current = InterlockedCompareExchange(ptr, 0, 0);
    return current;
}
|
| 13 |
+
// Atomically adds `inc` to `*ptr` and returns the value `*ptr` held before
// the addition.
static LONG atomic_fetch_add(atomic_int* ptr, LONG inc) {
    const LONG previous = InterlockedExchangeAdd(ptr, inc);
    return previous;
}
|
| 16 |
+
// Atomically subtracts `dec` from `*ptr` and returns the value `*ptr` held
// before the subtraction. Implemented as an atomic add of the negated amount.
static LONG atomic_fetch_sub(atomic_int* ptr, LONG dec) {
    const LONG negated = -(dec);
    return InterlockedExchangeAdd(ptr, negated);
}
|
| 19 |
+
|
| 20 |
+
typedef HANDLE pthread_t;
|
| 21 |
+
|
| 22 |
+
typedef DWORD thread_ret_t;
|
| 23 |
+
// Minimal pthread_create() stand-in built on CreateThread().
//
//   out    - receives the handle of the new thread on success
//   unused - placeholder for the pthread attribute argument; ignored
//   func   - thread entry point
//   arg    - opaque pointer handed to `func`
//
// Returns 0 on success and EAGAIN if the thread could not be created, matching
// the pthreads convention that callers test with `rc == 0`.
static int pthread_create(pthread_t* out, void* unused, thread_ret_t(*func)(void*), void* arg) {
    (void) unused;

    // NOTE(review): the cast assumes `func` uses the calling convention that
    // CreateThread expects (WINAPI); confirm before targeting 32-bit x86.
    HANDLE handle = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, arg, 0, NULL);
    if (handle == NULL) {
        return EAGAIN;
    }

    // Store through the pointer so the caller actually receives the handle
    // (assigning to `out` itself would only change the local copy).
    *out = handle;
    return 0;
}
|
| 27 |
+
|
| 28 |
+
// Minimal pthread_join() stand-in: blocks until `thread` has finished.
//
//   thread - handle produced by pthread_create()
//   unused - placeholder for the pthread return-value argument; ignored
//
// Returns the WaitForSingleObject() result cast to int; WAIT_OBJECT_0 (0)
// means the thread has terminated.
static int pthread_join(pthread_t thread, void* unused) {
    (void) unused;

    const DWORD wait_result = WaitForSingleObject(thread, INFINITE);

    // A joined thread cannot be used again, so release its handle here; the
    // pthread-style API gives callers no other place to do it.
    // NOTE(review): assumes no caller closes the handle itself -- confirm.
    if (wait_result == WAIT_OBJECT_0) {
        CloseHandle(thread);
    }

    return (int) wait_result;
}
|
| 31 |
+
|
whisper.cpp
CHANGED
|
@@ -2073,6 +2073,8 @@ bool log_mel_spectrogram(
|
|
| 2073 |
//
|
| 2074 |
|
| 2075 |
struct whisper_context * whisper_init(const char * path_model) {
|
|
|
|
|
|
|
| 2076 |
whisper_context * ctx = new whisper_context;
|
| 2077 |
|
| 2078 |
const int64_t t_start_us = ggml_time_us();
|
|
@@ -2260,7 +2262,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
|
|
| 2260 |
switch (strategy) {
|
| 2261 |
case WHISPER_DECODE_GREEDY:
|
| 2262 |
{
|
| 2263 |
-
result =
|
| 2264 |
.strategy = WHISPER_DECODE_GREEDY,
|
| 2265 |
.n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
|
| 2266 |
.offset_ms = 0,
|
|
@@ -2281,7 +2283,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
|
|
| 2281 |
} break;
|
| 2282 |
case WHISPER_DECODE_BEAM_SEARCH:
|
| 2283 |
{
|
| 2284 |
-
result =
|
| 2285 |
.strategy = WHISPER_DECODE_GREEDY,
|
| 2286 |
.n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
|
| 2287 |
.offset_ms = 0,
|
|
|
|
| 2073 |
//
|
| 2074 |
|
| 2075 |
struct whisper_context * whisper_init(const char * path_model) {
|
| 2076 |
+
ggml_time_init();
|
| 2077 |
+
|
| 2078 |
whisper_context * ctx = new whisper_context;
|
| 2079 |
|
| 2080 |
const int64_t t_start_us = ggml_time_us();
|
|
|
|
| 2262 |
switch (strategy) {
|
| 2263 |
case WHISPER_DECODE_GREEDY:
|
| 2264 |
{
|
| 2265 |
+
result = {
|
| 2266 |
.strategy = WHISPER_DECODE_GREEDY,
|
| 2267 |
.n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
|
| 2268 |
.offset_ms = 0,
|
|
|
|
| 2283 |
} break;
|
| 2284 |
case WHISPER_DECODE_BEAM_SEARCH:
|
| 2285 |
{
|
| 2286 |
+
result = {
|
| 2287 |
.strategy = WHISPER_DECODE_GREEDY,
|
| 2288 |
.n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
|
| 2289 |
.offset_ms = 0,
|