KitaitiMakoto committed on
Commit
8d0a50d
·
unverified ·
1 Parent(s): 0e991f8

ruby : make Ruby bindings installed with build options (#3056)

Browse files

* Fix signature of URI.new's return value

* Use path instead of string | _ToPath

* Add document comment to RBS

* Remove unnecessary build flags

* Remove unnecessary line

* Remove files that have become unnecessary

* Make gem install accept build options for whisper.cpp

* Add instructions for build options in README

* Add methods for check to Options

* Test build options

* Rename: configs -> options

* Add assert_installed assertion

* Use assert_installed

* Remove unused attribute

* Extract dependency check logic as Dependencies class

* Update README

* Add WHISPER_FFMPEG option

* Test extra build options only on local test

* Bump version to 1.3.2 [skip ci]

bindings/ruby/README.md CHANGED
@@ -16,6 +16,18 @@ If bundler is not being used to manage dependencies, install the gem by executin
16
 
17
  $ gem install whispercpp
18
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  Usage
20
  -----
21
 
 
16
 
17
  $ gem install whispercpp
18
 
19
+ You can pass build options for whisper.cpp, for instance:
20
+
21
+ $ bundle config build.whispercpp --enable-ggml-cuda
22
+
23
+ or,
24
+
25
+ $ gem install whispercpp -- --enable-ggml-cuda
26
+
27
+ See whisper.cpp's [README](https://github.com/ggml-org/whisper.cpp/blob/master/README.md) for available options. You need to convert the options presented in the README to Ruby-style options.
28
+ For boolean options like `GGML_CUDA`, the README says `-DGGML_CUDA=1`. You need to strip `-D`, prepend `--enable-` for `1` or `ON` (`--disable-` for `0` or `OFF`) and make it kebab-case: `--enable-ggml-cuda`.
29
+ For options which require arguments like `CMAKE_CUDA_ARCHITECTURES`, the README says `-DCMAKE_CUDA_ARCHITECTURES="86"`. You need to strip `-D`, prepend `--`, make it kebab-case, append `=` and append the argument: `--cmake-cuda-architectures="86"`.
30
+
31
  Usage
32
  -----
33
 
bindings/ruby/ext/cpu.mk DELETED
@@ -1,13 +0,0 @@
1
- ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
2
- ggml/src/ggml-cpu/ggml-cpu.cpp \
3
- ggml/src/ggml-cpu/unary-ops.cpp \
4
- ggml/src/ggml-cpu/binary-ops.cpp \
5
- ggml/src/ggml-cpu/vec.cpp \
6
- ggml/src/ggml-cpu/ops.cpp \
7
- ggml/include/ggml-backend.h \
8
- ggml/include/ggml.h \
9
- ggml/include/ggml-alloc.h \
10
- ggml/src/ggml-backend-impl.h \
11
- ggml/include/ggml-cpu.h \
12
- ggml/src/ggml-impl.h
13
- $(CXX) $(CXXFLAGS) -c $< -o $@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bindings/ruby/ext/dependencies.rb ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ require "tsort"
2
+
3
+ class Dependencies
4
+ def initialize(cmake, options)
5
+ @cmake = cmake
6
+ @options = options
7
+
8
+ generate_dot
9
+ @libs = parse_dot
10
+ end
11
+
12
+ def to_s
13
+ @libs.join(" ")
14
+ end
15
+
16
+ private
17
+
18
+ def dot_path
19
+ File.join(__dir__, "build", "whisper.cpp.dot")
20
+ end
21
+
22
+ def generate_dot
23
+ system @cmake, "-S", "sources", "-B", "build", "--graphviz", dot_path, "-D", "BUILD_SHARED_LIBS=OFF", @options.to_s, exception: true
24
+ end
25
+
26
+ def parse_dot
27
+ static_lib_shape = nil
28
+ nodes = {}
29
+ depends = Hash.new {|h, k| h[k] = []}
30
+
31
+ class << depends
32
+ include TSort
33
+ alias tsort_each_node each_key
34
+ def tsort_each_child(node, &block)
35
+ fetch(node, []).each(&block)
36
+ end
37
+ end
38
+
39
+ File.open(dot_path).each_line do |line|
40
+ case line
41
+ when /\[\s*label\s*=\s*"Static Library"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]/
42
+ static_lib_shape = $~[:shape]
43
+ when /\A\s*"(?<node>\w+)"\s*\[\s*label\s*=\s*"(?<label>\S+)"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]\s*;\s*\z/
44
+ node = $~[:node]
45
+ label = $~[:label]
46
+ shape = $~[:shape]
47
+ nodes[node] = [label, shape]
48
+ when /\A\s*"(?<depender>\w+)"\s*->\s*"(?<dependee>\w+)"/
49
+ depender = $~[:depender]
50
+ dependee = $~[:dependee]
51
+ depends[depender] ||= []
52
+ depends[depender] << dependee
53
+ end
54
+ end
55
+ depends.tsort.filter_map {|node|
56
+ label, shape = nodes[node]
57
+ shape == static_lib_shape ? label : nil
58
+ }.collect {|lib| "lib#{lib}.a"}
59
+ .reverse
60
+ end
61
+ end
bindings/ruby/ext/extconf.rb CHANGED
@@ -1,50 +1,12 @@
1
  require "mkmf"
2
- require "tsort"
3
-
4
- # TODO: options such as CoreML
5
 
6
  cmake = find_executable("cmake") || abort
7
-
8
  have_library("gomp") rescue nil
 
9
 
10
- prefix = File.join("build", "whisper.cpp.dot")
11
- system cmake, "-S", "sources", "-B", "build", "--graphviz", prefix, "-D", "BUILD_SHARED_LIBS=OFF", exception: true
12
-
13
- static_lib_shape = nil
14
- nodes = {}
15
- depends = {}
16
- class << depends
17
- include TSort
18
- alias tsort_each_node each_key
19
- def tsort_each_child(node, &block)
20
- fetch(node, []).each(&block)
21
- end
22
- end
23
- File.open(File.join("build", "whisper.cpp.dot")).each_line do |line|
24
- case line
25
- when /\[\s*label\s*=\s*"Static Library"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]/
26
- static_lib_shape = $~[:shape]
27
- when /\A\s*"(?<node>\w+)"\s*\[\s*label\s*=\s*"(?<label>\S+)"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]\s*;\s*\z/
28
- node = $~[:node]
29
- label = $~[:label]
30
- shape = $~[:shape]
31
- nodes[node] = [label, shape]
32
- when /\A\s*"(?<depender>\w+)"\s*->\s*"(?<dependee>\w+)"/
33
- depender = $~[:depender]
34
- dependee = $~[:dependee]
35
- depends[depender] ||= []
36
- depends[depender] << dependee
37
- end
38
- end
39
- libs = depends.tsort.filter_map {|node|
40
- label, shape = nodes[node]
41
- shape == static_lib_shape ? label : nil
42
- }.collect {|lib| "lib#{lib}.a"}
43
- .reverse
44
- .join(" ")
45
-
46
- $CFLAGS << " -std=c11 -fPIC"
47
- $CXXFLAGS << " -std=c++17 -O3 -DNDEBUG"
48
  $INCFLAGS << " -Isources/include -Isources/ggml/include -Isources/examples"
49
  $LOCAL_LIBS << " #{libs}"
50
  $cleanfiles << " build #{libs}"
@@ -54,8 +16,7 @@ create_makefile "whisper" do |conf|
54
  $(TARGET_SO): #{libs}
55
  #{libs}: cmake-targets
56
  cmake-targets:
57
- #{"\t"}#{cmake} -S sources -B build -D BUILD_SHARED_LIBS=OFF -D CMAKE_ARCHIVE_OUTPUT_DIRECTORY=#{__dir__} -D CMAKE_POSITION_INDEPENDENT_CODE=ON
58
  #{"\t"}#{cmake} --build build --config Release --target common whisper
59
- #{"\t"}
60
  EOF
61
  end
 
1
  require "mkmf"
2
+ require_relative "options"
3
+ require_relative "dependencies"
 
4
 
5
  cmake = find_executable("cmake") || abort
6
+ options = Options.new
7
  have_library("gomp") rescue nil
8
+ libs = Dependencies.new(cmake, options)
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  $INCFLAGS << " -Isources/include -Isources/ggml/include -Isources/examples"
11
  $LOCAL_LIBS << " #{libs}"
12
  $cleanfiles << " build #{libs}"
 
16
  $(TARGET_SO): #{libs}
17
  #{libs}: cmake-targets
18
  cmake-targets:
19
+ #{"\t"}#{cmake} -S sources -B build -D BUILD_SHARED_LIBS=OFF -D CMAKE_ARCHIVE_OUTPUT_DIRECTORY=#{__dir__} -D CMAKE_POSITION_INDEPENDENT_CODE=ON #{options}
20
  #{"\t"}#{cmake} --build build --config Release --target common whisper
 
21
  EOF
22
  end
bindings/ruby/ext/metal-embed.mk DELETED
@@ -1,17 +0,0 @@
1
- ggml/src/ggml-metal/ggml-metal-embed.o: \
2
- ggml/src/ggml-metal/ggml-metal.metal \
3
- ggml/src/ggml-metal/ggml-metal-impl.h \
4
- ggml/src/ggml-common.h
5
- @echo "Embedding Metal library"
6
- @sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
7
- @sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
8
- $(eval TEMP_ASSEMBLY=$(shell mktemp -d))
9
- @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
10
- @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
11
- @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
12
- @echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
13
- @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
14
- @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
15
- $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
16
- @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
17
- @rmdir ${TEMP_ASSEMBLY}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bindings/ruby/ext/metal.mk DELETED
@@ -1,6 +0,0 @@
1
- ggml/src/ggml-metal/ggml-metal.o: \
2
- ggml/src/ggml-metal/ggml-metal.m \
3
- ggml/src/ggml-metal/ggml-metal-impl.h \
4
- ggml/include/ggml-metal.h \
5
- ggml/include/ggml.h
6
- $(CC) $(CFLAGS) -c $< -o $@
 
 
 
 
 
 
 
bindings/ruby/ext/options.rb ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ class Options
2
+ def initialize
3
+ @options = {}
4
+ @pending_options = []
5
+ @ignored_options = []
6
+
7
+ configure
8
+ end
9
+
10
+ def help
11
+ @options
12
+ .collect_concat {|name, (type, value)|
13
+ option = option_name(name)
14
+ if type == :bool
15
+ ["--enable-#{option}", "--disable-#{option}"]
16
+ else
17
+ "--#{option}=#{type.upcase}"
18
+ end
19
+ }
20
+ .join($/)
21
+ end
22
+
23
+ def to_s
24
+ @options
25
+ .reject {|name, (type, value)| value.nil?}
26
+ .collect {|name, (type, value)| "-D #{name}=#{value == true ? "ON" : value == false ? "OFF" : value.shellescape}"}
27
+ .join(" ")
28
+ end
29
+
30
+ def cmake_options
31
+ return @cmake_options if @cmake_options
32
+
33
+ output = nil
34
+ Dir.chdir __dir__ do
35
+ output = `cmake -S sources -B build -L`
36
+ end
37
+ started = false
38
+ @cmake_options = output.lines.filter_map {|line|
39
+ if line.chomp == "-- Cache values"
40
+ started = true
41
+ next
42
+ end
43
+ next unless started
44
+ option, value = line.chomp.split("=", 2)
45
+ name, type = option.split(":", 2)
46
+ [name, type, value]
47
+ }
48
+ end
49
+
50
+ def missing_options
51
+ cmake_options.collect {|name, type, value| name} -
52
+ @options.keys - @pending_options - @ignored_options
53
+ end
54
+
55
+ def extra_options
56
+ @options.keys + @pending_options - @ignored_options -
57
+ cmake_options.collect {|name, type, value| name}
58
+ end
59
+
60
+ private
61
+
62
+ def configure
63
+ filepath "ACCELERATE_FRAMEWORK"
64
+ ignored "BUILD_SHARED_LIBS"
65
+ ignored "BUILD_TESTING"
66
+ ignored "CMAKE_BUILD_TYPE"
67
+ ignored "CMAKE_INSTALL_PREFIX"
68
+ string "CMAKE_OSX_ARCHITECTURES"
69
+ ignored "CMAKE_OSX_DEPLOYMENT_TARGET"
70
+ string "CMAKE_OSX_SYSROOT"
71
+ filepath "FOUNDATION_LIBRARY"
72
+ bool "GGML_ACCELERATE"
73
+ bool "GGML_ALL_WARNINGS_3RD_PARTY"
74
+ bool "GGML_AMX_BF16"
75
+ bool "GGML_AMX_INT8"
76
+ bool "GGML_AMX_TILE"
77
+ bool "GGML_AVX"
78
+ bool "GGML_AVX2"
79
+ bool "GGML_AVX512"
80
+ bool "GGML_AVX512_BF16"
81
+ bool "GGML_AVX512_VBMI"
82
+ bool "GGML_AVX512_VNNI"
83
+ bool "GGML_AVX_VNNI"
84
+ ignored "GGML_BACKEND_DL"
85
+ ignored "GGML_BIN_INSTALL_DIR"
86
+ bool "GGML_BLAS"
87
+ string "GGML_BLAS_VENDOR"
88
+ bool "GGML_BMI2"
89
+ ignored "GGML_BUILD_EXAMPLES"
90
+ ignored "GGML_BUILD_TESTS"
91
+ filepath "GGML_CCACHE_FOUND"
92
+ bool "GGML_CPU"
93
+ bool "GGML_CPU_AARCH64"
94
+ ignored "GGML_CPU_ALL_VARIANTS"
95
+ string "GGML_CPU_ARM_ARCH"
96
+ bool "GGML_CPU_HBM"
97
+ bool "GGML_CPU_KLEIDIAI"
98
+ string "GGML_CPU_POWERPC_CPUTYPE"
99
+ bool "GGML_CUDA"
100
+ string "GGML_CUDA_COMPRESSION_MODE"
101
+ bool "GGML_CUDA_F16"
102
+ bool "GGML_CUDA_FA"
103
+ bool "GGML_CUDA_FA_ALL_QUANTS"
104
+ bool "GGML_CUDA_FORCE_CUBLAS"
105
+ bool "GGML_CUDA_FORCE_MMQ"
106
+ ignored "GGML_CUDA_GRAPHS"
107
+ bool "GGML_CUDA_NO_PEER_COPY"
108
+ bool "GGML_CUDA_NO_VMM"
109
+ string "GGML_CUDA_PEER_MAX_BATCH_SIZE"
110
+ bool "GGML_F16C"
111
+ bool "GGML_FMA"
112
+ bool "GGML_GPROF"
113
+ bool "GGML_HIP"
114
+ bool "GGML_HIP_GRAPHS"
115
+ bool "GGML_HIP_NO_VMM"
116
+ bool "GGML_HIP_ROCWMMA_FATTN"
117
+ bool "GGML_HIP_UMA"
118
+ ignored "GGML_INCLUDE_INSTALL_DIR"
119
+ bool "GGML_KOMPUTE"
120
+ bool "GGML_LASX"
121
+ ignored "GGML_LIB_INSTALL_DIR"
122
+ ignored "GGML_LLAMAFILE"
123
+ bool "GGML_LSX"
124
+ bool "GGML_LTO"
125
+ bool "GGML_METAL"
126
+ bool "GGML_METAL_EMBED_LIBRARY"
127
+ string "GGML_METAL_MACOSX_VERSION_MIN"
128
+ bool "GGML_METAL_NDEBUG"
129
+ bool "GGML_METAL_SHADER_DEBUG"
130
+ string "GGML_METAL_STD"
131
+ bool "GGML_METAL_USE_BF16"
132
+ bool "GGML_MUSA"
133
+ bool "GGML_NATIVE"
134
+ bool "GGML_OPENCL"
135
+ bool "GGML_OPENCL_EMBED_KERNELS"
136
+ bool "GGML_OPENCL_PROFILING"
137
+ string "GGML_OPENCL_TARGET_VERSION"
138
+ bool "GGML_OPENCL_USE_ADRENO_KERNELS"
139
+ bool "GGML_OPENMP"
140
+ bool "GGML_RPC"
141
+ bool "GGML_RVV"
142
+ bool "GGML_RV_ZFH"
143
+ pending "GGML_SCCACHE_FOUND"
144
+ string "GGML_SCHED_MAX_COPIES"
145
+ ignored "GGML_STATIC"
146
+ bool "GGML_SYCL"
147
+ string "GGML_SYCL_DEVICE_ARCH"
148
+ bool "GGML_SYCL_F16"
149
+ bool "GGML_SYCL_GRAPH"
150
+ string "GGML_SYCL_TARGET"
151
+ bool "GGML_VULKAN"
152
+ bool "GGML_VULKAN_CHECK_RESULTS"
153
+ bool "GGML_VULKAN_DEBUG"
154
+ bool "GGML_VULKAN_MEMORY_DEBUG"
155
+ bool "GGML_VULKAN_PERF"
156
+ ignored "GGML_VULKAN_RUN_TESTS"
157
+ filepath "GGML_VULKAN_SHADERS_GEN_TOOLCHAIN"
158
+ bool "GGML_VULKAN_SHADER_DEBUG_INFO"
159
+ pending "GGML_VULKAN_VALIDATE"
160
+ bool "GGML_VXE"
161
+ filepath "GIT_EXE"
162
+ filepath "MATH_LIBRARY"
163
+ filepath "METALKIT_FRAMEWORK"
164
+ filepath "METAL_FRAMEWORK"
165
+ bool "WHISPER_ALL_WARNINGS"
166
+ bool "WHISPER_ALL_WARNINGS_3RD_PARTY"
167
+ ignored "WHISPER_BIN_INSTALL_DIR"
168
+ ignored "WHISPER_BUILD_EXAMPLES"
169
+ ignored "WHISPER_BUILD_SERVER"
170
+ ignored"WHISPER_BUILD_TESTS"
171
+ bool "WHISPER_CCACHE"
172
+ bool "WHISPER_COREML"
173
+ bool "WHISPER_COREML_ALLOW_FALLBACK"
174
+ ignored "WHISPER_CURL"
175
+ bool "WHISPER_FATAL_WARNINGS"
176
+ ignored "WHISPER_FFMPEG"
177
+ ignored "WHISPER_INCLUDE_INSTALL_DIR"
178
+ ignored "WHISPER_LIB_INSTALL_DIR"
179
+ bool "WHISPER_OPENVINO"
180
+ bool "WHISPER_SANITIZE_ADDRESS"
181
+ bool "WHISPER_SANITIZE_THREAD"
182
+ bool "WHISPER_SANITIZE_UNDEFINED"
183
+ ignored "WHISPER_SDL2"
184
+ pending "WHISPER_USE_SYSTEM_GGML"
185
+ end
186
+
187
+ def option_name(name)
188
+ name.downcase.gsub("_", "-")
189
+ end
190
+
191
+ def bool(name)
192
+ option = option_name(name)
193
+ value = enable_config(option)
194
+ @options[name] = [:bool, value]
195
+ end
196
+
197
+ def string(name, type=:string)
198
+ option = "--#{option_name(name)}"
199
+ value = arg_config(option)
200
+ raise "String expected for #{option}" if value == true || value&.empty?
201
+ @options[name] = [type, value]
202
+ end
203
+
204
+ def path(name)
205
+ string(name, :path)
206
+ end
207
+
208
+ def filepath(name)
209
+ string(name, :filepath)
210
+ end
211
+
212
+ def pending(name)
213
+ @pending_options << name
214
+ end
215
+
216
+ def ignored(name)
217
+ @ignored_options << name
218
+ end
219
+ end
bindings/ruby/sig/whisper.rbs CHANGED
@@ -23,9 +23,20 @@ module Whisper
23
  def self.log_set: (log_callback, Object? user_data) -> log_callback
24
 
25
  class Context
26
- def self.new: (string | _ToPath | ::URI::HTTP) -> instance
 
 
 
 
 
 
 
 
 
 
27
  def transcribe: (string, Params) -> self
28
  | (string, Params) { (String) -> void } -> self
 
29
  def model_n_vocab: () -> Integer
30
  def model_n_audio_ctx: () -> Integer
31
  def model_n_audio_state: () -> Integer
@@ -34,19 +45,72 @@ module Whisper
34
  def model_n_mels: () -> Integer
35
  def model_ftype: () -> Integer
36
  def model_type: () -> String
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  def each_segment: { (Segment) -> void } -> void
38
  | () -> Enumerator[Segment]
 
39
  def model: () -> Model
40
  def full_get_segment: (Integer nth) -> Segment
41
  def full_n_segments: () -> Integer
 
 
 
42
  def full_lang_id: () -> Integer
 
 
 
 
 
43
  def full_get_segment_t0: (Integer) -> Integer
 
 
 
 
 
44
  def full_get_segment_t1: (Integer) -> Integer
 
 
 
 
 
45
  def full_get_segment_speaker_turn_next: (Integer) -> (true | false)
 
 
 
 
 
46
  def full_get_segment_text: (Integer) -> String
 
47
  def full_get_segment_no_speech_prob: (Integer) -> Float
 
 
 
 
 
 
 
48
  def full: (Params, Array[Float] samples, ?Integer n_samples) -> self
49
  | (Params, _Samples, ?Integer n_samples) -> self
 
 
 
 
 
 
 
50
  def full_parallel: (Params, Array[Float], ?Integer n_samples) -> self
51
  | (Params, _Samples, ?Integer n_samples) -> self
52
  | (Params, _Samples, ?Integer? n_samples, Integer n_processors) -> self
@@ -85,68 +149,202 @@ module Whisper
85
  ?abort_callback: abort_callback,
86
  ?abort_callback_user_data: Object
87
  ) -> instance
 
 
 
88
  def language=: (String) -> String # TODO: Enumerate lang names
 
89
  def language: () -> String
90
  def translate=: (boolish) -> boolish
91
  def translate: () -> (true | false)
92
  def no_context=: (boolish) -> boolish
 
 
 
93
  def no_context: () -> (true | false)
 
94
  def single_segment=: (boolish) -> boolish
 
 
 
95
  def single_segment: () -> (true | false)
 
96
  def print_special=: (boolish) -> boolish
 
 
 
97
  def print_special: () -> (true | false)
 
98
  def print_progress=: (boolish) -> boolish
 
 
 
99
  def print_progress: () -> (true | false)
 
100
  def print_realtime=: (boolish) -> boolish
 
 
 
101
  def print_realtime: () -> (true | false)
 
 
 
102
  def print_timestamps=: (boolish) -> boolish
 
103
  def print_timestamps: () -> (true | false)
 
104
  def suppress_blank=: (boolish) -> boolish
 
 
 
105
  def suppress_blank: () -> (true | false)
 
106
  def suppress_nst=: (boolish) -> boolish
 
 
 
107
  def suppress_nst: () -> (true | false)
 
108
  def token_timestamps=: (boolish) -> boolish
 
 
 
109
  def token_timestamps: () -> (true | false)
 
110
  def split_on_word=: (boolish) -> boolish
 
 
 
111
  def split_on_word: () -> (true | false)
 
112
  def initial_prompt=: (_ToS) -> _ToS
 
 
 
 
 
 
113
  def initial_prompt: () -> (String | nil)
 
114
  def diarize=: (boolish) -> boolish
 
 
 
115
  def diarize: () -> (true | false)
 
116
  def offset=: (Integer) -> Integer
 
 
 
117
  def offset: () -> Integer
 
118
  def duration=: (Integer) -> Integer
 
 
 
119
  def duration: () -> Integer
 
120
  def max_text_tokens=: (Integer) -> Integer
 
 
 
121
  def max_text_tokens: () -> Integer
 
122
  def temperature=: (Float) -> Float
123
  def temperature: () -> Float
124
  def max_initial_ts=: (Float) -> Float
 
 
 
125
  def max_initial_ts: () -> Float
 
126
  def length_penalty=: (Float) -> Float
127
  def length_penalty: () -> Float
128
  def temperature_inc=: (Float) -> Float
129
  def temperature_inc: () -> Float
130
  def entropy_thold=: (Float) -> Float
 
 
 
131
  def entropy_thold: () -> Float
 
132
  def logprob_thold=: (Float) -> Float
133
  def logprob_thold: () -> Float
134
  def no_speech_thold=: (Float) -> Float
135
  def no_speech_thold: () -> Float
 
 
 
 
 
 
 
136
  def new_segment_callback=: (new_segment_callback) -> new_segment_callback
137
  def new_segment_callback: () -> (new_segment_callback | nil)
 
 
 
138
  def new_segment_callback_user_data=: (Object) -> Object
 
139
  def new_segment_callback_user_data: () -> Object
 
 
 
 
 
 
 
 
 
140
  def progress_callback=: (progress_callback) -> progress_callback
 
141
  def progress_callback: () -> (progress_callback | nil)
 
 
 
142
  def progress_callback_user_data=: (Object) -> Object
 
143
  def progress_callback_user_data: () -> Object
 
 
 
 
 
 
 
 
144
  def abort_callback=: (abort_callback) -> abort_callback
 
145
  def abort_callback: () -> (abort_callback | nil)
 
 
 
146
  def abort_callback_user_data=: (Object) -> Object
 
147
  def abort_callback_user_data: () -> Object
 
 
 
 
 
 
 
148
  def on_new_segment: { (Segment) -> void } -> void
 
 
 
149
  def on_progress: { (Integer progress) -> void } -> void
 
 
 
 
 
 
 
 
 
 
 
150
  def abort_on: { (Object user_data) -> boolish } -> void
151
  end
152
 
@@ -167,16 +365,24 @@ module Whisper
167
  def type: () -> String
168
 
169
  class URI
170
- def self.new: (string | ::URI::HTTP) -> self
171
  def to_path: -> String
172
  def clear_cache: -> void
173
  end
174
  end
175
 
176
  class Segment
 
 
177
  def start_time: () -> Integer
 
 
 
178
  def end_time: () -> Integer
 
 
179
  def speaker_next_turn?: () -> (true | false)
 
180
  def text: () -> String
181
  def no_speech_prob: () -> Float
182
  end
 
23
  def self.log_set: (log_callback, Object? user_data) -> log_callback
24
 
25
  class Context
26
+ def self.new: (path | ::URI::HTTP) -> instance
27
+
28
+ # transcribe a single file
29
+ # can emit to a block results
30
+ #
31
+ # params = Whisper::Params.new
32
+ # params.duration = 60_000
33
+ # whisper.transcribe "path/to/audio.wav", params do |text|
34
+ # puts text
35
+ # end
36
+ #
37
  def transcribe: (string, Params) -> self
38
  | (string, Params) { (String) -> void } -> self
39
+
40
  def model_n_vocab: () -> Integer
41
  def model_n_audio_ctx: () -> Integer
42
  def model_n_audio_state: () -> Integer
 
45
  def model_n_mels: () -> Integer
46
  def model_ftype: () -> Integer
47
  def model_type: () -> String
48
+
49
+ # Yields each Whisper::Segment:
50
+ #
51
+ # whisper.transcribe("path/to/audio.wav", params)
52
+ # whisper.each_segment do |segment|
53
+ # puts segment.text
54
+ # end
55
+ #
56
+ # Returns an Enumerator if no block given:
57
+ #
58
+ # whisper.transcribe("path/to/audio.wav", params)
59
+ # enum = whisper.each_segment
60
+ # enum.to_a # => [#<Whisper::Segment>, ...]
61
+ #
62
  def each_segment: { (Segment) -> void } -> void
63
  | () -> Enumerator[Segment]
64
+
65
  def model: () -> Model
66
  def full_get_segment: (Integer nth) -> Segment
67
  def full_n_segments: () -> Integer
68
+
69
+ # Language ID, which can be converted to string by Whisper.lang_str and Whisper.lang_str_full.
70
+ #
71
  def full_lang_id: () -> Integer
72
+
73
+ # Start time of a segment indexed by +segment_index+ in centiseconds (units of 10 milliseconds).
74
+ #
75
+ # full_get_segment_t0(3) # => 1668 (16680 ms)
76
+ #
77
  def full_get_segment_t0: (Integer) -> Integer
78
+
79
+ # End time of a segment indexed by +segment_index+ in centiseconds (units of 10 milliseconds).
80
+ #
81
+ # full_get_segment_t1(3) # => 1668 (16680 ms)
82
+ #
83
  def full_get_segment_t1: (Integer) -> Integer
84
+
85
+ # Whether the next segment indexed by +segment_index+ is predicted as a speaker turn.
86
+ #
87
+ # full_get_segment_speaker_turn_next(3) # => true
88
+ #
89
  def full_get_segment_speaker_turn_next: (Integer) -> (true | false)
90
+
91
+ # Text of a segment indexed by +segment_index+.
92
+ #
93
+ # full_get_segment_text(3) # => "ask not what your country can do for you, ..."
94
+ #
95
  def full_get_segment_text: (Integer) -> String
96
+
97
  def full_get_segment_no_speech_prob: (Integer) -> Float
98
+
99
+ # Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
100
+ # Not thread safe for same context
101
+ # Uses the specified decoding strategy to obtain the text.
102
+ #
103
+ # The second argument +samples+ must be an array of samples, respond to :length, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data.
104
+ #
105
  def full: (Params, Array[Float] samples, ?Integer n_samples) -> self
106
  | (Params, _Samples, ?Integer n_samples) -> self
107
+
108
+ # Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
109
+ # Result is stored in the default state of the context
110
+ # Not thread safe if executed in parallel on the same context.
111
+ # It seems this approach can offer some speedup in some cases.
112
+ # However, the transcription accuracy can be worse at the beginning and end of each chunk.
113
+ #
114
  def full_parallel: (Params, Array[Float], ?Integer n_samples) -> self
115
  | (Params, _Samples, ?Integer n_samples) -> self
116
  | (Params, _Samples, ?Integer? n_samples, Integer n_processors) -> self
 
149
  ?abort_callback: abort_callback,
150
  ?abort_callback_user_data: Object
151
  ) -> instance
152
+
153
+ # params.language = "auto" | "en", etc...
154
+ #
155
  def language=: (String) -> String # TODO: Enumerate lang names
156
+
157
  def language: () -> String
158
  def translate=: (boolish) -> boolish
159
  def translate: () -> (true | false)
160
  def no_context=: (boolish) -> boolish
161
+
162
+ # If true, does not use past transcription (if any) as initial prompt for the decoder.
163
+ #
164
  def no_context: () -> (true | false)
165
+
166
  def single_segment=: (boolish) -> boolish
167
+
168
+ # If true, forces single segment output (useful for streaming).
169
+ #
170
  def single_segment: () -> (true | false)
171
+
172
  def print_special=: (boolish) -> boolish
173
+
174
+ # If true, prints special tokens (e.g. <SOT>, <EOT>, <BEG>, etc.).
175
+ #
176
  def print_special: () -> (true | false)
177
+
178
  def print_progress=: (boolish) -> boolish
179
+
180
+ # If true, prints progress information.
181
+ #
182
  def print_progress: () -> (true | false)
183
+
184
  def print_realtime=: (boolish) -> boolish
185
+
186
+ # If true, prints results from within whisper.cpp. (avoid it, use callback instead)
187
+ #
188
  def print_realtime: () -> (true | false)
189
+
190
+ # If true, prints timestamps for each text segment when printing realtime.
191
+ #
192
  def print_timestamps=: (boolish) -> boolish
193
+
194
  def print_timestamps: () -> (true | false)
195
+
196
  def suppress_blank=: (boolish) -> boolish
197
+
198
+ # If true, suppresses blank outputs.
199
+ #
200
  def suppress_blank: () -> (true | false)
201
+
202
  def suppress_nst=: (boolish) -> boolish
203
+
204
+ # If true, suppresses non-speech-tokens.
205
+ #
206
  def suppress_nst: () -> (true | false)
207
+
208
  def token_timestamps=: (boolish) -> boolish
209
+
210
+ # If true, enables token-level timestamps.
211
+ #
212
  def token_timestamps: () -> (true | false)
213
+
214
  def split_on_word=: (boolish) -> boolish
215
+
216
+ # If true, split on word rather than on token (when used with max_len).
217
+ #
218
  def split_on_word: () -> (true | false)
219
+
220
  def initial_prompt=: (_ToS) -> _ToS
221
+
222
+ # Tokens to provide to the whisper decoder as initial prompt
223
+ # these are prepended to any existing text context from a previous call
224
+ # use whisper_tokenize() to convert text to tokens.
225
+ # Maximum of whisper_n_text_ctx()/2 tokens are used (typically 224).
226
+ #
227
  def initial_prompt: () -> (String | nil)
228
+
229
  def diarize=: (boolish) -> boolish
230
+
231
+ # If true, enables diarization.
232
+ #
233
  def diarize: () -> (true | false)
234
+
235
  def offset=: (Integer) -> Integer
236
+
237
+ # Start offset in ms.
238
+ #
239
  def offset: () -> Integer
240
+
241
  def duration=: (Integer) -> Integer
242
+
243
+ # Audio duration to process in ms.
244
+ #
245
  def duration: () -> Integer
246
+
247
  def max_text_tokens=: (Integer) -> Integer
248
+
249
+ # Max tokens to use from past text as prompt for the decoder.
250
+ #
251
  def max_text_tokens: () -> Integer
252
+
253
  def temperature=: (Float) -> Float
254
  def temperature: () -> Float
255
  def max_initial_ts=: (Float) -> Float
256
+
257
+ # See https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97
258
+ #
259
  def max_initial_ts: () -> Float
260
+
261
  def length_penalty=: (Float) -> Float
262
  def length_penalty: () -> Float
263
  def temperature_inc=: (Float) -> Float
264
  def temperature_inc: () -> Float
265
  def entropy_thold=: (Float) -> Float
266
+
267
+ # Similar to OpenAI's "compression_ratio_threshold"
268
+ #
269
  def entropy_thold: () -> Float
270
+
271
  def logprob_thold=: (Float) -> Float
272
  def logprob_thold: () -> Float
273
  def no_speech_thold=: (Float) -> Float
274
  def no_speech_thold: () -> Float
275
+
276
+ # Sets new segment callback, called for every newly generated text segment.
277
+ #
278
+ # params.new_segment_callback = ->(context, _, n_new, user_data) {
279
+ # # ...
280
+ # }
281
+ #
282
  def new_segment_callback=: (new_segment_callback) -> new_segment_callback
283
  def new_segment_callback: () -> (new_segment_callback | nil)
284
+
285
+ # Sets user data passed to the last argument of new segment callback.
286
+ #
287
  def new_segment_callback_user_data=: (Object) -> Object
288
+
289
  def new_segment_callback_user_data: () -> Object
290
+
291
+ # Sets progress callback, called on each progress update.
292
+ #
293
+ # params.progress_callback = ->(context, _, progress, user_data) {
294
+ # # ...
295
+ # }
296
+ #
297
+ # +progress+ is an Integer between 0 and 100.
298
+ #
299
  def progress_callback=: (progress_callback) -> progress_callback
300
+
301
  def progress_callback: () -> (progress_callback | nil)
302
+
303
+ # Sets user data passed to the last argument of progress callback.
304
+ #
305
  def progress_callback_user_data=: (Object) -> Object
306
+
307
  def progress_callback_user_data: () -> Object
308
+
309
+ # Sets abort callback, called to check if the process should be aborted.
310
+ #
311
+ # params.abort_callback = ->(user_data) {
312
+ # # ...
313
+ # }
314
+ #
315
+ #
316
  def abort_callback=: (abort_callback) -> abort_callback
317
+
318
  def abort_callback: () -> (abort_callback | nil)
319
+
320
+ # Sets user data passed to the last argument of abort callback.
321
+ #
322
  def abort_callback_user_data=: (Object) -> Object
323
+
324
  def abort_callback_user_data: () -> Object
325
+
326
+ # Hook called on new segment. Yields each Whisper::Segment.
327
+ #
328
+ # whisper.on_new_segment do |segment|
329
+ # # ...
330
+ # end
331
+ #
332
  def on_new_segment: { (Segment) -> void } -> void
333
+
334
+ # Hook called on progress update. Yields each progress Integer between 0 and 100.
335
+ #
336
  def on_progress: { (Integer progress) -> void } -> void
337
+
338
+ # Calls the block to determine whether to abort or not. Return +true+ when you want to abort.
339
+ #
340
+ # params.abort_on do
341
+ # if some_condition
342
+ # true # abort
343
+ # else
344
+ # false # continue
345
+ # end
346
+ # end
347
+ #
348
  def abort_on: { (Object user_data) -> boolish } -> void
349
  end
350
 
 
365
  def type: () -> String
366
 
367
  class URI
368
+ def self.new: (string | ::URI::HTTP) -> instance
369
  def to_path: -> String
370
  def clear_cache: -> void
371
  end
372
  end
373
 
374
  class Segment
375
+ # Start time in milliseconds.
376
+ #
377
  def start_time: () -> Integer
378
+
379
+ # End time in milliseconds.
380
+ #
381
  def end_time: () -> Integer
382
+
383
+ # Whether the next segment is predicted as a speaker turn.
384
  def speaker_next_turn?: () -> (true | false)
385
+
386
  def text: () -> String
387
  def no_speech_prob: () -> Float
388
  end
bindings/ruby/tests/helper.rb CHANGED
@@ -21,4 +21,15 @@ class TestBase < Test::Unit::TestCase
21
  def whisper
22
  self.class.whisper
23
  end
 
 
 
 
 
 
 
 
 
 
 
24
  end
 
21
  def whisper
22
  self.class.whisper
23
  end
24
+
25
+ module BuildOptions
26
+ load "ext/options.rb", self
27
+ Options.include self
28
+
29
+ def enable_config(name)
30
+ end
31
+
32
+ def arg_config(name)
33
+ end
34
+ end
35
  end
bindings/ruby/tests/test_package.rb CHANGED
@@ -21,13 +21,26 @@ class TestPackage < TestBase
21
  match_data = `rake -Tbuild`.match(/(whispercpp-(.+)\.gem)/)
22
  filename = match_data[1]
23
  version = match_data[2]
24
- basename = "whisper.#{RbConfig::CONFIG["DLEXT"]}"
25
  Dir.mktmpdir do |dir|
26
  system "gem", "install", "--install-dir", dir.shellescape, "--no-document", "pkg/#{filename.shellescape}", exception: true
27
- assert_path_exist File.join(dir, "gems/whispercpp-#{version}/lib", basename)
28
- assert_path_exist File.join(dir, "gems/whispercpp-#{version}/LICENSE")
29
- assert_path_not_exist File.join(dir, "gems/whispercpp-#{version}/ext/build")
30
  end
31
  end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  end
33
  end
 
21
  match_data = `rake -Tbuild`.match(/(whispercpp-(.+)\.gem)/)
22
  filename = match_data[1]
23
  version = match_data[2]
 
24
  Dir.mktmpdir do |dir|
25
  system "gem", "install", "--install-dir", dir.shellescape, "--no-document", "pkg/#{filename.shellescape}", exception: true
26
+ assert_installed dir, version
 
 
27
  end
28
  end
29
+
30
+ private
31
+
32
+ def assert_installed(dir, version)
33
+ assert_path_exist File.join(dir, "gems/whispercpp-#{version}/lib", "whisper.#{RbConfig::CONFIG["DLEXT"]}")
34
+ assert_path_exist File.join(dir, "gems/whispercpp-#{version}/LICENSE")
35
+ assert_path_not_exist File.join(dir, "gems/whispercpp-#{version}/ext/build")
36
+ end
37
+ end
38
+
39
+ def test_build_options
40
+ options = BuildOptions::Options.new
41
+ assert_empty options.missing_options
42
+ unless ENV["CI"]
43
+ assert_empty options.extra_options
44
+ end
45
  end
46
  end
bindings/ruby/whispercpp.gemspec CHANGED
@@ -3,8 +3,8 @@ require_relative "extsources"
3
  Gem::Specification.new do |s|
4
  s.name = "whispercpp"
5
  s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
6
- s.version = '1.3.1'
7
- s.date = '2024-12-19'
8
  s.description = %q{High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model via Ruby}
9
  s.email = '[email protected]'
10
  s.extra_rdoc_files = ['LICENSE', 'README.md']
 
3
  Gem::Specification.new do |s|
4
  s.name = "whispercpp"
5
  s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
6
+ s.version = '1.3.2'
7
+ s.date = '2025-04-17'
8
  s.description = %q{High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model via Ruby}
9
  s.email = '[email protected]'
10
  s.extra_rdoc_files = ['LICENSE', 'README.md']