Spaces:

Nymbo
/

Supertonic-66M

Running

App Files Files Community

Nymbo committed on Nov 20

Commit

e84a565

verified ·

1 Parent(s): fc3c2b8

Update app.py

Browse files

Files changed (1) hide show

app.py +153 -161

app.py CHANGED Viewed

@@ -1,161 +1,153 @@
-import gradio as gr
-import os
-import io
-import wave
-import numpy as np
-import soundfile as sf
-from huggingface_hub import snapshot_download
-from helper import load_text_to_speech, load_voice_style
-_SUPERTONIC_STATE = {"initialized": False, "tts": None, "assets_dir": None}
-def _init_supertonic() -> None:
-    if _SUPERTONIC_STATE["initialized"]:
-        return
-    print("Initializing Supertonic...")
-    # Download models if not present
-    assets_dir = os.path.join(os.path.dirname(__file__), "assets")
-    if not os.path.exists(assets_dir):
-        print(f"Downloading Supertonic models to {assets_dir}...")
-        snapshot_download(repo_id="Supertone/supertonic", local_dir=assets_dir)
-    onnx_dir = os.path.join(assets_dir, "onnx")
-    tts = load_text_to_speech(onnx_dir, use_gpu=False)
-    _SUPERTONIC_STATE.update({"initialized": True, "tts": tts, "assets_dir": assets_dir})
-    print("Supertonic initialized.")
-def get_supertonic_voices():
-    """Get list of available Supertonic voice styles."""
-    # Ensure assets are downloaded to list voices
-    assets_dir = os.path.join(os.path.dirname(__file__), "assets")
-    if not os.path.exists(assets_dir):
-         # If not initialized/downloaded yet, we might not see voices.
-         # But we can try to download just to list, or just init.
-         _init_supertonic()
-         assets_dir = _SUPERTONIC_STATE["assets_dir"]
-    voice_styles_dir = os.path.join(assets_dir, "voice_styles")
-    if not os.path.exists(voice_styles_dir):
-        return []
-    files = os.listdir(voice_styles_dir)
-    voices = [f.replace('.json', '') for f in files if f.endswith('.json')]
-    return sorted(voices)
-def _audio_np_to_int16(audio_np: np.ndarray) -> np.ndarray:
-    audio_clipped = np.clip(audio_np, -1.0, 1.0)
-    return (audio_clipped * 32767.0).astype(np.int16)
-def _wav_bytes_from_int16(audio_int16: np.ndarray, sample_rate: int) -> bytes:
-    buffer = io.BytesIO()
-    with wave.open(buffer, "wb") as wf:
-        wf.setnchannels(1)
-        wf.setsampwidth(2)
-        wf.setframerate(sample_rate)
-        wf.writeframes(audio_int16.tobytes())
-    return buffer.getvalue()
-def supertonic_tts(text: str, speed: float, voice: str, steps: int):
-    if not text or not text.strip():
-        raise gr.Error("Please enter text to synthesize.")
-    _init_supertonic()
-    tts = _SUPERTONIC_STATE["tts"]
-    assets_dir = _SUPERTONIC_STATE["assets_dir"]
-    voice_path = os.path.join(assets_dir, "voice_styles", f"{voice}.json")
-    if not os.path.exists(voice_path):
-        raise gr.Error(f"Voice style {voice} not found.")
-    style = load_voice_style([voice_path])
-    try:
-        sr = tts.sample_rate
-        for audio_chunk in tts.stream(text, style, steps, speed):
-             audio_int16 = _audio_np_to_int16(audio_chunk)
-             yield _wav_bytes_from_int16(audio_int16, sr)
-    except Exception as e:
-        raise gr.Error(f"Error during speech generation: {str(e)}")
-with gr.Blocks(theme='Nymbo/Nymbo_Theme') as demo:
-    gr.HTML("<h1 style='text-align: center;'>Supertonic-Hub</h1><p style='text-align: center;'>Powered by Supertone/supertonic</p>")
-    # We need to initialize to get voices, but we don't want to block startup too long if download is needed.
-    # For now, let's try to get voices, if empty, user might need to click generate to trigger download/init first?
-    # Or we can just list a default if not found.
-    try:
-        available_voices = get_supertonic_voices()
-    except Exception:
-        available_voices = []
-    default_voice = available_voices[0] if available_voices else None
-    with gr.Row(variant='panel'):
-        speed_slider = gr.Slider(
-            minimum=0.5,
-            maximum=2.0,
-            value=1.0,
-            step=0.1,
-            label='Speed'
-        )
-        steps_slider = gr.Slider(
-            minimum=1,
-            maximum=50,
-            value=5,
-            step=1,
-            label='Steps (Quality vs Speed)'
-        )
-        voice_dropdown = gr.Dropdown(
-            choices=available_voices,
-            label='Voice',
-            value=default_voice,
-            allow_custom_value=True
-        )
-    text_input = gr.Textbox(
-        label="Input Text",
-        placeholder="Enter the text you want to convert to speech here...",
-        lines=5,
-        value="This morning, I took a walk in the park, and the sound of the birds and the breeze was so pleasant that I stopped for a long time just to listen."
-    )
-    generate_btn = gr.Button(
-        "Generate Speech",
-        variant="primary",
-    )
-    audio_output = gr.Audio(
-        label="Generated Speech",
-        streaming=True,
-        autoplay=True
-    )
-    def update_voices():
-        voices = get_supertonic_voices()
-        return gr.Dropdown(choices=voices, value=voices[0] if voices else None)
-    # Add a refresh button for voices in case they weren't loaded initially
-    refresh_btn = gr.Button("Refresh Voices (Downloads Model if needed)")
-    refresh_btn.click(fn=update_voices, outputs=voice_dropdown)
-    generate_inputs = [text_input, speed_slider, voice_dropdown, steps_slider]
-    generate_btn.click(
-        fn=supertonic_tts,
-        inputs=generate_inputs,
-        outputs=audio_output,
-        api_name="generate_speech"
-    )
-    text_input.submit(
-        fn=supertonic_tts,
-        inputs=generate_inputs,
-        outputs=audio_output,
-        api_name="generate_speech_enter"
-    )
-if __name__ == "__main__":
-    demo.queue().launch()

+import gradio as gr
+import os
+import io
+import wave
+import numpy as np
+import soundfile as sf
+from huggingface_hub import snapshot_download
+from helper import load_text_to_speech, load_voice_style
+# Lazily populated, module-wide cache for the Supertonic engine:
+#   "initialized" - True once _init_supertonic() has completed,
+#   "tts"         - the object returned by load_text_to_speech(),
+#   "assets_dir"  - directory that holds the downloaded model assets.
+_SUPERTONIC_STATE = {"initialized": False, "tts": None, "assets_dir": None}
+def _init_supertonic() -> None:
+    """Load the Supertonic engine once and cache it in ``_SUPERTONIC_STATE``.
+
+    Model files are fetched from the ``Supertone/supertonic`` Hub repo into an
+    ``assets`` directory next to this file whenever the ``onnx`` model
+    directory is missing. Subsequent calls return immediately.
+    """
+    if _SUPERTONIC_STATE["initialized"]:
+        return
+    print("Initializing Supertonic...")
+    assets_dir = os.path.join(os.path.dirname(__file__), "assets")
+    onnx_dir = os.path.join(assets_dir, "onnx")
+    # Gate the download on the directory that is actually loaded below, not on
+    # ``assets`` itself: an interrupted download can leave ``assets`` behind
+    # without the models, and would then never be retried.
+    if not os.path.isdir(onnx_dir):
+        print(f"Downloading Supertonic models to {assets_dir}...")
+        snapshot_download(repo_id="Supertone/supertonic", local_dir=assets_dir)
+    tts = load_text_to_speech(onnx_dir, use_gpu=False)
+    _SUPERTONIC_STATE.update({"initialized": True, "tts": tts, "assets_dir": assets_dir})
+    print("Supertonic initialized.")
+def get_supertonic_voices():
+    """Return the sorted names of the available Supertonic voice styles.
+
+    A voice name is the file name of a ``*.json`` file in
+    ``assets/voice_styles`` with the trailing ``.json`` removed. If the
+    ``assets`` directory does not exist yet, ``_init_supertonic()`` is called
+    first so the assets get downloaded. Returns an empty list when there is no
+    ``voice_styles`` directory.
+    """
+    assets_dir = os.path.join(os.path.dirname(__file__), "assets")
+    if not os.path.exists(assets_dir):
+        # Nothing on disk yet: initialising downloads the assets.
+        _init_supertonic()
+        assets_dir = _SUPERTONIC_STATE["assets_dir"]
+    voice_styles_dir = os.path.join(assets_dir, "voice_styles")
+    if not os.path.isdir(voice_styles_dir):
+        return []
+    suffix = ".json"
+    # Strip only the trailing suffix; str.replace() would also remove ".json"
+    # from the middle of a name and yield a voice whose file cannot be found.
+    return sorted(
+        name[: -len(suffix)]
+        for name in os.listdir(voice_styles_dir)
+        if name.endswith(suffix)
+    )
+def _audio_np_to_int16(audio_np: np.ndarray) -> np.ndarray:
+    """Convert float audio samples to 16-bit PCM.
+
+    Samples are limited to the range [-1.0, 1.0], scaled by 32767 and cast to
+    ``np.int16``.
+    """
+    scaled = np.clip(audio_np, -1.0, 1.0) * 32767.0
+    return scaled.astype(np.int16)
+def _wav_bytes_from_int16(audio_int16: np.ndarray, sample_rate: int) -> bytes:
+    """Serialise 16-bit samples as a complete mono WAV file held in memory."""
+    stream = io.BytesIO()
+    writer = wave.open(stream, "wb")
+    try:
+        writer.setnchannels(1)  # mono
+        writer.setsampwidth(2)  # two bytes per sample, i.e. 16-bit
+        writer.setframerate(sample_rate)
+        writer.writeframes(audio_int16.tobytes())
+    finally:
+        # Closing the writer finalises the header before the bytes are read.
+        writer.close()
+    return stream.getvalue()
+def supertonic_tts(text: str, speed: float, voice: str, steps: int):
+    """Stream synthesized speech for ``text`` as a sequence of WAV chunks.
+
+    Args:
+        text: Text to synthesize; must contain non-whitespace characters.
+        speed: Speed value passed through to ``tts.stream``.
+        voice: Name of a voice style, i.e. of a ``<voice>.json`` file in the
+            ``voice_styles`` directory of the assets.
+        steps: Step count passed through to ``tts.stream``.
+
+    Yields:
+        bytes: One self-contained WAV file per audio chunk produced by
+        ``tts.stream``.
+
+    Raises:
+        gr.Error: If the text is empty, the voice is unknown or not a plain
+            name, or synthesis fails.
+    """
+    if not text or not text.strip():
+        raise gr.Error("Please enter text to synthesize.")
+    # The voice dropdown accepts custom values and this function is exposed as
+    # an API endpoint, so ``voice`` is untrusted input. Only a bare file name is
+    # accepted, which keeps the lookup inside the voice_styles directory.
+    if not isinstance(voice, str) or not voice or os.path.basename(voice) != voice:
+        raise gr.Error(f"Voice style {voice} not found.")
+    _init_supertonic()
+    tts = _SUPERTONIC_STATE["tts"]
+    assets_dir = _SUPERTONIC_STATE["assets_dir"]
+    voice_path = os.path.join(assets_dir, "voice_styles", f"{voice}.json")
+    if not os.path.isfile(voice_path):
+        raise gr.Error(f"Voice style {voice} not found.")
+    style = load_voice_style([voice_path])
+    try:
+        sr = tts.sample_rate
+        for audio_chunk in tts.stream(text, style, steps, speed):
+            audio_int16 = _audio_np_to_int16(audio_chunk)
+            yield _wav_bytes_from_int16(audio_int16, sr)
+    except Exception as e:
+        # Chain the cause so the original traceback is preserved.
+        raise gr.Error(f"Error during speech generation: {str(e)}") from e
+# Build the Gradio UI and wire the generate events to supertonic_tts.
+with gr.Blocks(theme='Nymbo/Nymbo_Theme') as demo:
+    gr.HTML("<h1 style='text-align: center;'>Supertonic-Hub</h1><p style='text-align: center;'>Powered by Supertone/supertonic</p>")
+    # Populate the voice list while the UI is being built. Listing voices may
+    # trigger a model download via get_supertonic_voices(); this is best-effort,
+    # so on any failure the dropdown simply starts empty. The dropdown accepts
+    # custom values, so a voice name can still be typed in by hand.
+    try:
+        available_voices = get_supertonic_voices()
+    except Exception:
+        available_voices = []
+    default_voice = available_voices[0] if available_voices else None
+    # Synthesis controls, grouped in one row.
+    with gr.Row(variant='panel'):
+        speed_slider = gr.Slider(
+            minimum=0.5,
+            maximum=2.0,
+            value=1.0,
+            step=0.1,
+            label='Speed'
+        )
+        steps_slider = gr.Slider(
+            minimum=1,
+            maximum=50,
+            value=5,
+            step=1,
+            label='Steps (Quality vs Speed)'
+        )
+        voice_dropdown = gr.Dropdown(
+            choices=available_voices,
+            label='Voice',
+            value=default_voice,
+            allow_custom_value=True
+        )
+    text_input = gr.Textbox(
+        label="Input Text",
+        placeholder="Enter the text you want to convert to speech here...",
+        lines=5,
+        value="This morning, I took a walk in the park, and the sound of the birds and the breeze was so pleasant that I stopped for a long time just to listen."
+    )
+    generate_btn = gr.Button(
+        "Generate Speech",
+        variant="primary",
+    )
+    # Streaming output: supertonic_tts is a generator that yields WAV chunks.
+    audio_output = gr.Audio(
+        label="Generated Speech",
+        streaming=True,
+        autoplay=True
+    )
+    # Order must match the parameters of supertonic_tts(text, speed, voice, steps).
+    generate_inputs = [text_input, speed_slider, voice_dropdown, steps_slider]
+    # Clicking the button and submitting the textbox run the same function,
+    # registered under two separate API names.
+    generate_btn.click(
+        fn=supertonic_tts,
+        inputs=generate_inputs,
+        outputs=audio_output,
+        api_name="generate_speech"
+    )
+    text_input.submit(
+        fn=supertonic_tts,
+        inputs=generate_inputs,
+        outputs=audio_output,
+        api_name="generate_speech_enter"
+    )
+# Start the app, with Gradio's queue enabled, when executed as a script.
+if __name__ == "__main__":
+    demo.queue().launch()