alex committed on
Commit
bac2101
·
1 Parent(s): bbee8a4

minor cleanup

Browse files
Files changed (3) hide show
  1. app.py +8 -12
  2. lipsync.py +1 -1
  3. requirements.txt +6 -8
app.py CHANGED
@@ -4,12 +4,6 @@ from huggingface_hub import snapshot_download, hf_hub_download
4
 
5
  def sh(cmd): subprocess.check_call(cmd, shell=True)
6
 
7
- snapshot_download(
8
- repo_id = "alexnasa/outofsync",
9
- local_dir = "./outofsync"
10
- )
11
-
12
- sh("cd outofsync && pip install . && cd ..")
13
  sh("pip uninstall onnxruntime onnxruntime-gpu -y && pip install onnxruntime-gpu")
14
 
15
  import os
@@ -94,7 +88,6 @@ snapshot_download("IndexTeam/IndexTTS-2", local_dir=os.path.join(current_dir,"ch
94
 
95
  dnr_model = tigersound.look2hear.models.TIGERDNR.from_pretrained("JusperLee/TIGER-DnR").to("cuda").eval()
96
 
97
- sh(f"pip install --no-deps git+https://github.com/OutofAi/index-tts.git")
98
 
99
  from indextts.infer_v2 import IndexTTS2
100
 
@@ -1030,7 +1023,7 @@ with gr.Blocks(css=css) as demo:
1030
  """
1031
  <div style="text-align: center;">
1032
  <p style="font-size:16px; display: inline; margin: 0;">
1033
- Translate and lipsync your clips to English
1034
  </p>
1035
  </div>
1036
  <div style="text-align: center;">
@@ -1052,7 +1045,7 @@ with gr.Blocks(css=css) as demo:
1052
  with gr.Column(elem_id="step-column"):
1053
  gr.HTML("""
1054
  <div>
1055
- <span style="font-size: 24px;">1. Upload a Video</span><br>
1056
  </div>
1057
  """)
1058
 
@@ -1064,7 +1057,11 @@ with gr.Blocks(css=css) as demo:
1064
 
1065
  uncached_examples = gr.Examples(
1066
  examples=[
1067
-
 
 
 
 
1068
  [
1069
  "assets/sofia-esp.mp4",
1070
  ],
@@ -1142,7 +1139,6 @@ with gr.Blocks(css=css) as demo:
1142
 
1143
 
1144
  ],
1145
- label="Cached Examples",
1146
  fn=run_example,
1147
  inputs=[video_input, lipsync, duration],
1148
  outputs=[video_output, srt_output, vocal_16k_output],
@@ -1166,4 +1162,4 @@ with gr.Blocks(css=css) as demo:
1166
  if __name__ == "__main__":
1167
  demo.unload(cleanup)
1168
  demo.queue()
1169
- demo.launch()
 
4
 
5
  def sh(cmd): subprocess.check_call(cmd, shell=True)
6
 
 
 
 
 
 
 
7
  sh("pip uninstall onnxruntime onnxruntime-gpu -y && pip install onnxruntime-gpu")
8
 
9
  import os
 
88
 
89
  dnr_model = tigersound.look2hear.models.TIGERDNR.from_pretrained("JusperLee/TIGER-DnR").to("cuda").eval()
90
 
 
91
 
92
  from indextts.infer_v2 import IndexTTS2
93
 
 
1023
  """
1024
  <div style="text-align: center;">
1025
  <p style="font-size:16px; display: inline; margin: 0;">
1026
+ Translate and lipsync your clips from any language to English
1027
  </p>
1028
  </div>
1029
  <div style="text-align: center;">
 
1045
  with gr.Column(elem_id="step-column"):
1046
  gr.HTML("""
1047
  <div>
1048
+ <span style="font-size: 24px;">1. Upload or Record a Video</span><br>
1049
  </div>
1050
  """)
1051
 
 
1057
 
1058
  uncached_examples = gr.Examples(
1059
  examples=[
1060
+
1061
+ [
1062
+ "assets/popup-2.mp4",
1063
+ ],
1064
+
1065
  [
1066
  "assets/sofia-esp.mp4",
1067
  ],
 
1139
 
1140
 
1141
  ],
 
1142
  fn=run_example,
1143
  inputs=[video_input, lipsync, duration],
1144
  outputs=[video_output, srt_output, vocal_16k_output],
 
1162
  if __name__ == "__main__":
1163
  demo.unload(cleanup)
1164
  demo.queue()
1165
+ demo.launch(ssr_mode=False)
lipsync.py CHANGED
@@ -1,5 +1,5 @@
1
 
2
- from outofsync.latentsync.pipelines.lipsync_pipeline import LipsyncPipeline
3
  import torch
4
 
5
  pipeline = LipsyncPipeline(
 
1
 
2
+ from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline
3
  import torch
4
 
5
  pipeline = LipsyncPipeline(
requirements.txt CHANGED
@@ -4,11 +4,12 @@ torchvision==0.23.0
4
  triton
5
  deepspeed==0.17.1
6
  flash-attn-3 @ https://huggingface.co/alexnasa/flash-attn-3/resolve/main/128/flash_attn_3-3.0.0b1-cp39-abi3-linux_x86_64.whl
 
 
7
 
8
- # --- Whisper / ASR / Audio Processing ---
9
  pyannote.audio
10
  ctranslate2
11
- faster-whisper[cuda12x]
12
  pydub==0.25.1
13
  srt
14
  audio-separator==0.24.1
@@ -16,20 +17,18 @@ ffmpeg-python==0.2.0
16
  python_speech_features==0.6
17
  librosa==0.10.2.post1 # higher than 0.10.1
18
 
19
- # --- NLP / Transformers / ML Utils ---
20
- accelerate==1.8.1 # higher than 0.26.1
21
- transformers==4.52.3 # higher than 4.52.1
22
  tokenizers==0.21.0
23
  sentencepiece
24
  g2p-en==2.1.0
25
  cn2an==0.5.22
26
  textstat
27
- omegaconf==2.3.0 # explicit highest version
28
  munch==4.0.0
29
  tqdm
30
  json5==0.10.0
31
 
32
- # --- Vision / Diffusion / Media ---
33
  diffusers==0.33.1
34
  huggingface-hub<1.0
35
  imageio==2.27.0
@@ -48,7 +47,6 @@ moviepy==1.0.3
48
 
49
  # --- Numerical / Scientific ---
50
  numpy==1.26.2 # higher than 1.24.4
51
- pandas==2.1.3 # higher than 2.0.3
52
  matplotlib==3.8.2
53
  numba==0.58.1
54
  Cython==3.0.7
 
4
  triton
5
  deepspeed==0.17.1
6
  flash-attn-3 @ https://huggingface.co/alexnasa/flash-attn-3/resolve/main/128/flash_attn_3-3.0.0b1-cp39-abi3-linux_x86_64.whl
7
+ indextts @ git+https://github.com/OutofAi/index-tts-batch.git
8
+ latentsync @ git+https://github.com/OutofAi/LatentSync-batch.git
9
 
 
10
  pyannote.audio
11
  ctranslate2
12
+ faster-whisper
13
  pydub==0.25.1
14
  srt
15
  audio-separator==0.24.1
 
17
  python_speech_features==0.6
18
  librosa==0.10.2.post1 # higher than 0.10.1
19
 
20
+ accelerate==1.8.1
21
+ transformers==4.52.3
 
22
  tokenizers==0.21.0
23
  sentencepiece
24
  g2p-en==2.1.0
25
  cn2an==0.5.22
26
  textstat
27
+ omegaconf==2.3.0
28
  munch==4.0.0
29
  tqdm
30
  json5==0.10.0
31
 
 
32
  diffusers==0.33.1
33
  huggingface-hub<1.0
34
  imageio==2.27.0
 
47
 
48
  # --- Numerical / Scientific ---
49
  numpy==1.26.2 # higher than 1.24.4
 
50
  matplotlib==3.8.2
51
  numba==0.58.1
52
  Cython==3.0.7