Spaces:
Sleeping
Sleeping
alex
committed on
Commit
·
bac2101
1
Parent(s):
bbee8a4
minor cleanup
Browse files
- app.py +8 -12
- lipsync.py +1 -1
- requirements.txt +6 -8
app.py
CHANGED
|
@@ -4,12 +4,6 @@ from huggingface_hub import snapshot_download, hf_hub_download
|
|
| 4 |
|
| 5 |
def sh(cmd): subprocess.check_call(cmd, shell=True)
|
| 6 |
|
| 7 |
-
snapshot_download(
|
| 8 |
-
repo_id = "alexnasa/outofsync",
|
| 9 |
-
local_dir = "./outofsync"
|
| 10 |
-
)
|
| 11 |
-
|
| 12 |
-
sh("cd outofsync && pip install . && cd ..")
|
| 13 |
sh("pip uninstall onnxruntime onnxruntime-gpu -y && pip install onnxruntime-gpu")
|
| 14 |
|
| 15 |
import os
|
|
@@ -94,7 +88,6 @@ snapshot_download("IndexTeam/IndexTTS-2", local_dir=os.path.join(current_dir,"ch
|
|
| 94 |
|
| 95 |
dnr_model = tigersound.look2hear.models.TIGERDNR.from_pretrained("JusperLee/TIGER-DnR").to("cuda").eval()
|
| 96 |
|
| 97 |
-
sh(f"pip install --no-deps git+https://github.com/OutofAi/index-tts.git")
|
| 98 |
|
| 99 |
from indextts.infer_v2 import IndexTTS2
|
| 100 |
|
|
@@ -1030,7 +1023,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 1030 |
"""
|
| 1031 |
<div style="text-align: center;">
|
| 1032 |
<p style="font-size:16px; display: inline; margin: 0;">
|
| 1033 |
-
Translate and lipsync your clips to English
|
| 1034 |
</p>
|
| 1035 |
</div>
|
| 1036 |
<div style="text-align: center;">
|
|
@@ -1052,7 +1045,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 1052 |
with gr.Column(elem_id="step-column"):
|
| 1053 |
gr.HTML("""
|
| 1054 |
<div>
|
| 1055 |
-
<span style="font-size: 24px;">1. Upload a Video</span><br>
|
| 1056 |
</div>
|
| 1057 |
""")
|
| 1058 |
|
|
@@ -1064,7 +1057,11 @@ with gr.Blocks(css=css) as demo:
|
|
| 1064 |
|
| 1065 |
uncached_examples = gr.Examples(
|
| 1066 |
examples=[
|
| 1067 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1068 |
[
|
| 1069 |
"assets/sofia-esp.mp4",
|
| 1070 |
],
|
|
@@ -1142,7 +1139,6 @@ with gr.Blocks(css=css) as demo:
|
|
| 1142 |
|
| 1143 |
|
| 1144 |
],
|
| 1145 |
-
label="Cached Examples",
|
| 1146 |
fn=run_example,
|
| 1147 |
inputs=[video_input, lipsync, duration],
|
| 1148 |
outputs=[video_output, srt_output, vocal_16k_output],
|
|
@@ -1166,4 +1162,4 @@ with gr.Blocks(css=css) as demo:
|
|
| 1166 |
if __name__ == "__main__":
|
| 1167 |
demo.unload(cleanup)
|
| 1168 |
demo.queue()
|
| 1169 |
-
demo.launch()
|
|
|
|
| 4 |
|
| 5 |
def sh(cmd): subprocess.check_call(cmd, shell=True)
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
sh("pip uninstall onnxruntime onnxruntime-gpu -y && pip install onnxruntime-gpu")
|
| 8 |
|
| 9 |
import os
|
|
|
|
| 88 |
|
| 89 |
dnr_model = tigersound.look2hear.models.TIGERDNR.from_pretrained("JusperLee/TIGER-DnR").to("cuda").eval()
|
| 90 |
|
|
|
|
| 91 |
|
| 92 |
from indextts.infer_v2 import IndexTTS2
|
| 93 |
|
|
|
|
| 1023 |
"""
|
| 1024 |
<div style="text-align: center;">
|
| 1025 |
<p style="font-size:16px; display: inline; margin: 0;">
|
| 1026 |
+
Translate and lipsync your clips from any language to English
|
| 1027 |
</p>
|
| 1028 |
</div>
|
| 1029 |
<div style="text-align: center;">
|
|
|
|
| 1045 |
with gr.Column(elem_id="step-column"):
|
| 1046 |
gr.HTML("""
|
| 1047 |
<div>
|
| 1048 |
+
<span style="font-size: 24px;">1. Upload or Record a Video</span><br>
|
| 1049 |
</div>
|
| 1050 |
""")
|
| 1051 |
|
|
|
|
| 1057 |
|
| 1058 |
uncached_examples = gr.Examples(
|
| 1059 |
examples=[
|
| 1060 |
+
|
| 1061 |
+
[
|
| 1062 |
+
"assets/popup-2.mp4",
|
| 1063 |
+
],
|
| 1064 |
+
|
| 1065 |
[
|
| 1066 |
"assets/sofia-esp.mp4",
|
| 1067 |
],
|
|
|
|
| 1139 |
|
| 1140 |
|
| 1141 |
],
|
|
|
|
| 1142 |
fn=run_example,
|
| 1143 |
inputs=[video_input, lipsync, duration],
|
| 1144 |
outputs=[video_output, srt_output, vocal_16k_output],
|
|
|
|
| 1162 |
if __name__ == "__main__":
|
| 1163 |
demo.unload(cleanup)
|
| 1164 |
demo.queue()
|
| 1165 |
+
demo.launch(ssr_mode=False)
|
lipsync.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
|
| 2 |
-
from
|
| 3 |
import torch
|
| 4 |
|
| 5 |
pipeline = LipsyncPipeline(
|
|
|
|
| 1 |
|
| 2 |
+
from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline
|
| 3 |
import torch
|
| 4 |
|
| 5 |
pipeline = LipsyncPipeline(
|
requirements.txt
CHANGED
|
@@ -4,11 +4,12 @@ torchvision==0.23.0
|
|
| 4 |
triton
|
| 5 |
deepspeed==0.17.1
|
| 6 |
flash-attn-3 @ https://huggingface.co/alexnasa/flash-attn-3/resolve/main/128/flash_attn_3-3.0.0b1-cp39-abi3-linux_x86_64.whl
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
# --- Whisper / ASR / Audio Processing ---
|
| 9 |
pyannote.audio
|
| 10 |
ctranslate2
|
| 11 |
-
faster-whisper
|
| 12 |
pydub==0.25.1
|
| 13 |
srt
|
| 14 |
audio-separator==0.24.1
|
|
@@ -16,20 +17,18 @@ ffmpeg-python==0.2.0
|
|
| 16 |
python_speech_features==0.6
|
| 17 |
librosa==0.10.2.post1 # higher than 0.10.1
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
transformers==4.52.3 # higher than 4.52.1
|
| 22 |
tokenizers==0.21.0
|
| 23 |
sentencepiece
|
| 24 |
g2p-en==2.1.0
|
| 25 |
cn2an==0.5.22
|
| 26 |
textstat
|
| 27 |
-
omegaconf==2.3.0
|
| 28 |
munch==4.0.0
|
| 29 |
tqdm
|
| 30 |
json5==0.10.0
|
| 31 |
|
| 32 |
-
# --- Vision / Diffusion / Media ---
|
| 33 |
diffusers==0.33.1
|
| 34 |
huggingface-hub<1.0
|
| 35 |
imageio==2.27.0
|
|
@@ -48,7 +47,6 @@ moviepy==1.0.3
|
|
| 48 |
|
| 49 |
# --- Numerical / Scientific ---
|
| 50 |
numpy==1.26.2 # higher than 1.24.4
|
| 51 |
-
pandas==2.1.3 # higher than 2.0.3
|
| 52 |
matplotlib==3.8.2
|
| 53 |
numba==0.58.1
|
| 54 |
Cython==3.0.7
|
|
|
|
| 4 |
triton
|
| 5 |
deepspeed==0.17.1
|
| 6 |
flash-attn-3 @ https://huggingface.co/alexnasa/flash-attn-3/resolve/main/128/flash_attn_3-3.0.0b1-cp39-abi3-linux_x86_64.whl
|
| 7 |
+
indextts @ git+https://github.com/OutofAi/index-tts-batch.git
|
| 8 |
+
latentsync @ git+https://github.com/OutofAi/LatentSync-batch.git
|
| 9 |
|
|
|
|
| 10 |
pyannote.audio
|
| 11 |
ctranslate2
|
| 12 |
+
faster-whisper
|
| 13 |
pydub==0.25.1
|
| 14 |
srt
|
| 15 |
audio-separator==0.24.1
|
|
|
|
| 17 |
python_speech_features==0.6
|
| 18 |
librosa==0.10.2.post1 # higher than 0.10.1
|
| 19 |
|
| 20 |
+
accelerate==1.8.1
|
| 21 |
+
transformers==4.52.3
|
|
|
|
| 22 |
tokenizers==0.21.0
|
| 23 |
sentencepiece
|
| 24 |
g2p-en==2.1.0
|
| 25 |
cn2an==0.5.22
|
| 26 |
textstat
|
| 27 |
+
omegaconf==2.3.0
|
| 28 |
munch==4.0.0
|
| 29 |
tqdm
|
| 30 |
json5==0.10.0
|
| 31 |
|
|
|
|
| 32 |
diffusers==0.33.1
|
| 33 |
huggingface-hub<1.0
|
| 34 |
imageio==2.27.0
|
|
|
|
| 47 |
|
| 48 |
# --- Numerical / Scientific ---
|
| 49 |
numpy==1.26.2 # higher than 1.24.4
|
|
|
|
| 50 |
matplotlib==3.8.2
|
| 51 |
numba==0.58.1
|
| 52 |
Cython==3.0.7
|