alex committed on
Commit
bac2101
·
1 Parent(s): bbee8a4

minor cleanup

Browse files
Files changed (3) hide show
  1. app.py +8 -12
  2. lipsync.py +1 -1
  3. requirements.txt +6 -8
app.py CHANGED
@@ -4,12 +4,6 @@ from huggingface_hub import snapshot_download, hf_hub_download
4
 
5
  def sh(cmd): subprocess.check_call(cmd, shell=True)
6
 
7
- snapshot_download(
8
- repo_id = "alexnasa/outofsync",
9
- local_dir = "./outofsync"
10
- )
11
-
12
- sh("cd outofsync && pip install . && cd ..")
13
  sh("pip uninstall onnxruntime onnxruntime-gpu -y && pip install onnxruntime-gpu")
14
 
15
  import os
@@ -94,7 +88,6 @@ snapshot_download("IndexTeam/IndexTTS-2", local_dir=os.path.join(current_dir,"ch
94
 
95
  dnr_model = tigersound.look2hear.models.TIGERDNR.from_pretrained("JusperLee/TIGER-DnR").to("cuda").eval()
96
 
97
- sh(f"pip install --no-deps git+https://github.com/OutofAi/index-tts.git")
98
 
99
  from indextts.infer_v2 import IndexTTS2
100
 
@@ -1030,7 +1023,7 @@ with gr.Blocks(css=css) as demo:
1030
  """
1031
  <div style="text-align: center;">
1032
  <p style="font-size:16px; display: inline; margin: 0;">
1033
- Translate and lipsync your clips to English
1034
  </p>
1035
  </div>
1036
  <div style="text-align: center;">
@@ -1052,7 +1045,7 @@ with gr.Blocks(css=css) as demo:
1052
  with gr.Column(elem_id="step-column"):
1053
  gr.HTML("""
1054
  <div>
1055
- <span style="font-size: 24px;">1. Upload a Video</span><br>
1056
  </div>
1057
  """)
1058
 
@@ -1064,7 +1057,11 @@ with gr.Blocks(css=css) as demo:
1064
 
1065
  uncached_examples = gr.Examples(
1066
  examples=[
1067
-
 
 
 
 
1068
  [
1069
  "assets/sofia-esp.mp4",
1070
  ],
@@ -1142,7 +1139,6 @@ with gr.Blocks(css=css) as demo:
1142
 
1143
 
1144
  ],
1145
- label="Cached Examples",
1146
  fn=run_example,
1147
  inputs=[video_input, lipsync, duration],
1148
  outputs=[video_output, srt_output, vocal_16k_output],
@@ -1166,4 +1162,4 @@ with gr.Blocks(css=css) as demo:
1166
  if __name__ == "__main__":
1167
  demo.unload(cleanup)
1168
  demo.queue()
1169
- demo.launch()
 
4
 
5
  def sh(cmd): subprocess.check_call(cmd, shell=True)
6
 
 
 
 
 
 
 
7
  sh("pip uninstall onnxruntime onnxruntime-gpu -y && pip install onnxruntime-gpu")
8
 
9
  import os
 
88
 
89
  dnr_model = tigersound.look2hear.models.TIGERDNR.from_pretrained("JusperLee/TIGER-DnR").to("cuda").eval()
90
 
 
91
 
92
  from indextts.infer_v2 import IndexTTS2
93
 
 
1023
  """
1024
  <div style="text-align: center;">
1025
  <p style="font-size:16px; display: inline; margin: 0;">
1026
+ Translate and lipsync your clips from any language to English
1027
  </p>
1028
  </div>
1029
  <div style="text-align: center;">
 
1045
  with gr.Column(elem_id="step-column"):
1046
  gr.HTML("""
1047
  <div>
1048
+ <span style="font-size: 24px;">1. Upload or Record a Video</span><br>
1049
  </div>
1050
  """)
1051
 
 
1057
 
1058
  uncached_examples = gr.Examples(
1059
  examples=[
1060
+
1061
+ [
1062
+ "assets/popup-2.mp4",
1063
+ ],
1064
+
1065
  [
1066
  "assets/sofia-esp.mp4",
1067
  ],
 
1139
 
1140
 
1141
  ],
 
1142
  fn=run_example,
1143
  inputs=[video_input, lipsync, duration],
1144
  outputs=[video_output, srt_output, vocal_16k_output],
 
1162
  if __name__ == "__main__":
1163
  demo.unload(cleanup)
1164
  demo.queue()
1165
+ demo.launch(ssr_mode=False)
lipsync.py CHANGED
@@ -1,5 +1,5 @@
1
 
2
- from outofsync.latentsync.pipelines.lipsync_pipeline import LipsyncPipeline
3
  import torch
4
 
5
  pipeline = LipsyncPipeline(
 
1
 
2
+ from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline
3
  import torch
4
 
5
  pipeline = LipsyncPipeline(
requirements.txt CHANGED
@@ -4,11 +4,12 @@ torchvision==0.23.0
4
  triton
5
  deepspeed==0.17.1
6
  flash-attn-3 @ https://huggingface.co/alexnasa/flash-attn-3/resolve/main/128/flash_attn_3-3.0.0b1-cp39-abi3-linux_x86_64.whl
 
 
7
 
8
- # --- Whisper / ASR / Audio Processing ---
9
  pyannote.audio
10
  ctranslate2
11
- faster-whisper[cuda12x]
12
  pydub==0.25.1
13
  srt
14
  audio-separator==0.24.1
@@ -16,20 +17,18 @@ ffmpeg-python==0.2.0
16
  python_speech_features==0.6
17
  librosa==0.10.2.post1 # higher than 0.10.1
18
 
19
- # --- NLP / Transformers / ML Utils ---
20
- accelerate==1.8.1 # higher than 0.26.1
21
- transformers==4.52.3 # higher than 4.52.1
22
  tokenizers==0.21.0
23
  sentencepiece
24
  g2p-en==2.1.0
25
  cn2an==0.5.22
26
  textstat
27
- omegaconf==2.3.0 # explicit highest version
28
  munch==4.0.0
29
  tqdm
30
  json5==0.10.0
31
 
32
- # --- Vision / Diffusion / Media ---
33
  diffusers==0.33.1
34
  huggingface-hub<1.0
35
  imageio==2.27.0
@@ -48,7 +47,6 @@ moviepy==1.0.3
48
 
49
  # --- Numerical / Scientific ---
50
  numpy==1.26.2 # higher than 1.24.4
51
- pandas==2.1.3 # higher than 2.0.3
52
  matplotlib==3.8.2
53
  numba==0.58.1
54
  Cython==3.0.7
 
4
  triton
5
  deepspeed==0.17.1
6
  flash-attn-3 @ https://huggingface.co/alexnasa/flash-attn-3/resolve/main/128/flash_attn_3-3.0.0b1-cp39-abi3-linux_x86_64.whl
7
+ indextts @ git+https://github.com/OutofAi/index-tts-batch.git
8
+ latentsync @ git+https://github.com/OutofAi/LatentSync-batch.git
9
 
 
10
  pyannote.audio
11
  ctranslate2
12
+ faster-whisper
13
  pydub==0.25.1
14
  srt
15
  audio-separator==0.24.1
 
17
  python_speech_features==0.6
18
  librosa==0.10.2.post1 # higher than 0.10.1
19
 
20
+ accelerate==1.8.1
21
+ transformers==4.52.3
 
22
  tokenizers==0.21.0
23
  sentencepiece
24
  g2p-en==2.1.0
25
  cn2an==0.5.22
26
  textstat
27
+ omegaconf==2.3.0
28
  munch==4.0.0
29
  tqdm
30
  json5==0.10.0
31
 
 
32
  diffusers==0.33.1
33
  huggingface-hub<1.0
34
  imageio==2.27.0
 
47
 
48
  # --- Numerical / Scientific ---
49
  numpy==1.26.2 # higher than 1.24.4
 
50
  matplotlib==3.8.2
51
  numba==0.58.1
52
  Cython==3.0.7