import gradio as gr
import pandas as pd

from data_loaders import (
    load_language_list, load_language_taxonomy, load_common_voice_data,
    load_app_content, get_common_voice_stats
)
from commercial_services import (
    fetch_azure_asr_languages, fetch_azure_tts_languages,
    fetch_google_stt_languages, fetch_google_tts_languages,
    fetch_aws_transcribe_languages, fetch_aws_polly_languages,
    get_azure_locales_for_language, get_google_locales_for_language,
    get_aws_locales_for_language,
    check_elevenlabs_multilingual_v2_support, check_elevenlabs_turbo_v3_support
)
from huggingface_search import (
    search_huggingface_models, search_huggingface_datasets, deduplicate_models
)
from language_metadata import get_language_metadata_html, get_default_metadata_html
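
# Data sources: bundled CSV/JSON/Markdown files plus a remote language-taxonomy list.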
LANGUAGE_CODES_FILE = "language-codes-full.csv"
APP_CONTENT_FILE = "app_content.md"
LANGUAGE_TAXONOMY_URL = "https://microsoft.github.io/linguisticdiversity/assets/lang2tax.txt"
COMMON_VOICE_DATA_FILE = "cv-corpus-24.0-2025-12-05.json"
COMMON_VOICE_VERSION = "24.0 (2025-12-05)"
LANGUAGES = {}
LANGUAGE_TAXONOMY = {}
COMMON_VOICE_DATA = {}
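
# Display names for the taxonomy's resource classes (0 = least-resourced, 5 = best-resourced).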
TAXONOMY_LEVELS = {
    0: "The Left-Behinds",
    1: "The Scraping-Bys",
    2: "The Hopefuls",
    3: "The Rising Stars",
    4: "The Underdogs",
    5: "The Winners"
}
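
# Fallback UI text; replaced by the contents of APP_CONTENT_FILE at startup.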
APP_CONTENT = {
    "title": "Speech Resource Finder",
    "description": "Search for speech resources",
    "full_content": ""
}


def search_language_resources(language_code, deduplicate=False):
    """
    Search ASR/TTS resources for a given language.

    Returns results organized by service type.
    deduplicate: if True, collapse duplicate models that share a base name,
        keeping only the variant with the most downloads.
    """
    all_logs = []

    if not language_code:
        return None, None, None, None, 0, 0, None, None, 0, 0, ""
    lang_info = LANGUAGES.get(language_code)
    if not lang_info:
        return None, None, None, None, 0, 0, None, None, 0, 0, ""

    language_name = lang_info['name']
    iso_639_1 = lang_info['iso_639_1']
    iso_639_2 = language_code

    all_logs.append(f"=== Searching for {language_name} ({language_code}) ===")
    all_logs.append(f"Language codes: ISO 639-1={iso_639_1}, ISO 639-2={iso_639_2}")
all_logs.append("\n[Common Voice Dataset]") |
|
|
cv_stats = get_common_voice_stats(iso_639_2, iso_639_1, COMMON_VOICE_DATA) |
|
|
if cv_stats: |
|
|
all_logs.append(f" β
Available in Common Voice (locale: {cv_stats['locale']})") |
|
|
all_logs.append(f" Valid hours: {cv_stats['valid_hrs']:.1f}h, Total hours: {cv_stats['total_hrs']:.1f}h") |
|
|
all_logs.append(f" Gender balance: {cv_stats['male_pct']:.1f}% male, {cv_stats['female_pct']:.1f}% female") |
|
|
else: |
|
|
all_logs.append(f" β Not available in Common Voice") |
all_logs.append("\n[Azure Speech Services]") |
|
|
azure_asr = fetch_azure_asr_languages() |
|
|
azure_tts = fetch_azure_tts_languages() |
|
|
all_logs.append(f" Fetched {len(azure_asr)} ASR languages and {len(azure_tts)} TTS languages from Azure") |
|
|
|
|
|
|
|
|
azure_locales = get_azure_locales_for_language(iso_639_1) |
|
|
all_logs.append(f" Matching Azure locales: {azure_locales}") |
|
|
|
|
|
|
|
|
azure_asr_locales = [loc for loc in azure_locales if loc in azure_asr] |
|
|
azure_asr_available = len(azure_asr_locales) > 0 |
|
|
all_logs.append(f" Azure ASR: {'β
Supported' if azure_asr_available else 'β Not supported'} ({len(azure_asr_locales)} locales)") |
|
|
|
|
|
|
|
|
azure_tts_locales = [loc for loc in azure_locales if loc in azure_tts] |
|
|
azure_tts_available = len(azure_tts_locales) > 0 |
|
|
azure_total_voices = sum(azure_tts[loc]['voice_count'] for loc in azure_tts_locales) |
|
|
all_logs.append(f" Azure TTS: {'β
Supported' if azure_tts_available else 'β Not supported'} ({len(azure_tts_locales)} locales, {azure_total_voices} voices)") |
|
|
|
|
|
|
|
|
all_logs.append("\n[Google Cloud Speech]") |
|
|
google_stt = fetch_google_stt_languages() |
|
|
google_tts = fetch_google_tts_languages() |
|
|
all_logs.append(f" Fetched {len(google_stt)} STT languages and {len(google_tts)} TTS languages from Google Cloud") |
|
|
|
|
|
|
|
|
google_locales = get_google_locales_for_language(iso_639_1) |
|
|
all_logs.append(f" Matching Google Cloud locales: {google_locales}") |
|
|
|
|
|
|
|
|
google_stt_locales = [loc for loc in google_locales if loc in google_stt] |
|
|
google_stt_available = len(google_stt_locales) > 0 |
|
|
all_logs.append(f" Google STT: {'β
Supported' if google_stt_available else 'β Not supported'} ({len(google_stt_locales)} locales)") |
|
|
|
|
|
|
|
|
google_tts_locales = [loc for loc in google_locales if loc in google_tts] |
|
|
google_tts_available = len(google_tts_locales) > 0 |
|
|
google_total_voices = sum(google_tts[loc]['voice_count'] for loc in google_tts_locales) |
|
|
all_logs.append(f" Google TTS: {'β
Supported' if google_tts_available else 'β Not supported'} ({len(google_tts_locales)} locales, {google_total_voices} voices)") |
|
|
|
|
|
|
|
|
all_logs.append("\n[AWS (Transcribe + Polly)]") |
|
|
aws_transcribe = fetch_aws_transcribe_languages() |
|
|
aws_polly = fetch_aws_polly_languages() |
|
|
all_logs.append(f" Fetched {len(aws_transcribe)} Transcribe languages and {len(aws_polly)} Polly languages from AWS") |
|
|
|
|
|
|
|
|
aws_locales = get_aws_locales_for_language(iso_639_1) |
|
|
all_logs.append(f" Matching AWS locales: {aws_locales}") |
|
|
|
|
|
|
|
|
aws_transcribe_locales = [loc for loc in aws_locales if loc in aws_transcribe] |
|
|
aws_transcribe_available = len(aws_transcribe_locales) > 0 |
|
|
all_logs.append(f" AWS Transcribe: {'β
Supported' if aws_transcribe_available else 'β Not supported'} ({len(aws_transcribe_locales)} locales)") |
|
|
|
|
|
|
|
|
aws_polly_locales = [loc for loc in aws_locales if loc in aws_polly] |
|
|
aws_polly_available = len(aws_polly_locales) > 0 |
|
|
aws_total_voices = sum(aws_polly[loc]['voice_count'] for loc in aws_polly_locales) |
|
|
all_logs.append(f" AWS Polly: {'β
Supported' if aws_polly_available else 'β Not supported'} ({len(aws_polly_locales)} locales, {aws_total_voices} voices)") |
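
    # Assemble rows for the "Commercial Services" summary table shown in the UI.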
    commercial_rows = []

    if azure_asr_available:
        azure_asr_text = f"✅ {len(azure_asr_locales)} locale(s)"
    else:
        azure_asr_text = "❌ N/A"
    if azure_tts_available:
        azure_tts_text = f"✅ {len(azure_tts_locales)} locale(s), {azure_total_voices} voice(s)"
    else:
        azure_tts_text = "❌ N/A"
    commercial_rows.append({
        "Service": "Azure Speech",
        "ASR": azure_asr_text,
        "TTS": azure_tts_text,
    })

    if google_stt_available:
        google_stt_text = f"✅ {len(google_stt_locales)} locale(s)"
    else:
        google_stt_text = "❌ N/A"
    if google_tts_available:
        google_tts_text = f"✅ {len(google_tts_locales)} locale(s), {google_total_voices} voice(s)"
    else:
        google_tts_text = "❌ N/A"
    commercial_rows.append({
        "Service": "Google Cloud Speech",
        "ASR": google_stt_text,
        "TTS": google_tts_text,
    })

    if aws_transcribe_available:
        aws_transcribe_text = f"✅ {len(aws_transcribe_locales)} locale(s)"
    else:
        aws_transcribe_text = "❌ N/A"
    if aws_polly_available:
        aws_polly_text = f"✅ {len(aws_polly_locales)} locale(s), {aws_total_voices} voice(s)"
    else:
        aws_polly_text = "❌ N/A"
    commercial_rows.append({
        "Service": "AWS (Transcribe + Polly)",
        "ASR": aws_transcribe_text,
        "TTS": aws_polly_text,
    })
all_logs.append("\n[ElevenLabs]") |
|
|
elevenlabs_v2_supported = check_elevenlabs_multilingual_v2_support(iso_639_1) |
|
|
all_logs.append(f" Multilingual v2: {'β
Supported' if elevenlabs_v2_supported else 'β Not supported'}") |
|
|
|
|
|
if elevenlabs_v2_supported: |
|
|
elevenlabs_v2_tts_text = "β
Supported" |
|
|
else: |
|
|
elevenlabs_v2_tts_text = "β N/A" |
|
|
|
|
|
commercial_rows.append({ |
|
|
"Service": "ElevenLabs Multilingual v2", |
|
|
"ASR": "N/A", |
|
|
"TTS": elevenlabs_v2_tts_text, |
|
|
}) |
|
|
|
|
|
|
|
|
elevenlabs_v3_supported = check_elevenlabs_turbo_v3_support(iso_639_2) |
|
|
all_logs.append(f" Turbo v3: {'β
Supported' if elevenlabs_v3_supported else 'β Not supported'}") |
|
|
|
|
|
if elevenlabs_v3_supported: |
|
|
elevenlabs_v3_tts_text = "β
Supported" |
|
|
else: |
|
|
elevenlabs_v3_tts_text = "β N/A" |
|
|
|
|
|
commercial_rows.append({ |
|
|
"Service": "ElevenLabs Turbo v3", |
|
|
"ASR": "N/A", |
|
|
"TTS": elevenlabs_v3_tts_text, |
|
|
}) |
|
|
|
|
|
    commercial_df = pd.DataFrame(commercial_rows)
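
    # Hugging Face models: query by task, trying both the ISO 639-1 and ISO 639-2 codes.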
all_logs.append("\n[HuggingFace Models]") |
|
|
|
|
|
asr_models, asr_model_logs = search_huggingface_models(iso_639_1, iso_639_2, 'automatic-speech-recognition', max_results=100, max_pages=5) |
|
|
all_logs.extend([f" [ASR] {log}" for log in asr_model_logs]) |
|
|
|
|
|
tts_models, tts_model_logs = search_huggingface_models(iso_639_1, iso_639_2, 'text-to-speech', max_results=100, max_pages=5) |
|
|
all_logs.extend([f" [TTS] {log}" for log in tts_model_logs]) |
    if deduplicate:
        all_logs.append("\n[Deduplication]")
        asr_before = len(asr_models)
        asr_models = deduplicate_models(asr_models)
        all_logs.append(f" ASR models: {asr_before} → {len(asr_models)} (removed {asr_before - len(asr_models)} duplicates)")

        tts_before = len(tts_models)
        tts_models = deduplicate_models(tts_models)
        all_logs.append(f" TTS models: {tts_before} → {len(tts_models)} (removed {tts_before - len(tts_models)} duplicates)")
    else:
        for model in asr_models:
            model['duplicates'] = 1
        for model in tts_models:
            model['duplicates'] = 1
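
    # Convert model hits into dataframes; names become markdown links in the UI tables.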
    asr_models_data = []
    for model in asr_models:
        asr_models_data.append({
            "Model Name": f"[{model['name']}]({model['url']})",
            "Downloads": model['downloads'],
            "Likes": model['likes'],
            "Size": model.get('size', ''),
            "Duplicates": model.get('duplicates', 1)
        })
    if asr_models_data:
        asr_models_df = pd.DataFrame(asr_models_data)
    else:
        asr_models_df = pd.DataFrame(columns=["Model Name", "Downloads", "Likes", "Size", "Duplicates"])
    tts_models_data = []
    for model in tts_models:
        tts_models_data.append({
            "Model Name": f"[{model['name']}]({model['url']})",
            "Downloads": model['downloads'],
            "Likes": model['likes'],
            "Size": model.get('size', ''),
            "Duplicates": model.get('duplicates', 1)
        })
    if tts_models_data:
        tts_models_df = pd.DataFrame(tts_models_data)
    else:
        tts_models_df = pd.DataFrame(columns=["Model Name", "Downloads", "Likes", "Size", "Duplicates"])
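
    # Same flow for datasets: search by task tag, then build the display tables.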
all_logs.append("\n[HuggingFace Datasets]") |
|
|
asr_datasets, asr_dataset_logs = search_huggingface_datasets(iso_639_1, iso_639_2, 'automatic-speech-recognition', max_results=100, max_pages=5) |
|
|
all_logs.extend([f" [ASR] {log}" for log in asr_dataset_logs]) |
|
|
|
|
|
tts_datasets, tts_dataset_logs = search_huggingface_datasets(iso_639_1, iso_639_2, 'text-to-speech', max_results=100, max_pages=5) |
|
|
all_logs.extend([f" [TTS] {log}" for log in tts_dataset_logs]) |
|
|
|
|
|
|
|
|
    asr_datasets_data = []
    for dataset in asr_datasets:
        asr_datasets_data.append({
            "Dataset Name": f"[{dataset['name']}]({dataset['url']})",
            "Downloads": dataset['downloads'],
            "Likes": dataset['likes'],
            "Size": dataset.get('size', '')
        })
    if asr_datasets_data:
        asr_datasets_df = pd.DataFrame(asr_datasets_data)
    else:
        asr_datasets_df = pd.DataFrame(columns=["Dataset Name", "Downloads", "Likes", "Size"])
    tts_datasets_data = []
    for dataset in tts_datasets:
        tts_datasets_data.append({
            "Dataset Name": f"[{dataset['name']}]({dataset['url']})",
            "Downloads": dataset['downloads'],
            "Likes": dataset['likes'],
            "Size": dataset.get('size', '')
        })
    if tts_datasets_data:
        tts_datasets_df = pd.DataFrame(tts_datasets_data)
    else:
        tts_datasets_df = pd.DataFrame(columns=["Dataset Name", "Downloads", "Likes", "Size"])
log_text = "\n".join(all_logs) |
|
|
|
|
|
|
|
|
return cv_stats, commercial_df, asr_models_df, tts_models_df, len(asr_models), len(tts_models), asr_datasets_df, tts_datasets_df, len(asr_datasets), len(tts_datasets), log_text |
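

# Startup: load the app content, language list, taxonomy, and Common Voice statistics once, at import time.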
print("Initializing Speech Resource Finder...") |
|
|
APP_CONTENT = load_app_content(APP_CONTENT_FILE) |
|
|
LANGUAGES = load_language_list(LANGUAGE_CODES_FILE) |
|
|
LANGUAGE_TAXONOMY = load_language_taxonomy(LANGUAGE_TAXONOMY_URL) |
|
|
COMMON_VOICE_DATA = load_common_voice_data(COMMON_VOICE_DATA_FILE) |
|
|
|
|
|
|
|
|
language_choices = [f"{code}: {info['name']}" for code, info in sorted(LANGUAGES.items(), key=lambda x: x[1]['name'])] |
|
|
print(f"Created dropdown with {len(language_choices)} language options") |
with gr.Blocks(title=APP_CONTENT["title"]) as demo:
    gr.Markdown(f"# 🔍 {APP_CONTENT['title']}")
    gr.Markdown(APP_CONTENT["description"])

    with gr.Row(equal_height=True):
        with gr.Column(scale=70):
            language_dropdown = gr.Dropdown(
                choices=language_choices,
                label="Select Language",
                info="Type to search for a language",
                allow_custom_value=False,
                filterable=True,
            )
        with gr.Column(scale=30):
            language_metadata = gr.HTML(
                """<div style='padding: 15px; border: 2px solid #e0e0e0; border-radius: 4px; background-color: #fafafa; height: 100%; display: flex; align-items: center; justify-content: center; box-sizing: border-box;'>
                <p style='margin: 0; color: #333; font-size: 14px;'>Select a language to see resource classification</p>
                </div>""",
                elem_id="language-metadata"
            )
    with gr.Row():
        with gr.Column(scale=70):
            gr.Markdown("## Commercial Services")
            commercial_table = gr.Dataframe(
                headers=["Service", "ASR", "TTS"],
                interactive=False,
                wrap=True,
            )
        with gr.Column(scale=30):
            gr.Markdown("## Common Voice")
            cv_info = gr.HTML(
                """<div style='padding: 15px; border: 2px solid #e0e0e0; border-radius: 4px; background-color: #fafafa;'>
                <p style='margin: 0; color: #666; font-size: 13px;'>Select a language</p>
                </div>""",
                elem_id="cv-info"
            )
gr.Markdown("## HuggingFace Models") |
|
|
|
|
|
with gr.Row(): |
|
|
deduplicate_checkbox = gr.Checkbox( |
|
|
label="Deduplicate models", |
|
|
value=True, |
|
|
info="Keep only the model with most downloads for each base name" |
|
|
) |
|
|
|
|
|
|
|
|
    with gr.Tabs():
        with gr.Tab(label="ASR Models") as asr_tab:
            asr_count_label = gr.Markdown("*Loading...*")
            asr_models_table = gr.Dataframe(
                headers=["Model Name", "Downloads", "Likes", "Size", "Duplicates"],
                interactive=False,
                wrap=True,
                datatype=["markdown", "number", "number", "str", "number"],
            )
        with gr.Tab(label="TTS Models") as tts_tab:
            tts_count_label = gr.Markdown("*Loading...*")
            tts_models_table = gr.Dataframe(
                headers=["Model Name", "Downloads", "Likes", "Size", "Duplicates"],
                interactive=False,
                wrap=True,
                datatype=["markdown", "number", "number", "str", "number"],
            )
gr.Markdown("## HuggingFace Datasets") |
|
|
|
|
|
|
|
|
with gr.Tabs(): |
|
|
with gr.Tab(label="ASR Datasets") as asr_datasets_tab: |
|
|
asr_datasets_count_label = gr.Markdown("*Loading...*") |
|
|
asr_datasets_table = gr.Dataframe( |
|
|
headers=["Dataset Name", "Downloads", "Likes", "Size"], |
|
|
interactive=False, |
|
|
wrap=True, |
|
|
datatype=["markdown", "number", "number", "str"], |
|
|
) |
|
|
|
|
|
with gr.Tab(label="TTS Datasets") as tts_datasets_tab: |
|
|
tts_datasets_count_label = gr.Markdown("*Loading...*") |
|
|
tts_datasets_table = gr.Dataframe( |
|
|
headers=["Dataset Name", "Downloads", "Likes", "Size"], |
|
|
interactive=False, |
|
|
wrap=True, |
|
|
datatype=["markdown", "number", "number", "str"], |
|
|
) |
|
|
|
|
|
with gr.Accordion("Logs", open=False): |
|
|
log_textbox = gr.Textbox( |
|
|
show_label=False, |
|
|
lines=15, |
|
|
max_lines=30, |
|
|
interactive=False, |
|
|
placeholder="Logs will appear here...", |
|
|
autoscroll=True, |
|
|
) |
|
|
|
|
|
|
|
|
with gr.Accordion("About this tool", open=False): |
|
|
gr.Markdown(APP_CONTENT["full_content"]) |
    def on_search(language_selection, deduplicate):
        if not language_selection:
            cv_default_html = """<div style='padding: 15px; border: 2px solid #e0e0e0; border-radius: 4px; background-color: #fafafa;'>
            <p style='margin: 0; color: #666; font-size: 13px;'>Select a language</p>
            </div>"""
            return get_default_metadata_html(), cv_default_html, None, "", None, "", None, "", None, "", None, ""

        language_code = language_selection.split(":")[0].strip()
        language_name = LANGUAGES.get(language_code, {}).get("name", "")
        iso_639_1 = LANGUAGES.get(language_code, {}).get("iso_639_1", "")

        metadata_html = get_language_metadata_html(language_code, language_name, iso_639_1, LANGUAGE_TAXONOMY)

        cv_stats, commercial_df, asr_models_df, tts_models_df, asr_models_count, tts_models_count, asr_datasets_df, tts_datasets_df, asr_datasets_count, tts_datasets_count, logs = search_language_resources(language_code, deduplicate=deduplicate)
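
        # Common Voice summary card: green border when the language is in the corpus, grey otherwise.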
        if cv_stats:
            cv_info_html = f"""<div style='padding: 15px; border: 2px solid #4caf50; border-radius: 4px; background-color: #ffffff;'>
            <div style='margin-bottom: 12px;'>
                <span style='font-size: 18px;'>✅</span>
                <span style='font-weight: bold; color: #2e7d32; font-size: 14px; margin-left: 4px;'>Available</span>
            </div>
            <table style='width: 100%; border-collapse: collapse; font-size: 13px;'>
                <tr>
                    <td style='padding: 3px 8px 3px 0; color: #666; width: 45%;'>Locale</td>
                    <td style='padding: 3px 0; color: #000; font-weight: 500;'>{cv_stats['locale']}</td>
                </tr>
                <tr>
                    <td style='padding: 3px 8px 3px 0; color: #666;'>Valid Hours</td>
                    <td style='padding: 3px 0; color: #000; font-weight: 500;'>{cv_stats['valid_hrs']:.1f}h</td>
                </tr>
                <tr>
                    <td style='padding: 3px 8px 3px 0; color: #666;'>Total Hours</td>
                    <td style='padding: 3px 0; color: #000; font-weight: 500;'>{cv_stats['total_hrs']:.1f}h</td>
                </tr>
                <tr>
                    <td style='padding: 3px 8px 3px 0; color: #666;'>Contributors</td>
                    <td style='padding: 3px 0; color: #000; font-weight: 500;'>{cv_stats['users_formatted']}</td>
                </tr>
                <tr>
                    <td style='padding: 3px 8px 3px 0; color: #666;'>Gender</td>
                    <td style='padding: 3px 0; color: #000; font-weight: 500;'>{cv_stats['male_pct']:.0f}% M / {cv_stats['female_pct']:.0f}% F</td>
                </tr>
                <tr>
                    <td style='padding: 3px 8px 3px 0; color: #666;'>Version</td>
                    <td style='padding: 3px 0; color: #000; font-weight: 500;'>{COMMON_VOICE_VERSION}</td>
                </tr>
            </table>
            </div>"""
        else:
            cv_info_html = """<div style='padding: 15px; border: 2px solid #e0e0e0; border-radius: 4px; background-color: #fafafa;'>
            <div style='margin-bottom: 8px;'>
                <span style='font-size: 18px;'>❌</span>
                <span style='font-weight: bold; color: #666; font-size: 14px; margin-left: 4px;'>Not Available</span>
            </div>
            <p style='margin: 0; color: #999; font-size: 12px;'>Not in Common Voice dataset</p>
            </div>"""
        asr_models_label = f"**Found {asr_models_count} ASR model(s)**"
        tts_models_label = f"**Found {tts_models_count} TTS model(s)**"
        asr_datasets_label = f"**Found {asr_datasets_count} ASR dataset(s)**"
        tts_datasets_label = f"**Found {tts_datasets_count} TTS dataset(s)**"

        return metadata_html, cv_info_html, commercial_df, asr_models_label, asr_models_df, tts_models_label, tts_models_df, asr_datasets_label, asr_datasets_df, tts_datasets_label, tts_datasets_df, logs
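
    # Re-run the search whenever the language selection or the dedup toggle changes.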
    language_dropdown.change(
        fn=on_search,
        inputs=[language_dropdown, deduplicate_checkbox],
        outputs=[language_metadata, cv_info, commercial_table, asr_count_label, asr_models_table, tts_count_label, tts_models_table, asr_datasets_count_label, asr_datasets_table, tts_datasets_count_label, tts_datasets_table, log_textbox],
    )
    deduplicate_checkbox.change(
        fn=on_search,
        inputs=[language_dropdown, deduplicate_checkbox],
        outputs=[language_metadata, cv_info, commercial_table, asr_count_label, asr_models_table, tts_count_label, tts_models_table, asr_datasets_count_label, asr_datasets_table, tts_datasets_count_label, tts_datasets_table, log_textbox],
    )
if __name__ == "__main__": |
|
|
demo.launch(server_name="0.0.0.0", server_port=7860, share=False, show_error=True) |