Spaces:

Livengood
/

Instance-VRAM-Calculator

Running

Livengood Claude committed on Nov 29, 2025

Commit

b502091

1 Parent(s): 3df3dc4

Add enhanced features: colored GPU tables, cloud costs, more examples

Features added:
- Color-coded status indicators (🟢🟡🔴) for GPU fit
- Separate sections for Consumer GPUs, Apple Silicon, Cloud GPUs
- Expanded cloud GPU options with hourly/daily/monthly costs
- Best value cloud recommendation
- GPU Reference tab with all hardware specs
- 12 popular model examples (Llama, Mistral, Qwen, Gemma, Phi, DeepSeek)
- Quick comparison sets for model families
- Improved memory breakdown tables
- Quantization options with fit indicators
- Soft theme for better readability

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show

app.py +214 -74

app.py CHANGED Viewed

@@ -8,22 +8,39 @@ from functools import lru_cache
 api = HfApi()
-GPU_SPECS = {
-    "RTX 3080": (10, 0),
-    "RTX 3090": (24, 0),
-    "RTX 4080": (16, 0),
-    "RTX 4090": (24, 0),
-    "RTX 5090": (32, 0),
-    "M2 Ultra": (192, 0),
-    "M3 Max": (128, 0),
-    "M4 Max": (128, 0),
-    "RTX A6000": (48, 0),
-    "L40S": (48, 1.00),
-    "A10G": (24, 1.00),
     "L4": (24, 0.70),
     "A100 40GB": (40, 3.00),
     "A100 80GB": (80, 5.00),
     "H100 80GB": (80, 8.00),
 }
 DTYPE_BYTES = {
@@ -42,6 +59,15 @@ FRAMEWORKS = {
     "Ollama": 1.08,
 }
 def bytes_to_gb(b):
     return b / (1024 ** 3)
@@ -120,79 +146,135 @@ def calculate(model_id, context, batch, mode, framework, num_gpus, lora_rank):
             opt_gb = bytes_to_gb(params * 8)
             act_gb = weights_gb * 2 * batch
             total = weights_gb + grad_gb + opt_gb + act_gb
-            out.append("### Training Memory")
-            out.append("- Weights: " + str(round(weights_gb, 1)) + " GB")
-            out.append("- Gradients: " + str(round(grad_gb, 1)) + " GB")
-            out.append("- Optimizer: " + str(round(opt_gb, 1)) + " GB")
-            out.append("- Activations: " + str(round(act_gb, 1)) + " GB")
         elif mode == "LoRA":
             base = weights_gb
             lora_params = int(params * lora_rank * 0.0001)
             lora_gb = bytes_to_gb(lora_params * dtype_bytes)
             act_gb = base * 0.3
             total = base + lora_gb + act_gb
-            out.append("### LoRA Memory")
-            out.append("- Base (frozen): " + str(round(base, 1)) + " GB")
-            out.append("- LoRA adapters: " + str(round(lora_gb, 2)) + " GB")
-            out.append("- Activations: " + str(round(act_gb, 1)) + " GB")
         elif mode == "QLoRA":
             base = bytes_to_gb(params * 0.5)
             lora_params = int(params * lora_rank * 0.0001)
             lora_gb = bytes_to_gb(lora_params * dtype_bytes)
             act_gb = base * 0.3
             total = base + lora_gb + act_gb
-            out.append("### QLoRA Memory")
-            out.append("- Base (4-bit): " + str(round(base, 1)) + " GB")
-            out.append("- LoRA adapters: " + str(round(lora_gb, 2)) + " GB")
-            out.append("- Activations: " + str(round(act_gb, 1)) + " GB")
         else:
             overhead = FRAMEWORKS.get(framework, 1.15)
             extra = (weights_gb + kv_gb) * (overhead - 1)
             total = weights_gb + kv_gb + extra
-            out.append("### Inference Memory")
-            out.append("- Weights: " + str(round(weights_gb, 1)) + " GB")
-            out.append("- KV Cache: " + str(round(kv_gb, 1)) + " GB")
-            out.append("- Overhead (" + framework + "): " + str(round(extra, 1)) + " GB")
         if num_gpus > 1:
             per_gpu = total / num_gpus * 1.05
             out.append("")
-            out.append("**Multi-GPU (" + str(num_gpus) + "x):** " + str(round(per_gpu, 1)) + " GB/GPU")
             effective = per_gpu
         else:
             effective = total
         out.append("")
-        out.append("## Total: " + str(round(total, 1)) + " GB")
         out.append("")
-        out.append("### GPU Options")
-        out.append("| GPU | VRAM | Fits | Headroom |")
-        out.append("|-----|------|------|----------|")
-        for gpu, (vram, cost) in GPU_SPECS.items():
-            fits = "Yes" if vram >= effective else "No"
             hr = vram - effective
             sign = "+" if hr >= 0 else ""
-            out.append("| " + gpu + " | " + str(vram) + "GB | " + fits + " | " + sign + str(round(hr, 1)) + "GB |")
-        if effective > 24:
             out.append("")
-            out.append("### Quantization to fit 24GB")
-            out.append("| Method | Size |")
-            out.append("|--------|------|")
-            for name, mult in [("INT8", 1.0), ("4-bit", 0.5), ("3-bit", 0.375)]:
-                size = bytes_to_gb(params * mult) * 1.1
-                out.append("| " + name + " | " + str(round(size, 1)) + "GB |")
-        costs = [(gpu, cost) for gpu, (vram, cost) in GPU_SPECS.items() if vram >= effective and cost > 0]
-        if costs:
-            costs.sort(key=lambda x: x[1])
             out.append("")
-            out.append("### Cloud Costs (8hr/day)")
-            out.append("| GPU | $/hr | $/month |")
-            out.append("|-----|------|---------|")
-            for gpu, cost in costs[:4]:
-                out.append("| " + gpu + " | $" + str(round(cost, 2)) + " | $" + str(int(cost * 176)) + " |")
         return "\n".join(out)
     except Exception as e:
@@ -210,17 +292,18 @@ def compare(models_text, context):
             return "Need at least 2 models"
         out = []
-        out.append("## Comparison")
-        out.append("| Model | Params | Inference | Training | QLoRA |")
-        out.append("|-------|--------|-----------|----------|-------|")
-        for mid in models[:5]:
             try:
                 info = fetch_model_info(mid)
                 config = fetch_config(mid)
                 params, dtype = get_params(info)
                 if params == 0:
-                    out.append("| " + mid + " | Error | - | - | - |")
                     continue
                 db = DTYPE_BYTES.get(dtype, 2)
@@ -234,10 +317,14 @@ def compare(models_text, context):
                 train = w * 4 + w * 2
                 qlora = bytes_to_gb(params * 0.5) * 1.5
-                name = mid.split("/")[-1][:20]
-                out.append("| " + name + " | " + str(round(params / 1e9, 1)) + "B | " + str(round(inf, 1)) + "GB | " + str(round(train, 1)) + "GB | " + str(round(qlora, 1)) + "GB |")
             except Exception:
-                out.append("| " + mid + " | Error | - | - | - |")
         return "\n".join(out)
     except Exception as e:
@@ -245,16 +332,16 @@ def compare(models_text, context):
 # Build the interface
-with gr.Blocks(title="VRAM Calculator") as demo:
     gr.Markdown("# VRAM Calculator for LLMs")
-    gr.Markdown("Estimate VRAM requirements for HuggingFace models")
     with gr.Tabs():
         with gr.TabItem("Calculator"):
             model_in = gr.Textbox(
                 label="Model ID",
                 placeholder="meta-llama/Llama-3.1-8B",
-                info="Enter a HuggingFace model ID"
             )
             mode_in = gr.Radio(
@@ -269,38 +356,42 @@ with gr.Blocks(title="VRAM Calculator") as demo:
                     maximum=131072,
                     value=4096,
                     step=512,
-                    label="Context Length"
                 )
                 batch_in = gr.Slider(
                     minimum=1,
                     maximum=64,
                     value=1,
                     step=1,
-                    label="Batch Size"
                 )
             with gr.Accordion("Advanced Options", open=False):
                 framework_in = gr.Dropdown(
                     choices=list(FRAMEWORKS.keys()),
                     value="vLLM",
-                    label="Framework"
                 )
                 gpus_in = gr.Slider(
                     minimum=1,
                     maximum=8,
                     value=1,
                     step=1,
-                    label="Number of GPUs"
                 )
                 lora_in = gr.Slider(
                     minimum=4,
                     maximum=128,
                     value=16,
                     step=4,
-                    label="LoRA Rank"
                 )
-            calc_btn = gr.Button("Calculate", variant="primary")
             output = gr.Markdown()
             calc_btn.click(
@@ -309,21 +400,32 @@ with gr.Blocks(title="VRAM Calculator") as demo:
                 outputs=output
             )
             gr.Examples(
                 examples=[
                     ["meta-llama/Llama-3.1-8B"],
                     ["meta-llama/Llama-3.1-70B"],
                     ["mistralai/Mistral-7B-v0.1"],
                 ],
                 inputs=[model_in],
-                label="Example Models"
             )
         with gr.TabItem("Compare Models"):
             cmp_in = gr.Textbox(
                 label="Models (one per line)",
-                lines=4,
-                placeholder="meta-llama/Llama-3.1-8B\nmistralai/Mistral-7B-v0.1"
             )
             cmp_ctx = gr.Slider(
                 minimum=512,
@@ -332,7 +434,7 @@ with gr.Blocks(title="VRAM Calculator") as demo:
                 step=512,
                 label="Context Length"
             )
-            cmp_btn = gr.Button("Compare", variant="primary")
             cmp_out = gr.Markdown()
             cmp_btn.click(
@@ -341,8 +443,46 @@ with gr.Blocks(title="VRAM Calculator") as demo:
                 outputs=cmp_out
             )
     gr.Markdown("---")
-    gr.Markdown("*Estimates are approximate. Actual usage may vary.*")
 if __name__ == "__main__":
     demo.launch()

 api = HfApi()
+# Consumer GPUs (no hourly cost)
+CONSUMER_GPUS = {
+    "RTX 3080": 10,
+    "RTX 3080 Ti": 12,
+    "RTX 3090": 24,
+    "RTX 3090 Ti": 24,
+    "RTX 4080": 16,
+    "RTX 4080 Super": 16,
+    "RTX 4090": 24,
+    "RTX 5090": 32,
+}
+# Apple Silicon (no hourly cost)
+APPLE_GPUS = {
+    "M1 Max": 64,
+    "M2 Max": 96,
+    "M2 Ultra": 192,
+    "M3 Max": 128,
+    "M4 Max": 128,
+}
+# Cloud/Datacenter GPUs (with hourly costs from major providers)
+CLOUD_GPUS = {
+    "T4": (16, 0.35),
     "L4": (24, 0.70),
+    "A10G": (24, 1.00),
+    "RTX A5000": (24, 0.80),
+    "RTX A6000": (48, 1.50),
+    "L40S": (48, 1.20),
     "A100 40GB": (40, 3.00),
     "A100 80GB": (80, 5.00),
     "H100 80GB": (80, 8.00),
+    "H100 NVL": (94, 10.00),
 }
 DTYPE_BYTES = {
     "Ollama": 1.08,
 }
+CONTEXT_PRESETS = {
+    "2K (fast chat)": 2048,
+    "4K (standard)": 4096,
+    "8K (extended)": 8192,
+    "16K (long docs)": 16384,
+    "32K (very long)": 32768,
+    "128K (full context)": 131072,
+}
 def bytes_to_gb(b):
     return b / (1024 ** 3)
             opt_gb = bytes_to_gb(params * 8)
             act_gb = weights_gb * 2 * batch
             total = weights_gb + grad_gb + opt_gb + act_gb
+            out.append("### Training Memory Breakdown")
+            out.append("| Component | Size |")
+            out.append("|-----------|------|")
+            out.append("| Weights | " + str(round(weights_gb, 1)) + " GB |")
+            out.append("| Gradients | " + str(round(grad_gb, 1)) + " GB |")
+            out.append("| Optimizer (AdamW) | " + str(round(opt_gb, 1)) + " GB |")
+            out.append("| Activations | " + str(round(act_gb, 1)) + " GB |")
         elif mode == "LoRA":
             base = weights_gb
             lora_params = int(params * lora_rank * 0.0001)
             lora_gb = bytes_to_gb(lora_params * dtype_bytes)
             act_gb = base * 0.3
             total = base + lora_gb + act_gb
+            out.append("### LoRA Memory Breakdown")
+            out.append("| Component | Size |")
+            out.append("|-----------|------|")
+            out.append("| Base model (frozen) | " + str(round(base, 1)) + " GB |")
+            out.append("| LoRA adapters (rank " + str(lora_rank) + ") | " + str(round(lora_gb, 2)) + " GB |")
+            out.append("| Activations | " + str(round(act_gb, 1)) + " GB |")
         elif mode == "QLoRA":
             base = bytes_to_gb(params * 0.5)
             lora_params = int(params * lora_rank * 0.0001)
             lora_gb = bytes_to_gb(lora_params * dtype_bytes)
             act_gb = base * 0.3
             total = base + lora_gb + act_gb
+            out.append("### QLoRA Memory Breakdown")
+            out.append("| Component | Size |")
+            out.append("|-----------|------|")
+            out.append("| Base model (4-bit) | " + str(round(base, 1)) + " GB |")
+            out.append("| LoRA adapters (rank " + str(lora_rank) + ") | " + str(round(lora_gb, 2)) + " GB |")
+            out.append("| Activations | " + str(round(act_gb, 1)) + " GB |")
         else:
             overhead = FRAMEWORKS.get(framework, 1.15)
             extra = (weights_gb + kv_gb) * (overhead - 1)
             total = weights_gb + kv_gb + extra
+            out.append("### Inference Memory Breakdown")
+            out.append("| Component | Size |")
+            out.append("|-----------|------|")
+            out.append("| Model weights | " + str(round(weights_gb, 1)) + " GB |")
+            out.append("| KV Cache (" + str(context) + " ctx) | " + str(round(kv_gb, 1)) + " GB |")
+            out.append("| Framework overhead (" + framework + ") | " + str(round(extra, 1)) + " GB |")
         if num_gpus > 1:
             per_gpu = total / num_gpus * 1.05
             out.append("")
+            out.append("**Multi-GPU (" + str(num_gpus) + "x):** " + str(round(per_gpu, 1)) + " GB per GPU (includes 5% communication overhead)")
             effective = per_gpu
         else:
             effective = total
         out.append("")
+        out.append("## Total Required: " + str(round(total, 1)) + " GB")
+        # Consumer GPUs section with colors
         out.append("")
+        out.append("### Consumer GPUs")
+        out.append("| GPU | VRAM | Status | Headroom |")
+        out.append("|-----|------|--------|----------|")
+        for gpu, vram in CONSUMER_GPUS.items():
             hr = vram - effective
+            if hr >= 2:
+                status = "🟢 Good fit"
+            elif hr >= 0:
+                status = "🟡 Tight fit"
+            else:
+                status = "🔴 Too small"
             sign = "+" if hr >= 0 else ""
+            out.append("| " + gpu + " | " + str(vram) + "GB | " + status + " | " + sign + str(round(hr, 1)) + "GB |")
+        # Apple Silicon section
+        out.append("")
+        out.append("### Apple Silicon (Unified Memory)")
+        out.append("| Chip | Memory | Status | Headroom |")
+        out.append("|------|--------|--------|----------|")
+        for gpu, vram in APPLE_GPUS.items():
+            hr = vram - effective
+            if hr >= 10:
+                status = "🟢 Excellent"
+            elif hr >= 0:
+                status = "🟡 Usable"
+            else:
+                status = "🔴 Too small"
+            sign = "+" if hr >= 0 else ""
+            out.append("| " + gpu + " | " + str(vram) + "GB | " + status + " | " + sign + str(round(hr, 1)) + "GB |")
+        # Cloud GPUs section with costs
+        out.append("")
+        out.append("### Cloud GPU Options")
+        out.append("| GPU | VRAM | Status | $/hour | $/day (8hr) | $/month |")
+        out.append("|-----|------|--------|--------|-------------|---------|")
+        cloud_options = []
+        for gpu, (vram, cost) in CLOUD_GPUS.items():
+            hr = vram - effective
+            if hr >= 2:
+                status = "🟢 Good"
+            elif hr >= 0:
+                status = "🟡 Tight"
+            else:
+                status = "🔴 No"
+            daily = cost * 8
+            monthly = cost * 176  # 22 days * 8 hours
+            cloud_options.append((gpu, vram, hr, status, cost, daily, monthly))
+        # Sort by cost for those that fit
+        cloud_options.sort(key=lambda x: (x[2] < 0, x[4]))
+        for gpu, vram, hr, status, cost, daily, monthly in cloud_options:
+            sign = "+" if hr >= 0 else ""
+            out.append("| " + gpu + " | " + str(vram) + "GB | " + status + " | $" + str(round(cost, 2)) + " | $" + str(round(daily, 2)) + " | $" + str(int(monthly)) + " |")
+        # Best value recommendation
+        fitting_gpus = [(gpu, cost) for gpu, (vram, cost) in CLOUD_GPUS.items() if vram >= effective]
+        if fitting_gpus:
+            fitting_gpus.sort(key=lambda x: x[1])
+            best = fitting_gpus[0]
             out.append("")
+            out.append("**Best value cloud option:** " + best[0] + " at $" + str(round(best[1], 2)) + "/hour")
+        # Quantization suggestions if model is large
+        if effective > 24:
             out.append("")
+            out.append("### Quantization Options (to fit consumer GPUs)")
+            out.append("| Method | Estimated Size | Fits 24GB |")
+            out.append("|--------|----------------|-----------|")
+            for name, mult in [("INT8", 1.0), ("4-bit (GPTQ/AWQ)", 0.5), ("3-bit", 0.375), ("2-bit (extreme)", 0.25)]:
+                size = bytes_to_gb(params * mult) * 1.1
+                fits = "🟢 Yes" if size <= 24 else "🔴 No"
+                out.append("| " + name + " | " + str(round(size, 1)) + "GB | " + fits + " |")
         return "\n".join(out)
     except Exception as e:
             return "Need at least 2 models"
         out = []
+        out.append("## Model Comparison")
+        out.append("")
+        out.append("| Model | Params | Inference | Training | QLoRA | Fits 24GB |")
+        out.append("|-------|--------|-----------|----------|-------|-----------|")
+        for mid in models[:8]:
             try:
                 info = fetch_model_info(mid)
                 config = fetch_config(mid)
                 params, dtype = get_params(info)
                 if params == 0:
+                    out.append("| " + mid + " | Error | - | - | - | - |")
                     continue
                 db = DTYPE_BYTES.get(dtype, 2)
                 train = w * 4 + w * 2
                 qlora = bytes_to_gb(params * 0.5) * 1.5
+                fits = "🟢 Yes" if inf <= 24 else "🔴 No"
+                name = mid.split("/")[-1][:25]
+                out.append("| " + name + " | " + str(round(params / 1e9, 1)) + "B | " + str(round(inf, 1)) + "GB | " + str(round(train, 1)) + "GB | " + str(round(qlora, 1)) + "GB | " + fits + " |")
             except Exception:
+                out.append("| " + mid + " | Error | - | - | - | - |")
+        out.append("")
+        out.append("*Context length: " + str(context) + " tokens*")
         return "\n".join(out)
     except Exception as e:
 # Build the interface
+with gr.Blocks(title="VRAM Calculator", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# VRAM Calculator for LLMs")
+    gr.Markdown("Estimate VRAM requirements for HuggingFace models - inference, training, LoRA, and QLoRA")
     with gr.Tabs():
         with gr.TabItem("Calculator"):
             model_in = gr.Textbox(
                 label="Model ID",
                 placeholder="meta-llama/Llama-3.1-8B",
+                info="Enter a HuggingFace model ID (e.g., organization/model-name)"
             )
             mode_in = gr.Radio(
                     maximum=131072,
                     value=4096,
                     step=512,
+                    label="Context Length",
+                    info="Max tokens for KV cache"
                 )
                 batch_in = gr.Slider(
                     minimum=1,
                     maximum=64,
                     value=1,
                     step=1,
+                    label="Batch Size",
+                    info="Concurrent sequences"
                 )
             with gr.Accordion("Advanced Options", open=False):
                 framework_in = gr.Dropdown(
                     choices=list(FRAMEWORKS.keys()),
                     value="vLLM",
+                    label="Inference Framework"
                 )
                 gpus_in = gr.Slider(
                     minimum=1,
                     maximum=8,
                     value=1,
                     step=1,
+                    label="Number of GPUs",
+                    info="For tensor parallelism"
                 )
                 lora_in = gr.Slider(
                     minimum=4,
                     maximum=128,
                     value=16,
                     step=4,
+                    label="LoRA Rank",
+                    info="Higher = more parameters"
                 )
+            calc_btn = gr.Button("Calculate VRAM", variant="primary")
             output = gr.Markdown()
             calc_btn.click(
                 outputs=output
             )
+            gr.Markdown("### Popular Models")
             gr.Examples(
                 examples=[
                     ["meta-llama/Llama-3.1-8B"],
                     ["meta-llama/Llama-3.1-70B"],
+                    ["meta-llama/Llama-3.2-1B"],
+                    ["meta-llama/Llama-3.2-3B"],
                     ["mistralai/Mistral-7B-v0.1"],
+                    ["mistralai/Mixtral-8x7B-v0.1"],
+                    ["Qwen/Qwen2.5-7B"],
+                    ["Qwen/Qwen2.5-72B"],
+                    ["google/gemma-2-9b"],
+                    ["google/gemma-2-27b"],
+                    ["microsoft/phi-3-mini-4k-instruct"],
+                    ["deepseek-ai/DeepSeek-V2-Lite"],
                 ],
                 inputs=[model_in],
+                label="Click to load"
             )
         with gr.TabItem("Compare Models"):
+            gr.Markdown("Compare VRAM requirements across multiple models")
             cmp_in = gr.Textbox(
                 label="Models (one per line)",
+                lines=6,
+                placeholder="meta-llama/Llama-3.1-8B\nmeta-llama/Llama-3.1-70B\nmistralai/Mistral-7B-v0.1\nQwen/Qwen2.5-7B"
             )
             cmp_ctx = gr.Slider(
                 minimum=512,
                 step=512,
                 label="Context Length"
             )
+            cmp_btn = gr.Button("Compare Models", variant="primary")
             cmp_out = gr.Markdown()
             cmp_btn.click(
                 outputs=cmp_out
             )
+            gr.Markdown("### Quick Comparison Sets")
+            gr.Examples(
+                examples=[
+                    ["meta-llama/Llama-3.1-8B\nmeta-llama/Llama-3.1-70B\nmeta-llama/Llama-3.2-3B"],
+                    ["mistralai/Mistral-7B-v0.1\nmistralai/Mixtral-8x7B-v0.1"],
+                    ["Qwen/Qwen2.5-7B\nQwen/Qwen2.5-14B\nQwen/Qwen2.5-72B"],
+                    ["google/gemma-2-2b\ngoogle/gemma-2-9b\ngoogle/gemma-2-27b"],
+                ],
+                inputs=[cmp_in],
+                label="Click to load comparison"
+            )
+        with gr.TabItem("GPU Reference"):
+            gr.Markdown("## GPU VRAM Reference")
+            gr.Markdown("### Consumer GPUs (NVIDIA GeForce)")
+            consumer_md = "| GPU | VRAM | Notes |\n|-----|------|-------|\n"
+            for gpu, vram in CONSUMER_GPUS.items():
+                consumer_md += "| " + gpu + " | " + str(vram) + "GB | Consumer |\n"
+            gr.Markdown(consumer_md)
+            gr.Markdown("### Apple Silicon")
+            apple_md = "| Chip | Unified Memory | Notes |\n|------|----------------|-------|\n"
+            for gpu, vram in APPLE_GPUS.items():
+                apple_md += "| " + gpu + " | " + str(vram) + "GB | Shared CPU/GPU |\n"
+            gr.Markdown(apple_md)
+            gr.Markdown("### Cloud/Datacenter GPUs")
+            cloud_md = "| GPU | VRAM | Typical $/hr | Best For |\n|-----|------|--------------|----------|\n"
+            for gpu, (vram, cost) in CLOUD_GPUS.items():
+                if vram <= 24:
+                    use = "7B models, fine-tuning"
+                elif vram <= 48:
+                    use = "13B-30B models"
+                else:
+                    use = "70B+ models, training"
+                cloud_md += "| " + gpu + " | " + str(vram) + "GB | $" + str(round(cost, 2)) + " | " + use + " |\n"
+            gr.Markdown(cloud_md)
     gr.Markdown("---")
+    gr.Markdown("*Estimates are approximate. Actual usage varies by implementation, batch size, and optimizations.*")
 if __name__ == "__main__":
     demo.launch()