feat: Add comprehensive unit tests for app.py and update dependencies
- app.py +64 -51
- requirements.txt +488 -8
- tests/test_app.py +157 -0
app.py
CHANGED

```diff
@@ -1,23 +1,28 @@
 import gradio as gr
 from huggingface_hub import HfApi, ModelCard, whoami
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
-
+
 from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import QuantizationModifier, GPTQModifier
 from llmcompressor.modifiers.awq import AWQModifier, AWQMapping
-from transformers import AutoModelForCausalLM
+from transformers import AutoModelForCausalLM
 
 # --- Helper Functions ---
 
+
 def get_quantization_recipe(method, model_architecture):
     """
     Returns the appropriate llm-compressor recipe based on the selected method.
     """
     if method == "AWQ":
         mappings = [
-            AWQMapping("re:.*input_layernorm", ["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"]),
+            AWQMapping(
+                "re:.*input_layernorm", ["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"]
+            ),
             AWQMapping("re:.*v_proj", ["re:.*o_proj"]),
-            AWQMapping("re:.*post_attention_layernorm", ["re:.*gate_proj", "re:.*up_proj"]),
+            AWQMapping(
+                "re:.*post_attention_layernorm", ["re:.*gate_proj", "re:.*up_proj"]
+            ),
             AWQMapping("re:.*up_proj", ["re:.*down_proj"]),
         ]
         return [
@@ -25,7 +30,7 @@ def get_quantization_recipe(method, model_architecture):
                 ignore=["lm_head"],
                 scheme="W4A16_ASYM",
                 targets=["Linear"],
-                mappings=mappings
+                mappings=mappings,
             ),
         ]
     elif method == "GPTQ":
@@ -34,7 +39,9 @@ def get_quantization_recipe(method, model_architecture):
             "MistralForCausalLM": "MistralDecoderLayer",
             "MixtralForCausalLM": "MixtralDecoderLayer",
         }
-        sequential_target = sequential_target_map.get(model_architecture, "LlamaDecoderLayer")
+        sequential_target = sequential_target_map.get(
+            model_architecture, "LlamaDecoderLayer"
+        )
 
         return [
             GPTQModifier(
@@ -49,11 +56,9 @@ def get_quantization_recipe(method, model_architecture):
         if "Mixtral" in model_architecture:
             ignore_layers.append("re:.*block_sparse_moe.gate")
 
-        return QuantizationModifier(
-            scheme="FP8",
-            targets="Linear",
-            ignore=ignore_layers
-        )
+        return [QuantizationModifier(
+            scheme="FP8", targets="Linear", ignore=ignore_layers
+        )]
     else:
         raise ValueError(f"Unsupported quantization method: {method}")
 
@@ -62,18 +67,16 @@ def compress_and_upload(
     model_id: str,
     quant_method: str,
     oauth_token: gr.OAuthToken | None,
-    *,
-    request: gr.Request
 ):
     """
     Compresses a model using llm-compressor and uploads it to a new HF repo.
     """
     if not model_id:
         raise gr.Error("Please select a model from the search bar.")
-
+
     if oauth_token is None:
         raise gr.Error("Authentication error. Please log in to continue.")
-
+
     token = oauth_token.token
 
     try:
@@ -81,12 +84,15 @@ def compress_and_upload(
         username = whoami(token=token)["name"]
 
         # --- 1. Load Model and Tokenizer ---
-        model = AutoModelForCausalLM.from_pretrained(
-            …
-        )
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id, torch_dtype="auto", device_map=None, token=token
+        )
+
         output_dir = f"{model_id.split('/')[-1]}-{quant_method}"
 
         # --- 2. Get Recipe ---
+        if not model.config.architectures:
+            raise gr.Error("Could not determine model architecture.")
         recipe = get_quantization_recipe(quant_method, model.config.architectures[0])
 
         # --- 3. Run Compression ---
@@ -140,41 +146,48 @@ For more details on the recipe used, refer to the `recipe.yaml` file in this repo
 
         return f'<h1>✅ Success!</h1><br/>Model compressed and saved to your new repo: <a href="{repo_url}" target="_blank" style="text-decoration:underline">{repo_id}</a>'
 
+    except gr.Error as e:
+        raise e
     except Exception as e:
         error_message = str(e).replace("\n", "<br/>")
         return f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{error_message}</pre>'
 
+
+
 # --- Gradio Interface ---
-
-gr.…
-…
-demo.queue(max_size=5).launch()
+def build_gradio_app():
+    with gr.Blocks(css="footer {display: none !important;}") as demo:
+        gr.Markdown("# LLM-Compressor My Repo")
+        gr.Markdown(
+            "Log in, choose a model, select a quantization method, and this Space will create a new compressed model repository on your Hugging Face profile."
+        )
+        with gr.Row():
+            login_button = gr.LoginButton(min_width=250)
+
+        gr.Markdown("### 1. Select a Model from the Hugging Face Hub")
+        model_input = HuggingfaceHubSearch(
+            label="Search for a Model",
+            search_type="model",
+        )
+
+        gr.Markdown("### 2. Choose a Quantization Method")
+        quant_method_dropdown = gr.Dropdown(
+            ["AWQ", "GPTQ", "FP8"], label="Quantization Method", value="AWQ"
+        )
+
+        compress_button = gr.Button("Compress and Create Repo", variant="primary")
+        output_html = gr.HTML(label="Result")
+
+        compress_button.click(
+            fn=compress_and_upload,
+            inputs=[model_input, quant_method_dropdown],
+            outputs=output_html,
+        )
+    return demo
+
+def main():
+    demo = build_gradio_app()
+    demo.queue(max_size=5).launch()
+
+if __name__ == "__main__":
+    main()
```
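Taken together, the refactored helpers form a three-step pipeline: load the model, build a recipe for its architecture, then hand both to llm-compressor's `oneshot`. Below is a minimal sketch of that path, assuming `app.py` is importable; `"org/test_model"` is a placeholder id, the `oneshot` keywords mirror the ones the new tests assert, and real runs of the data-driven methods (AWQ, GPTQ) would also pass a calibration dataset to `oneshot`.

```python
# Hypothetical sketch: exercises the same call chain as compress_and_upload,
# minus the Hub upload steps. "org/test_model" is a placeholder, not a real repo.
from transformers import AutoModelForCausalLM

from app import get_quantization_recipe
from llmcompressor import oneshot

model = AutoModelForCausalLM.from_pretrained("org/test_model", torch_dtype="auto")

# config.architectures[0] is e.g. "LlamaForCausalLM"; the recipe branches on it.
recipe = get_quantization_recipe("FP8", model.config.architectures[0])

# oneshot applies the recipe to the model and writes the compressed weights out.
oneshot(model=model, recipe=recipe, output_dir="test_model-FP8")
```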
requirements.txt
CHANGED

The previous eight-line dependency list (which included `accelerate`) is replaced wholesale (@@ -1,8 +1,488 @@) by a uv-generated lock file that pins every direct and transitive dependency:

```text
# This file was autogenerated by uv via the following command:
#    uv pip compile pyproject.toml -o requirements.txt
absl-py==2.3.1
    # via rouge-score
accelerate==1.12.0
    # via
    #   llm-compressor-my-repo (pyproject.toml)
    #   auto-round
    #   llmcompressor
    #   lm-eval
    #   peft
    #   transformers
aiofiles==24.1.0
    # via gradio
aiohappyeyeballs==2.6.1
    # via aiohttp
aiohttp==3.13.2
    # via fsspec
aiosignal==1.4.0
    # via aiohttp
annotated-doc==0.0.4
    # via fastapi
annotated-types==0.7.0
    # via pydantic
anyio==4.12.0
    # via
    #   gradio
    #   httpx
    #   starlette
attrs==25.4.0
    # via
    #   aiohttp
    #   jsonlines
authlib==1.6.5
    # via gradio
auto-round @ git+https://github.com/intel/auto-round.git@5ffe56ddc51cbc69cd6fe87a0b8a7d91e28bf522
    # via llmcompressor
brotli==1.2.0
    # via gradio
certifi==2025.11.12
    # via
    #   httpcore
    #   httpx
    #   requests
cffi==2.0.0
    # via cryptography
chardet==5.2.0
    # via mbstrdecoder
charset-normalizer==3.4.4
    # via requests
click==8.3.1
    # via
    #   nltk
    #   typer
    #   typer-slim
    #   uvicorn
colorama==0.4.6
    # via
    #   sacrebleu
    #   tqdm-multiprocess
compressed-tensors==0.12.3a20251114
    # via llmcompressor
cryptography==46.0.3
    # via authlib
dataproperty==1.1.0
    # via
    #   pytablewriter
    #   tabledata
datasets==4.4.1
    # via
    #   llm-compressor-my-repo (pyproject.toml)
    #   auto-round
    #   evaluate
    #   llmcompressor
    #   lm-eval
dill==0.4.0
    # via
    #   datasets
    #   evaluate
    #   lm-eval
    #   multiprocess
evaluate==0.4.6
    # via lm-eval
fastapi==0.122.0
    # via gradio
ffmpy==1.0.0
    # via gradio
filelock==3.20.0
    # via
    #   datasets
    #   huggingface-hub
    #   torch
    #   transformers
frozenlist==1.8.0
    # via
    #   aiohttp
    #   aiosignal
fsspec==2025.10.0
    # via
    #   datasets
    #   evaluate
    #   gradio-client
    #   huggingface-hub
    #   torch
gradio==5.50.0
    # via
    #   llm-compressor-my-repo (pyproject.toml)
    #   gradio-huggingfacehub-search
gradio-client==1.14.0
    # via gradio
gradio-huggingfacehub-search==0.0.12
    # via llm-compressor-my-repo (pyproject.toml)
groovy==0.1.2
    # via gradio
h11==0.16.0
    # via
    #   httpcore
    #   uvicorn
hf-xet==1.2.0
    # via
    #   llm-compressor-my-repo (pyproject.toml)
    #   huggingface-hub
httpcore==1.0.9
    # via httpx
httpx==0.28.1
    # via
    #   datasets
    #   gradio
    #   gradio-client
    #   huggingface-hub
    #   safehttpx
huggingface-hub==1.1.6
    # via
    #   llm-compressor-my-repo (pyproject.toml)
    #   accelerate
    #   datasets
    #   evaluate
    #   gradio
    #   gradio-client
    #   peft
    #   tokenizers
    #   transformers
idna==3.11
    # via
    #   anyio
    #   httpx
    #   requests
    #   yarl
itsdangerous==2.2.0
    # via gradio
jinja2==3.1.6
    # via
    #   gradio
    #   torch
joblib==1.5.2
    # via
    #   nltk
    #   scikit-learn
jsonlines==4.0.0
    # via lm-eval
llmcompressor @ git+https://github.com/vllm-project/llm-compressor.git@db0b68d9faf09066e9b7d679b39a977e484d9b91
    # via llm-compressor-my-repo (pyproject.toml)
lm-eval==0.4.9.2
    # via auto-round
loguru==0.7.3
    # via
    #   compressed-tensors
    #   llmcompressor
lxml==6.0.2
    # via sacrebleu
markdown-it-py==4.0.0
    # via rich
markupsafe==3.0.3
    # via
    #   gradio
    #   jinja2
mbstrdecoder==1.1.4
    # via
    #   dataproperty
    #   pytablewriter
    #   typepy
mdurl==0.1.2
    # via markdown-it-py
more-itertools==10.8.0
    # via lm-eval
mpmath==1.3.0
    # via sympy
multidict==6.7.0
    # via
    #   aiohttp
    #   yarl
multiprocess==0.70.18
    # via
    #   datasets
    #   evaluate
networkx==3.6
    # via torch
nltk==3.9.2
    # via rouge-score
numexpr==2.14.1
    # via lm-eval
numpy==2.3.5
    # via
    #   accelerate
    #   auto-round
    #   datasets
    #   evaluate
    #   gradio
    #   llmcompressor
    #   numexpr
    #   pandas
    #   peft
    #   rouge-score
    #   sacrebleu
    #   safetensors
    #   scikit-learn
    #   scipy
    #   transformers
nvidia-cublas-cu12==12.8.4.1
    # via
    #   nvidia-cudnn-cu12
    #   nvidia-cusolver-cu12
    #   torch
nvidia-cuda-cupti-cu12==12.8.90
    # via torch
nvidia-cuda-nvrtc-cu12==12.8.93
    # via torch
nvidia-cuda-runtime-cu12==12.8.90
    # via torch
nvidia-cudnn-cu12==9.10.2.21
    # via torch
nvidia-cufft-cu12==11.3.3.83
    # via torch
nvidia-cufile-cu12==1.13.1.3
    # via torch
nvidia-curand-cu12==10.3.9.90
    # via torch
nvidia-cusolver-cu12==11.7.3.90
    # via torch
nvidia-cusparse-cu12==12.5.8.93
    # via
    #   nvidia-cusolver-cu12
    #   torch
nvidia-cusparselt-cu12==0.7.1
    # via torch
nvidia-ml-py==13.580.82
    # via llmcompressor
nvidia-nccl-cu12==2.27.5
    # via torch
nvidia-nvjitlink-cu12==12.8.93
    # via
    #   nvidia-cufft-cu12
    #   nvidia-cusolver-cu12
    #   nvidia-cusparse-cu12
    #   torch
nvidia-nvshmem-cu12==3.3.20
    # via torch
nvidia-nvtx-cu12==12.8.90
    # via torch
orjson==3.11.4
    # via gradio
packaging==25.0
    # via
    #   accelerate
    #   auto-round
    #   datasets
    #   evaluate
    #   gradio
    #   gradio-client
    #   huggingface-hub
    #   peft
    #   safetensors
    #   transformers
    #   typepy
pandas==2.3.3
    # via
    #   datasets
    #   evaluate
    #   gradio
pathvalidate==3.3.1
    # via pytablewriter
peft==0.18.0
    # via lm-eval
pillow==11.3.0
    # via
    #   auto-round
    #   gradio
    #   llmcompressor
portalocker==3.2.0
    # via sacrebleu
propcache==0.4.1
    # via
    #   aiohttp
    #   yarl
psutil==7.1.3
    # via
    #   accelerate
    #   peft
py-cpuinfo==9.0.0
    # via auto-round
pyarrow==22.0.0
    # via datasets
pybind11==3.0.1
    # via lm-eval
pycparser==2.23
    # via cffi
pydantic==2.12.3
    # via
    #   compressed-tensors
    #   fastapi
    #   gradio
pydantic-core==2.41.4
    # via pydantic
pydub==0.25.1
    # via gradio
pygments==2.19.2
    # via rich
pytablewriter==1.2.1
    # via lm-eval
python-dateutil==2.9.0.post0
    # via
    #   pandas
    #   typepy
python-multipart==0.0.20
    # via gradio
pytz==2025.2
    # via
    #   pandas
    #   typepy
pyyaml==6.0.3
    # via
    #   accelerate
    #   datasets
    #   gradio
    #   huggingface-hub
    #   llmcompressor
    #   peft
    #   transformers
regex==2025.11.3
    # via
    #   nltk
    #   sacrebleu
    #   transformers
requests==2.32.5
    # via
    #   datasets
    #   evaluate
    #   llmcompressor
    #   transformers
rich==14.2.0
    # via typer
rouge-score==0.1.2
    # via lm-eval
ruff==0.14.7
    # via gradio
sacrebleu==2.5.1
    # via lm-eval
safehttpx==0.1.7
    # via gradio
safetensors==0.7.0
    # via
    #   accelerate
    #   huggingface-hub
    #   peft
    #   transformers
scikit-learn==1.7.2
    # via lm-eval
scipy==1.16.3
    # via scikit-learn
semantic-version==2.10.0
    # via gradio
sentencepiece==0.2.1
    # via auto-round
setuptools==80.9.0
    # via
    #   pytablewriter
    #   torch
shellingham==1.5.4
    # via
    #   huggingface-hub
    #   typer
six==1.17.0
    # via
    #   python-dateutil
    #   rouge-score
sqlitedict==2.1.0
    # via lm-eval
starlette==0.50.0
    # via
    #   fastapi
    #   gradio
sympy==1.14.0
    # via torch
tabledata==1.3.4
    # via pytablewriter
tabulate==0.9.0
    # via sacrebleu
tcolorpy==0.1.7
    # via pytablewriter
threadpoolctl==3.6.0
    # via
    #   auto-round
    #   scikit-learn
tokenizers==0.22.1
    # via transformers
tomlkit==0.13.3
    # via gradio
torch==2.9.1
    # via
    #   llm-compressor-my-repo (pyproject.toml)
    #   accelerate
    #   auto-round
    #   compressed-tensors
    #   huggingface-hub
    #   llmcompressor
    #   lm-eval
    #   peft
    #   safetensors
    #   transformers
tqdm==4.67.1
    # via
    #   auto-round
    #   datasets
    #   evaluate
    #   huggingface-hub
    #   llmcompressor
    #   nltk
    #   peft
    #   tqdm-multiprocess
    #   transformers
tqdm-multiprocess==0.0.11
    # via lm-eval
transformers @ git+https://github.com/huggingface/transformers.git@cac0a28c83cf87b7a05495de3177099c635ba852
    # via
    #   llm-compressor-my-repo (pyproject.toml)
    #   auto-round
    #   compressed-tensors
    #   llmcompressor
    #   lm-eval
    #   peft
triton==3.5.1
    # via torch
typepy==1.3.4
    # via
    #   dataproperty
    #   pytablewriter
    #   tabledata
typer==0.20.0
    # via gradio
typer-slim==0.20.0
    # via
    #   huggingface-hub
    #   transformers
typing-extensions==4.15.0
    # via
    #   aiosignal
    #   anyio
    #   fastapi
    #   gradio
    #   gradio-client
    #   huggingface-hub
    #   pydantic
    #   pydantic-core
    #   starlette
    #   torch
    #   typer
    #   typer-slim
    #   typing-inspection
typing-inspection==0.4.2
    # via pydantic
tzdata==2025.2
    # via pandas
urllib3==2.5.0
    # via requests
uvicorn==0.38.0
    # via gradio
websockets==15.0.1
    # via gradio-client
word2number==1.1
    # via lm-eval
xxhash==3.6.0
    # via
    #   datasets
    #   evaluate
yarl==1.22.0
    # via aiohttp
zstandard==0.25.0
    # via lm-eval
```
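As the header records, the lock file is regenerated with `uv pip compile pyproject.toml -o requirements.txt` rather than edited by hand. Note that three dependencies (auto-round, llmcompressor, transformers) are pinned to git commits, which keeps the Space build reproducible at the cost of bumping those refs manually.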
tests/test_app.py
ADDED

```python
import pytest
from unittest.mock import MagicMock, patch
from app import get_quantization_recipe, compress_and_upload
import gradio as gr
from transformers import AutoModelForCausalLM
from huggingface_hub import HfApi, ModelCard, whoami
from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import QuantizationModifier, GPTQModifier
from llmcompressor.modifiers.awq import AWQModifier, AWQMapping

# Mock external dependencies for compress_and_upload
@pytest.fixture
def mock_hf_api():
    with patch('app.HfApi') as mock_api:
        mock_api_instance = mock_api.return_value
        mock_api_instance.create_repo.return_value = "https://huggingface.co/test_user/test_model-AWQ"
        yield mock_api_instance

@pytest.fixture
def mock_whoami():
    with patch('app.whoami') as mock_whoami_func:
        mock_whoami_func.return_value = {"name": "test_user"}
        yield mock_whoami_func

@pytest.fixture
def mock_auto_model_for_causal_lm():
    with patch('app.AutoModelForCausalLM') as mock_model_class:
        mock_model_instance = MagicMock()
        mock_model_instance.config.architectures = ["LlamaForCausalLM"]
        mock_model_class.from_pretrained.return_value = mock_model_instance
        yield mock_model_class

@pytest.fixture
def mock_oneshot():
    with patch('app.oneshot') as mock_oneshot_func:
        yield mock_oneshot_func

@pytest.fixture
def mock_model_card():
    with patch('app.ModelCard') as mock_card_class:
        mock_card_instance = MagicMock()
        mock_card_class.return_value = mock_card_instance
        yield mock_card_class

@pytest.fixture
def mock_gr_oauth_token():
    mock_token = MagicMock(spec=gr.OAuthToken)
    mock_token.token = "test_token"
    return mock_token

# --- Test get_quantization_recipe ---
def test_get_quantization_recipe_awq():
    recipe = get_quantization_recipe("AWQ", "LlamaForCausalLM")
    assert len(recipe) == 1
    assert isinstance(recipe[0], AWQModifier)

def test_get_quantization_recipe_gptq():
    recipe = get_quantization_recipe("GPTQ", "LlamaForCausalLM")
    assert len(recipe) == 1
    assert isinstance(recipe[0], GPTQModifier)

def test_get_quantization_recipe_gptq_mistral():
    recipe = get_quantization_recipe("GPTQ", "MistralForCausalLM")
    assert len(recipe) == 1
    assert isinstance(recipe[0], GPTQModifier)
    assert recipe[0].sequential_targets == ["MistralDecoderLayer"]

def test_get_quantization_recipe_gptq_mixtral():
    recipe = get_quantization_recipe("GPTQ", "MixtralForCausalLM")
    assert len(recipe) == 1
    assert isinstance(recipe[0], GPTQModifier)
    assert recipe[0].sequential_targets == ["MixtralDecoderLayer"]

def test_get_quantization_recipe_fp8():
    recipe = get_quantization_recipe("FP8", "LlamaForCausalLM")
    assert len(recipe) == 1
    assert isinstance(recipe[0], QuantizationModifier)
    assert recipe[0].scheme == "FP8"
    assert recipe[0].ignore == ["lm_head"]

def test_get_quantization_recipe_fp8_mixtral():
    recipe = get_quantization_recipe("FP8", "MixtralForCausalLM")
    assert len(recipe) == 1
    assert isinstance(recipe[0], QuantizationModifier)
    assert recipe[0].scheme == "FP8"
    assert "re:.*block_sparse_moe.gate" in recipe[0].ignore

def test_get_quantization_recipe_unsupported():
    with pytest.raises(ValueError, match="Unsupported quantization method: INVALID"):
        get_quantization_recipe("INVALID", "LlamaForCausalLM")

# --- Test compress_and_upload ---
def test_compress_and_upload_no_model_id(mock_gr_oauth_token):
    with pytest.raises(gr.Error, match="Please select a model from the search bar."):
        compress_and_upload("", "AWQ", mock_gr_oauth_token)

def test_compress_and_upload_no_oauth_token():
    with pytest.raises(gr.Error, match="Authentication error. Please log in to continue."):
        compress_and_upload("test_model", "AWQ", None)

def test_compress_and_upload_success(
    mock_hf_api,
    mock_whoami,
    mock_auto_model_for_causal_lm,
    mock_oneshot,
    mock_model_card,
    mock_gr_oauth_token,
):
    model_id = "org/test_model"
    quant_method = "AWQ"
    result = compress_and_upload(model_id, quant_method, mock_gr_oauth_token)

    mock_whoami.assert_called_once_with(token="test_token")
    mock_auto_model_for_causal_lm.from_pretrained.assert_called_once_with(
        model_id, torch_dtype="auto", device_map=None, token="test_token"
    )
    mock_oneshot.assert_called_once()
    assert mock_oneshot.call_args[1]["model"] == mock_auto_model_for_causal_lm.from_pretrained.return_value
    assert mock_oneshot.call_args[1]["recipe"] is not None
    assert mock_oneshot.call_args[1]["output_dir"] == f"test_model-{quant_method}"

    mock_hf_api.create_repo.assert_called_once_with(
        repo_id=f"test_user/test_model-{quant_method}", exist_ok=True
    )
    mock_hf_api.upload_folder.assert_called_once_with(
        folder_path=f"test_model-{quant_method}",
        repo_id=f"test_user/test_model-{quant_method}",
        commit_message=f"Upload {quant_method} compressed model",
    )
    mock_model_card.assert_called_once()
    mock_model_card.return_value.push_to_hub.assert_called_once_with(
        f"test_user/test_model-{quant_method}", token="test_token"
    )

    assert "✅ Success!" in result
    assert "https://huggingface.co/test_user/test_model-AWQ" in result

def test_compress_and_upload_model_no_architecture(
    mock_hf_api,
    mock_whoami,
    mock_auto_model_for_causal_lm,
    mock_gr_oauth_token,
):
    mock_auto_model_for_causal_lm.from_pretrained.return_value.config.architectures = []
    with pytest.raises(gr.Error, match="Could not determine model architecture."):
        compress_and_upload("test_model", "AWQ", mock_gr_oauth_token)

def test_compress_and_upload_generic_exception(
    mock_hf_api,
    mock_whoami,
    mock_auto_model_for_causal_lm,
    mock_gr_oauth_token,
):
    mock_whoami.side_effect = Exception("Network error")
    result = compress_and_upload("test_model", "AWQ", mock_gr_oauth_token)
    assert "❌ ERROR" in result
    assert "Network error" in result
```
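One detail worth calling out: the fixtures patch `app.HfApi`, `app.whoami`, and friends rather than the libraries they come from. `unittest.mock.patch` has to target the name where it is looked up, and `app.py` imported these names into its own namespace. A minimal illustration of that rule, assuming `app.py` is importable:

```python
from unittest.mock import patch

# app.py did `from huggingface_hub import whoami`, so compress_and_upload
# resolves `whoami` through app's namespace; patching there intercepts it.
with patch("app.whoami") as fake_whoami:
    fake_whoami.return_value = {"name": "test_user"}
    # Calls into app.compress_and_upload inside this block see the fake.

# Patching the origin would not help: patch("huggingface_hub.whoami")
# leaves app's already-imported reference bound to the real function.
```

With every network and GPU entry point mocked this way, the suite runs offline with `pytest tests/test_app.py`.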