Text Generation
Transformers
PyTorch
English
experimental
research
bit-level
transformer
reversible
safety
telemetry
language-modeling
Instructions to use WCNegentropy/BitTransformerLM with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use WCNegentropy/BitTransformerLM with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="WCNegentropy/BitTransformerLM")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("WCNegentropy/BitTransformerLM", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use WCNegentropy/BitTransformerLM with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "WCNegentropy/BitTransformerLM"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "WCNegentropy/BitTransformerLM",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker
docker model run hf.co/WCNegentropy/BitTransformerLM
- SGLang
How to use WCNegentropy/BitTransformerLM with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "WCNegentropy/BitTransformerLM" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "WCNegentropy/BitTransformerLM",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "WCNegentropy/BitTransformerLM" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "WCNegentropy/BitTransformerLM",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
- Docker Model Runner
How to use WCNegentropy/BitTransformerLM with Docker Model Runner:
docker model run hf.co/WCNegentropy/BitTransformerLM
| #!/usr/bin/env python3 | |
| """ | |
| BitTransformerLM Dual Dashboard Launcher | |
| ======================================== | |
| Launch both Flask and Gradio dashboards simultaneously for maximum flexibility. | |
| - Flask Dashboard: http://localhost:5000 (Docker/production compatible) | |
| - Gradio Dashboard: http://localhost:7860 (HuggingFace Spaces compatible) | |
| - MCP Server: http://localhost:8000 (if enabled) | |
| """ | |
| import os | |
| import sys | |
| import time | |
| import threading | |
| import subprocess | |
| from pathlib import Path | |
| # Add current directory to path | |
| sys.path.insert(0, str(Path(__file__).parent)) | |
def launch_flask_dashboard():
    """Run the Flask dashboard on 127.0.0.1:5000.

    Sets ``FLASK_ENV`` to ``"development"``, imports the Flask ``app`` from
    ``bit_transformer.dashboard_app`` and calls its ``run`` method with
    debugging and the reloader switched off.

    Any exception raised along the way (including a failed import) is caught
    and reported on stdout instead of being propagated to the caller.
    """
    print("π Starting Flask Dashboard...")
    server_options = {
        "host": "127.0.0.1",
        "port": 5000,
        "debug": False,
        "use_reloader": False,
    }
    try:
        os.environ["FLASK_ENV"] = "development"
        # Imported inside the try so an import failure is reported below
        # rather than raised.
        from bit_transformer.dashboard_app import app as flask_app

        flask_app.run(**server_options)
    except Exception as exc:
        print("β Flask dashboard failed:", exc)
def launch_gradio_dashboard():
    """Run the Gradio dashboard on 127.0.0.1:7860 without a public share link.

    Imports ``run_gradio_server`` from ``gradio_dashboard`` and calls it.
    Any exception raised along the way (including a failed import) is caught
    and reported on stdout instead of being propagated to the caller.
    """
    print("π¨ Starting Gradio Dashboard...")
    server_options = {"host": "127.0.0.1", "port": 7860, "share": False}
    try:
        # Imported inside the try so an import failure is reported below
        # rather than raised.
        from gradio_dashboard import run_gradio_server as serve_gradio

        serve_gradio(**server_options)
    except Exception as exc:
        print("β Gradio dashboard failed:", exc)
def launch_mcp_server():
    """Run the MCP server on 127.0.0.1:8000.

    Imports ``app`` from ``mcp_server`` and calls its ``run`` method with
    debugging and the reloader switched off.

    Any exception raised along the way (including a failed import) is caught
    and reported on stdout instead of being propagated to the caller.
    """
    print("π Starting MCP Server...")
    server_options = {
        "host": "127.0.0.1",
        "port": 8000,
        "debug": False,
        "use_reloader": False,
    }
    try:
        # Imported inside the try so an import failure is reported below
        # rather than raised.
        from mcp_server import app as mcp_app

        mcp_app.run(**server_options)
    except Exception as exc:
        print("β MCP server failed:", exc)
def _env_flag(name, default):
    """Return True when environment variable *name* holds a truthy value.

    ``default`` is the string used when the variable is unset. The original
    ``"true"`` (any case) is still accepted; ``"1"``, ``"yes"`` and ``"on"``
    are accepted as well, and surrounding whitespace is ignored.
    """
    return os.getenv(name, default).strip().lower() in {"1", "true", "yes", "on"}


def _installed_version(package, module):
    """Return the installed version of *package*, or ``"unknown"``.

    Uses ``importlib.metadata`` instead of ``module.__version__`` because
    Flask deprecated that attribute in 3.0 and plans to remove it; reading it
    directly would eventually raise ``AttributeError``.
    """
    from importlib import metadata

    try:
        return metadata.version(package)
    except metadata.PackageNotFoundError:
        # No distribution metadata (e.g. a vendored copy): fall back to the
        # module attribute if it happens to exist.
        return getattr(module, "__version__", "unknown")


def _check_dependencies(required):
    """Import every ``(label, package)`` pair and describe what was found.

    Returns a list of ``"<label> <version>"`` strings in the given order.
    Raises ``ImportError`` as soon as one package cannot be imported.
    """
    import importlib

    found = []
    for label, package in required:
        module = importlib.import_module(package)
        found.append(f"{label} {_installed_version(package, module)}")
    return found


def main():
    """Start the enabled services in daemon threads and block until Ctrl+C.

    Configuration is read from the environment:

    * ``ENABLE_FLASK``  (default ``"true"``)  - Flask dashboard on port 5000
    * ``ENABLE_GRADIO`` (default ``"true"``)  - Gradio dashboard on port 7860
    * ``ENABLE_MCP``    (default ``"false"``) - MCP server on port 8000

    Only the packages needed by the enabled dashboards are required, so a
    Flask-only launch no longer fails when Gradio is not installed. If a
    required package is missing, or no service is enabled, the function
    prints a message and returns without starting anything.

    When the MCP server is enabled, ``MCP_SERVER_ADDR`` is set in the
    environment before the server thread starts.
    """
    print("π BitTransformerLM Dual Dashboard Launcher")
    print("=" * 50)

    # Configuration is read first so the dependency check can be limited to
    # the services that will actually run.
    enable_flask = _env_flag("ENABLE_FLASK", "true")
    enable_gradio = _env_flag("ENABLE_GRADIO", "true")
    enable_mcp = _env_flag("ENABLE_MCP", "false")

    # Check requirements
    required = []
    if enable_flask:
        required.append(("Flask", "flask"))
    if enable_gradio:
        required.append(("Gradio", "gradio"))
    try:
        available = _check_dependencies(required)
    except ImportError as e:
        print(f"β Missing dependencies: {e}")
        print("Please run: pip install -r requirements-gradio.txt")
        return
    if available:
        verb = "are" if len(available) > 1 else "is"
        print(f"β {' and '.join(available)} {verb} available")

    print("π§ Configuration:")
    print(f" Flask Dashboard: {'Enabled' if enable_flask else 'Disabled'}")
    print(f" Gradio Dashboard: {'Enabled' if enable_gradio else 'Disabled'}")
    print(f" MCP Server: {'Enabled' if enable_mcp else 'Disabled'}")
    print()

    if not (enable_flask or enable_gradio or enable_mcp):
        # Nothing to run: return instead of idling forever with no services.
        print("No services enabled; nothing to launch.")
        return

    # Launch threads. They are daemon threads, so they end with the process.
    if enable_flask:
        threading.Thread(target=launch_flask_dashboard, daemon=True).start()
        time.sleep(2)  # Stagger startup
    if enable_mcp:
        # Set MCP server address for other components
        os.environ["MCP_SERVER_ADDR"] = "http://localhost:8000"
        threading.Thread(target=launch_mcp_server, daemon=True).start()
        time.sleep(2)  # Stagger startup
    if enable_gradio:
        threading.Thread(target=launch_gradio_dashboard, daemon=True).start()

    # Wait for startup
    time.sleep(3)

    # Print access URLs
    print("π Access URLs:")
    if enable_flask:
        print(" Flask Dashboard: http://localhost:5000")
    if enable_gradio:
        print(" Gradio Dashboard: http://localhost:7860")
    if enable_mcp:
        print(" MCP Server: http://localhost:8000")
    print()
    print("π‘ Usage Tips:")
    print(" β’ Flask dashboard: Full Docker/production compatibility")
    print(" β’ Gradio dashboard: HuggingFace Spaces ready interface")
    print(" β’ MCP server: Programmatic API access")
    print(" β’ Both dashboards share the same model state")
    print(" β’ Press Ctrl+C to stop all services")
    print()

    try:
        # Keep main thread alive
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        print("\nπ Shutting down all services...")
        # Threads will be cleaned up automatically as they are daemon threads
def parse_cli_args(argv=None):
    """Parse the launcher's command-line options.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with the booleans ``flask_only``,
        ``gradio_only`` and ``enable_mcp``.

    ``--flask-only`` and ``--gradio-only`` are mutually exclusive: passing
    both is rejected by argparse (exit status 2) instead of silently
    ignoring ``--gradio-only``.
    """
    import argparse

    parser = argparse.ArgumentParser(description="BitTransformerLM Dual Dashboard")
    dashboard_choice = parser.add_mutually_exclusive_group()
    dashboard_choice.add_argument(
        "--flask-only", action="store_true", help="Launch only Flask dashboard"
    )
    dashboard_choice.add_argument(
        "--gradio-only", action="store_true", help="Launch only Gradio dashboard"
    )
    parser.add_argument("--enable-mcp", action="store_true", help="Enable MCP server")
    return parser.parse_args(argv)


def apply_cli_overrides(args):
    """Write the parsed CLI flags into the ``ENABLE_*`` environment variables.

    Variables are only touched when the corresponding flag was given, so
    values already present in the environment stay in effect otherwise.
    """
    if args.flask_only:
        os.environ["ENABLE_FLASK"] = "true"
        os.environ["ENABLE_GRADIO"] = "false"
    elif args.gradio_only:
        os.environ["ENABLE_FLASK"] = "false"
        os.environ["ENABLE_GRADIO"] = "true"
    if args.enable_mcp:
        os.environ["ENABLE_MCP"] = "true"


if __name__ == "__main__":
    # Override environment based on args, then hand over to the launcher.
    apply_cli_overrides(parse_cli_args())
    main()