# Docker Compose for DGX Spark deployment
#
# Usage:
#   docker compose -f docker/compose.spark.yml --env-file .env.spark up -d --build
#
# Multi-instance (different branches):
#   PORT=8001 docker compose -p visai-branch-a -f docker/compose.spark.yml --env-file .env.spark up -d --build
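#
# Example .env.spark (illustrative values only; API_KEY and HF_TOKEN are
# secrets you must supply, and the other values shown are just the defaults
# this file already falls back to):
#   PORT=8000
#   DEFAULT_MODEL=codegen-350m
#   TORCH_DTYPE=fp16
#   MAX_CONTEXT=8192
#   BATCH_SIZE=1
#   API_KEY=<your-api-key>
#   HF_TOKEN=<your-hf-token>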
services:
  visualisable-ai-backend:
    build:
      context: ..
      dockerfile: docker/Dockerfile.spark
    ports:
      - "${PORT:-8000}:${PORT:-8000}"
    environment:
      - PORT=${PORT:-8000}
      - DEFAULT_MODEL=${DEFAULT_MODEL:-codegen-350m}
      - TORCH_DTYPE=${TORCH_DTYPE:-fp16}
      - MAX_CONTEXT=${MAX_CONTEXT:-8192}
      - BATCH_SIZE=${BATCH_SIZE:-1}
      - API_KEY=${API_KEY}
      - HF_TOKEN=${HF_TOKEN}
      # HuggingFace cache locations (inside the container)
      - TRANSFORMERS_CACHE=/models-cache
      - HF_HOME=/models-cache
      - HUGGINGFACE_HUB_CACHE=/models-cache
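      # Note: recent transformers releases read HF_HOME and warn that
      # TRANSFORMERS_CACHE is deprecated; it is kept here for compatibility
      # with older library versions (an assumption about what
      # Dockerfile.spark installs).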
    volumes:
      # Persistent model cache (shared across instances)
      - /srv/models-cache/huggingface:/models-cache
      # Runtime outputs
      - ./runs:/app/runs
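      # The host path /srv/models-cache/huggingface is assumed to exist and be
      # writable by the container user; create it once per host so every
      # instance shares a single model cache.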
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
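    # GPU reservation via deploy.resources.reservations.devices requires the
    # NVIDIA Container Toolkit on the host and Docker Compose v2; without it,
    # compose cannot select the nvidia device driver and startup fails.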
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:${PORT:-8000}/health"]
      interval: 30s
      timeout: 3s
      start_period: 10s
      retries: 3
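    # The healthcheck assumes curl is available in the image built from
    # docker/Dockerfile.spark; if it is not, install it there or replace the
    # test with an equivalent in-container HTTP probe.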
    restart: unless-stopped
    # Override the container's default command to run model_service on the
    # configurable port
    command: >
      uvicorn backend.model_service:app
      --host 0.0.0.0
      --port ${PORT:-8000}
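#
# Quick smoke test once the container reports healthy (the host port is
# whatever PORT resolved to, 8000 by default):
#   curl -f http://localhost:8000/health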