# Docker Compose for DGX Spark deployment
#
# Usage:
#   docker compose -f docker/compose.spark.yml --env-file .env.spark up -d --build
#
# Multi-instance (different branches):
#   PORT=8001 docker compose -p visai-branch-a -f docker/compose.spark.yml --env-file .env.spark up -d --build

services:
  visualisable-ai-backend:
    build:
      context: ..
      dockerfile: docker/Dockerfile.spark
    ports:
      - "${PORT:-8000}:${PORT:-8000}"
    environment:
      - PORT=${PORT:-8000}
      - DEFAULT_MODEL=${DEFAULT_MODEL:-codegen-350m}
      - TORCH_DTYPE=${TORCH_DTYPE:-fp16}
      - MAX_CONTEXT=${MAX_CONTEXT:-8192}
      - BATCH_SIZE=${BATCH_SIZE:-1}
      - API_KEY=${API_KEY}
      - HF_TOKEN=${HF_TOKEN}
      # HuggingFace cache locations (inside container)
      - TRANSFORMERS_CACHE=/models-cache
      - HF_HOME=/models-cache
      - HUGGINGFACE_HUB_CACHE=/models-cache
    volumes:
      # Persistent model cache (shared across instances)
      - /srv/models-cache/huggingface:/models-cache
      # Runtime outputs
      - ./runs:/app/runs
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:${PORT:-8000}/health"]
      interval: 30s
      timeout: 3s
      start_period: 10s
      retries: 3
    restart: unless-stopped
    # Override entrypoint to use model_service on configurable port
    command: >
      uvicorn backend.model_service:app --host 0.0.0.0 --port ${PORT:-8000}
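
# Example .env.spark, sketched from the variables this file consumes.
# Values shown are the defaults declared above plus hypothetical placeholders
# for the two secrets (API_KEY, HF_TOKEN); adjust for your deployment.
#
#   PORT=8000
#   DEFAULT_MODEL=codegen-350m
#   TORCH_DTYPE=fp16
#   MAX_CONTEXT=8192
#   BATCH_SIZE=1
#   API_KEY=<your-api-key>
#   HF_TOKEN=<your-huggingface-token>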