# Docker Compose for DGX Spark deployment
#
# Usage:
#   docker compose -f docker/compose.spark.yml --env-file .env.spark up -d --build
#
# Multi-instance (different branches):
#   PORT=8001 docker compose -p visai-branch-a -f docker/compose.spark.yml --env-file .env.spark up -d --build
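#
# Example .env.spark (illustrative values mirroring the defaults below;
# API_KEY and HF_TOKEN are placeholders -- set real secrets before use):
#   PORT=8000
#   DEFAULT_MODEL=codegen-350m
#   TORCH_DTYPE=fp16
#   MAX_CONTEXT=8192
#   BATCH_SIZE=1
#   API_KEY=<your-api-key>
#   HF_TOKEN=<your-huggingface-token>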
services:
  visualisable-ai-backend:
    build:
      context: ..
      dockerfile: docker/Dockerfile.spark
    ports:
      - "${PORT:-8000}:${PORT:-8000}"
    environment:
      - PORT=${PORT:-8000}
      - DEFAULT_MODEL=${DEFAULT_MODEL:-codegen-350m}
      - TORCH_DTYPE=${TORCH_DTYPE:-fp16}
      - MAX_CONTEXT=${MAX_CONTEXT:-8192}
      - BATCH_SIZE=${BATCH_SIZE:-1}
      - API_KEY=${API_KEY}
      - HF_TOKEN=${HF_TOKEN}
      # HuggingFace cache locations (inside container)
      - TRANSFORMERS_CACHE=/models-cache
      - HF_HOME=/models-cache
      - HUGGINGFACE_HUB_CACHE=/models-cache
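      # Note: TRANSFORMERS_CACHE is deprecated in newer transformers releases
      # in favour of HF_HOME; both are set so older and newer versions resolve
      # to the same cache directory.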
    volumes:
      # Persistent model cache (shared across instances)
      - /srv/models-cache/huggingface:/models-cache
      # Runtime outputs
      - ./runs:/app/runs
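      # Creating the host cache directory up front keeps ownership and
      # permissions predictable across instances, e.g.:
      #   sudo mkdir -p /srv/models-cache/huggingface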
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
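    # The GPU reservation above requires the NVIDIA Container Toolkit on the
    # host; without it the `driver: nvidia` device request cannot be satisfied.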
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:${PORT:-8000}/health"]
      interval: 30s
      timeout: 3s
      start_period: 10s
      retries: 3
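    # The healthcheck above assumes curl is installed in the image
    # (see docker/Dockerfile.spark).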
    restart: unless-stopped
    # Override the image entrypoint so model_service runs on a configurable port
    command: >
      uvicorn backend.model_service:app
      --host 0.0.0.0
      --port ${PORT:-8000}
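
# Quick verification after startup (assumes the default PORT=8000):
#   curl -f http://localhost:8000/health
#   docker compose -f docker/compose.spark.yml logs -f visualisable-ai-backend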