# Docker Compose for DGX Spark deployment
#
# Usage:
#   docker compose -f docker/compose.spark.yml --env-file .env.spark up -d --build
#
# Multi-instance (different branches):
#   PORT=8001 docker compose -p visai-branch-a -f docker/compose.spark.yml --env-file .env.spark up -d --build
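#
# Example .env.spark (illustrative values; defaults shown are the ones this
# file falls back to — only API_KEY and HF_TOKEN have no default):
#   PORT=8000
#   DEFAULT_MODEL=codegen-350m
#   TORCH_DTYPE=fp16
#   MAX_CONTEXT=8192
#   BATCH_SIZE=1
#   API_KEY=<your-api-key>
#   HF_TOKEN=<your-huggingface-token>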

services:
  visualisable-ai-backend:
    build:
      context: ..
      dockerfile: docker/Dockerfile.spark
    ports:
      - "${PORT:-8000}:${PORT:-8000}"
    environment:
      - PORT=${PORT:-8000}
      - DEFAULT_MODEL=${DEFAULT_MODEL:-codegen-350m}
      - TORCH_DTYPE=${TORCH_DTYPE:-fp16}
      - MAX_CONTEXT=${MAX_CONTEXT:-8192}
      - BATCH_SIZE=${BATCH_SIZE:-1}
      - API_KEY=${API_KEY}
      - HF_TOKEN=${HF_TOKEN}
      # HuggingFace cache locations inside the container. TRANSFORMERS_CACHE
      # is deprecated in recent transformers releases but kept for
      # compatibility; HF_HOME covers current versions.
      - TRANSFORMERS_CACHE=/models-cache
      - HF_HOME=/models-cache
      - HUGGINGFACE_HUB_CACHE=/models-cache
    volumes:
      # Persistent model cache (shared across instances)
      - /srv/models-cache/huggingface:/models-cache
      # Runtime outputs
      - ./runs:/app/runs
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
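    # The GPU reservation above requires the NVIDIA Container Toolkit on the host.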
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:${PORT:-8000}/health"]
      interval: 30s
      timeout: 3s
      start_period: 10s
      retries: 3
    restart: unless-stopped
    # Override the image's default command to run model_service on the configured port
    command: >
      uvicorn backend.model_service:app
      --host 0.0.0.0
      --port ${PORT:-8000}
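
# Quick verification once the container reports healthy (assumes the default
# port of 8000; adjust if PORT is overridden in .env.spark):
#   curl -f http://localhost:8000/health
#   docker compose -f docker/compose.spark.yml ps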