Sudheer-N committed on
Commit 795fd05 · 1 Parent(s): ab642aa

Fix: set HF cache to /app/cache to avoid permission error

Files changed (2)
  1. Dockerfile +3 -1
  2. app.py +5 -0
Dockerfile CHANGED
@@ -5,8 +5,10 @@ RUN pip install --no-cache-dir -r requirements.txt
 
 COPY app.py .
 
-# Hugging Face model cache
+
+# Create cache dir and set env vars so HF/Transformers don't use `/`
 RUN mkdir -p /app/cache
+ENV HF_HOME=/app/cache
 ENV TRANSFORMERS_CACHE=/app/cache
 
 EXPOSE 7860
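
Note (not part of the diff): a quick way to confirm these cache settings are actually picked up inside the container is to run a small check script. The snippet below is a hypothetical sketch, assuming huggingface_hub is installed via requirements.txt; check_cache.py is not a file in this repo.

# check_cache.py: hypothetical sanity check, not included in this commit
import os
print("HF_HOME:", os.environ.get("HF_HOME"))                        # expected: /app/cache
print("TRANSFORMERS_CACHE:", os.environ.get("TRANSFORMERS_CACHE"))  # expected: /app/cache
# huggingface_hub resolves its download cache from HF_HOME at import time
from huggingface_hub import constants
print("hub cache dir:", constants.HF_HUB_CACHE)                     # expected: /app/cache/hub
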
app.py CHANGED
@@ -3,6 +3,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 import torch
 from pydantic import BaseModel
+import os
+os.environ["HF_HOME"] = "/app/cache"
+os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
 
 app = FastAPI()
 
@@ -23,6 +26,8 @@ def generate_text(request: GenerationRequest):
     inputs = tokenizer(request.prompt, return_tensors="pt")
     outputs = model.generate(**inputs, max_length=request.max_length)
     text = tokenizer.decode(outputs[0], skip_special_tokens=True, do_sample=True, top_k=20)
+    print("Input Prompt:", request.prompt)
+    print("Generated Text:", text)
     return {"generated_text": text}
 
 
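
A caveat worth keeping in mind, not addressed by this commit: transformers reads its cache-related environment variables when the package is imported, so the os.environ assignments added here sit after the `from transformers import ...` line shown in the hunk header and may take effect too late, depending on the transformers version. Since the Dockerfile also sets the same ENV values, the container is covered either way; still, a minimal sketch of the safer ordering in app.py (assuming the first line is the FastAPI import, which is not shown in the hunk, and leaving the rest of the file unchanged):

# hypothetical top of app.py: set the cache location before any Hugging Face import
import os
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"

from fastapi import FastAPI
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
from pydantic import BaseModel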