Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
gary-boon
Claude Opus 4.5
committed on
Commit
·
5333b21
1
Parent(s):
ba27c0c
fix: Use eager attention for output_attentions support
Browse files
Devstral defaults to SDPA (flash attention) which doesn't support
output_attentions=True. Force eager attention to enable capturing
attention weights for visualization.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <[email protected]>
- backend/model_service.py +3 -1
backend/model_service.py
CHANGED
|
@@ -189,11 +189,13 @@ class ModelManager:
|
|
| 189 |
logger.info(f" Max context: {self.max_context}, Batch size: {self.batch_size}")
|
| 190 |
|
| 191 |
# Load model with configured dtype
|
|
|
|
| 192 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 193 |
self.model_name,
|
| 194 |
torch_dtype=self.dtype,
|
| 195 |
low_cpu_mem_usage=True,
|
| 196 |
-
trust_remote_code=True
|
|
|
|
| 197 |
).to(self.device)
|
| 198 |
|
| 199 |
# Load tokenizer
|
|
|
|
| 189 |
logger.info(f" Max context: {self.max_context}, Batch size: {self.batch_size}")
|
| 190 |
|
| 191 |
# Load model with configured dtype
|
| 192 |
+
# Use eager attention to support output_attentions=True for visualization
|
| 193 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 194 |
self.model_name,
|
| 195 |
torch_dtype=self.dtype,
|
| 196 |
low_cpu_mem_usage=True,
|
| 197 |
+
trust_remote_code=True,
|
| 198 |
+
attn_implementation="eager"
|
| 199 |
).to(self.device)
|
| 200 |
|
| 201 |
# Load tokenizer
|