gary-boon / Claude Opus 4.5 committed · Commit cb6f39c
1 Parent(s): 5333b21
fix: Convert bfloat16 to float32 for numpy compatibility
NumPy doesn't support bfloat16 directly. Add .float() conversion
before .numpy() calls in attention/activation extraction.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <[email protected]>
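
The failure mode described above is easy to reproduce in isolation. The snippet below is an illustrative sketch only; the tensor name and shape are made up and do not come from model_service.py:

```python
import torch

# Any bfloat16 tensor triggers the problem; attention weights are a typical case.
attn = torch.softmax(torch.randn(4, 4, dtype=torch.bfloat16), dim=-1)

# attn.numpy() raises: TypeError: Got unsupported ScalarType BFloat16
arr = attn.float().numpy()   # upcast to float32 first, then convert
print(arr.dtype)             # float32
```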
- backend/model_service.py +8 -6
backend/model_service.py CHANGED
```diff
@@ -225,7 +225,7 @@ class ModelManager:
 
         # Average across all heads for visualization
         # Shape: (batch_size, num_heads, seq_len, seq_len) -> (seq_len, seq_len)
-        avg_attention = layer_attention[0].mean(dim=0).detach().cpu().numpy()
+        avg_attention = layer_attention[0].mean(dim=0).detach().cpu().float().numpy()
 
         # Don't sample if we have complete attention - we want the full matrix
         # Only sample if the matrix is very large (>100x100)
@@ -263,7 +263,7 @@ class ModelManager:
 
     def extract_activation_trace(self, layer_idx: int, hidden_states) -> TraceData:
         """Extract activation pattern trace from hidden states"""
-        activations = hidden_states[0].detach().cpu().numpy()
+        activations = hidden_states[0].detach().cpu().float().numpy()
 
         # Handle potential overflow and get safe mean
         try:
@@ -1650,7 +1650,8 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: boo
         confidence = 0.0 if math.isnan(confidence) or math.isinf(confidence) else confidence
 
         # Get full attention weights for this head [seq_len, seq_len]
-        attention_matrix = layer_attn[head_idx].cpu().numpy().tolist()
+        # Convert to float32 for numpy (bfloat16 not supported)
+        attention_matrix = layer_attn[head_idx].cpu().float().numpy().tolist()
 
         # Get Q/K/V for this head if available
         q_matrix = None
@@ -1658,9 +1659,10 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: boo
         v_matrix = None
         if layer_idx in qkv_captures:
             # Q/K/V shape: [seq_len, n_heads, head_dim]
-            q_matrix = qkv_captures[layer_idx]['q'][:, head_idx, :].numpy().tolist()
-            k_matrix = qkv_captures[layer_idx]['k'][:, head_idx, :].numpy().tolist()
-            v_matrix = qkv_captures[layer_idx]['v'][:, head_idx, :].numpy().tolist()
+            # Convert to float32 for numpy (bfloat16 not supported)
+            q_matrix = qkv_captures[layer_idx]['q'][:, head_idx, :].float().numpy().tolist()
+            k_matrix = qkv_captures[layer_idx]['k'][:, head_idx, :].float().numpy().tolist()
+            v_matrix = qkv_captures[layer_idx]['v'][:, head_idx, :].float().numpy().tolist()
 
         critical_heads.append({
             "head_idx": head_idx,
```
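
If more call sites need the same treatment later, a small helper could centralize the conversion instead of repeating `.float()` before every `.numpy()`. This is a hypothetical sketch, not part of this commit; `to_numpy` is an invented name:

```python
import torch

def to_numpy(t: torch.Tensor):
    """Hypothetical helper: detach, move to CPU, and upcast bfloat16 before .numpy()."""
    t = t.detach().cpu()
    if t.dtype == torch.bfloat16:
        t = t.float()  # NumPy has no bfloat16 dtype, so upcast to float32 first
    return t.numpy()
```

With such a helper, a chained call like `layer_attention[0].mean(dim=0).detach().cpu().float().numpy()` would read `to_numpy(layer_attention[0].mean(dim=0))`.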