gary-boon / Claude Opus 4.5 committed · Commit cb6f39c
1 Parent(s): 5333b21
fix: Convert bfloat16 to float32 for numpy compatibility
NumPy doesn't support bfloat16 directly. Add .float() conversion
before .numpy() calls in attention/activation extraction.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <[email protected]>
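
The failure mode described above is easy to reproduce in isolation. The snippet below is an illustrative sketch only; the tensor name and shape are made up and do not come from model_service.py:

```python
import torch

# Any bfloat16 tensor triggers the problem; attention weights are a typical case.
attn = torch.softmax(torch.randn(4, 4, dtype=torch.bfloat16), dim=-1)

# attn.numpy() raises: TypeError: Got unsupported ScalarType BFloat16
arr = attn.float().numpy()   # upcast to float32 first, then convert
print(arr.dtype)             # float32
```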
- backend/model_service.py +8 -6
backend/model_service.py CHANGED
```diff
@@ -225,7 +225,7 @@ class ModelManager:
 
         # Average across all heads for visualization
         # Shape: (batch_size, num_heads, seq_len, seq_len) -> (seq_len, seq_len)
-        avg_attention = layer_attention[0].mean(dim=0).detach().cpu().numpy()
+        avg_attention = layer_attention[0].mean(dim=0).detach().cpu().float().numpy()
 
         # Don't sample if we have complete attention - we want the full matrix
         # Only sample if the matrix is very large (>100x100)
@@ -263,7 +263,7 @@ class ModelManager:
 
     def extract_activation_trace(self, layer_idx: int, hidden_states) -> TraceData:
         """Extract activation pattern trace from hidden states"""
-        activations = hidden_states[0].detach().cpu().numpy()
+        activations = hidden_states[0].detach().cpu().float().numpy()
 
         # Handle potential overflow and get safe mean
         try:
@@ -1650,7 +1650,8 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: boo
         confidence = 0.0 if math.isnan(confidence) or math.isinf(confidence) else confidence
 
         # Get full attention weights for this head [seq_len, seq_len]
-        attention_matrix = layer_attn[head_idx].cpu().numpy().tolist()
+        # Convert to float32 for numpy (bfloat16 not supported)
+        attention_matrix = layer_attn[head_idx].cpu().float().numpy().tolist()
 
         # Get Q/K/V for this head if available
         q_matrix = None
@@ -1658,9 +1659,10 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: boo
         v_matrix = None
         if layer_idx in qkv_captures:
             # Q/K/V shape: [seq_len, n_heads, head_dim]
-            q_matrix = qkv_captures[layer_idx]['q'][:, head_idx, :].numpy().tolist()
-            k_matrix = qkv_captures[layer_idx]['k'][:, head_idx, :].numpy().tolist()
-            v_matrix = qkv_captures[layer_idx]['v'][:, head_idx, :].numpy().tolist()
+            # Convert to float32 for numpy (bfloat16 not supported)
+            q_matrix = qkv_captures[layer_idx]['q'][:, head_idx, :].float().numpy().tolist()
+            k_matrix = qkv_captures[layer_idx]['k'][:, head_idx, :].float().numpy().tolist()
+            v_matrix = qkv_captures[layer_idx]['v'][:, head_idx, :].float().numpy().tolist()
 
         critical_heads.append({
             "head_idx": head_idx,
```
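
If more call sites need the same treatment later, a small helper could centralize the conversion instead of repeating `.float()` before every `.numpy()`. This is a hypothetical sketch, not part of this commit; `to_numpy` is an invented name:

```python
import torch

def to_numpy(t: torch.Tensor):
    """Hypothetical helper: detach, move to CPU, and upcast bfloat16 before .numpy()."""
    t = t.detach().cpu()
    if t.dtype == torch.bfloat16:
        t = t.float()  # NumPy has no bfloat16 dtype, so upcast to float32 first
    return t.numpy()
```

With such a helper, a chained call like `layer_attention[0].mean(dim=0).detach().cpu().float().numpy()` would read `to_numpy(layer_attention[0].mean(dim=0))`.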