gary-boon Claude Opus 4.5 committed
Commit cb6f39c · 1 Parent(s): 5333b21

fix: Convert bfloat16 to float32 for numpy compatibility


NumPy doesn't support bfloat16 directly. Add .float() conversion
before .numpy() calls in attention/activation extraction.
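For context, a minimal sketch of the failure this works around (assuming a recent PyTorch build on CPU; the tensor name x is illustrative, not taken from the service code):

import torch

x = torch.randn(4, 4, dtype=torch.bfloat16)

try:
    x.numpy()  # fails: NumPy has no native bfloat16 dtype
except (TypeError, RuntimeError) as err:
    print(f"direct conversion fails: {err}")

arr = x.float().numpy()  # upcast to float32 first, then convert
print(arr.dtype)         # float32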

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <[email protected]>

Files changed (1)
  1. backend/model_service.py +8 -6
backend/model_service.py CHANGED
@@ -225,7 +225,7 @@ class ModelManager:
 
         # Average across all heads for visualization
         # Shape: (batch_size, num_heads, seq_len, seq_len) -> (seq_len, seq_len)
-        avg_attention = layer_attention[0].mean(dim=0).detach().cpu().numpy()
+        avg_attention = layer_attention[0].mean(dim=0).detach().cpu().float().numpy()
 
         # Don't sample if we have complete attention - we want the full matrix
         # Only sample if the matrix is very large (>100x100)
@@ -263,7 +263,7 @@ class ModelManager:
 
     def extract_activation_trace(self, layer_idx: int, hidden_states) -> TraceData:
         """Extract activation pattern trace from hidden states"""
-        activations = hidden_states[0].detach().cpu().numpy()
+        activations = hidden_states[0].detach().cpu().float().numpy()
 
         # Handle potential overflow and get safe mean
         try:
@@ -1650,7 +1650,8 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: boo
             confidence = 0.0 if math.isnan(confidence) or math.isinf(confidence) else confidence
 
             # Get full attention weights for this head [seq_len, seq_len]
-            attention_matrix = layer_attn[head_idx].cpu().numpy().tolist()
+            # Convert to float32 for numpy (bfloat16 not supported)
+            attention_matrix = layer_attn[head_idx].cpu().float().numpy().tolist()
 
             # Get Q/K/V for this head if available
             q_matrix = None
@@ -1658,9 +1659,10 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: boo
             v_matrix = None
             if layer_idx in qkv_captures:
                 # Q/K/V shape: [seq_len, n_heads, head_dim]
-                q_matrix = qkv_captures[layer_idx]['q'][:, head_idx, :].numpy().tolist()
-                k_matrix = qkv_captures[layer_idx]['k'][:, head_idx, :].numpy().tolist()
-                v_matrix = qkv_captures[layer_idx]['v'][:, head_idx, :].numpy().tolist()
+                # Convert to float32 for numpy (bfloat16 not supported)
+                q_matrix = qkv_captures[layer_idx]['q'][:, head_idx, :].float().numpy().tolist()
+                k_matrix = qkv_captures[layer_idx]['k'][:, head_idx, :].float().numpy().tolist()
+                v_matrix = qkv_captures[layer_idx]['v'][:, head_idx, :].float().numpy().tolist()
 
             critical_heads.append({
                 "head_idx": head_idx,