gary-boon Claude Opus 4.5 committed on
Commit c6f4cc5 · 1 Parent(s): e20ccaf

Add tokenSections boundaries and update system prompt


- Return tokenSections in the research endpoint response, with boundaries
  for the system prompt, user prompt, and output sections (shape sketched below)
- Estimate system prompt boundary for Devstral using MistralTokenizer
- Update default system prompt to handle both code completion and
instruction-style prompts
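
For reference, the new field has the shape sketched below. The key names match this
commit; the token counts and text are illustrative only, assuming a short
Devstral-style prompt and completion:

    # Illustrative tokenSections payload (made-up numbers; keys match the diff)
    token_sections = {
        "systemPrompt": {"start": 0,  "end": 18, "tokenCount": 18,
                         "text": "You are an expert Python programmer. ..."},
        "userPrompt":   {"start": 18, "end": 25, "tokenCount": 7,
                         "text": "def fibonacci(n):"},
        "output":       {"start": 25, "end": 65, "tokenCount": 40,
                         "text": "    if n <= 1:\n        return n\n..."},
    }

Sections are half-open [start, end) ranges over the combined prompt + generated
token list, so a UI can color tokens by slicing that list directly.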

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <[email protected]>

backend/model_config.py CHANGED
@@ -88,7 +88,7 @@ SUPPORTED_MODELS: Dict[str, ModelConfig] = {
         "recommended_dtype": "bf16",  # Devstral requires bfloat16
         "uses_chat_template": True,  # Instruction-tuned, requires chat format
         "prompt_style": "instruction",  # Requires system + user messages
-        "system_prompt": "You are an expert Python programmer. Continue the code provided by the user. Output only valid Python code, no explanations or markdown.",
+        "system_prompt": "You are an expert Python programmer. If given partial code, continue it. If given a description or request, write the appropriate implementation. Output only valid Python code, no explanations or markdown.",
         "recommended_temperature": 0.15  # Devstral recommended temperature
     }
 }
backend/model_service.py CHANGED
@@ -1801,6 +1801,54 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: bool

     generation_time = time.time() - start_time

+    # Calculate token section boundaries for UI display
+    total_tokens = prompt_length + len(generated_token_ids)
+    system_prompt_text = system_prompt_override or (model_config.get("system_prompt") if model_config else None)
+
+    # For instruction models, estimate where system prompt ends
+    # This is approximate due to control tokens in chat templates
+    system_prompt_end = 0
+    if prompt_style == "instruction" and system_prompt_text:
+        if manager.model_id == "devstral-small" and manager.mistral_tokenizer is not None:
+            # For Devstral, try encoding with empty system to estimate boundary
+            try:
+                no_system_tokens = manager.mistral_tokenizer.encode_chat("", prompt)
+                system_prompt_end = prompt_length - len(no_system_tokens)
+                # Ensure non-negative and within bounds
+                system_prompt_end = max(0, min(system_prompt_end, prompt_length))
+                logger.info(f"Estimated system prompt boundary: {system_prompt_end} tokens")
+            except Exception as e:
+                logger.warning(f"Could not estimate system prompt boundary: {e}")
+                system_prompt_end = 0
+        else:
+            # For other instruction models, rough estimate based on character ratio
+            # This is very approximate but provides some visual separation
+            total_chars = len(system_prompt_text or "") + len(prompt)
+            if total_chars > 0:
+                system_ratio = len(system_prompt_text or "") / total_chars
+                system_prompt_end = int(prompt_length * system_ratio)
+
+    token_sections = {
+        "systemPrompt": {
+            "start": 0,
+            "end": system_prompt_end,
+            "text": system_prompt_text,
+            "tokenCount": system_prompt_end
+        },
+        "userPrompt": {
+            "start": system_prompt_end,
+            "end": prompt_length,
+            "text": prompt,
+            "tokenCount": prompt_length - system_prompt_end
+        },
+        "output": {
+            "start": prompt_length,
+            "end": total_tokens,
+            "text": "".join(generated_tokens),
+            "tokenCount": len(generated_token_ids)
+        }
+    }
+
     # Build response
     response = {
         "prompt": prompt,
@@ -1808,6 +1856,7 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: bool
                          for tid, t in zip(prompt_token_ids, prompt_tokens)],
         "generatedTokens": [{"text": t, "idx": tid, "bytes": len(t.encode('utf-8')), "type": "generated"}
                             for tid, t in zip(generated_token_ids, generated_tokens)],
+        "tokenSections": token_sections,  # Section boundaries for UI coloring
         "tokenAlternatives": token_alternatives_by_step,  # Top-k alternatives for each token
         "layersDataByStep": layer_data_by_token,  # Layer data for ALL generation steps
         "layersData": layer_data_by_token[-1] if layer_data_by_token else [],  # Keep for backward compatibility
backend/prompt_formatter.py CHANGED
@@ -74,7 +74,7 @@ class PromptFormatter:
         # Get system prompt (override > model default > generic fallback)
         system_prompt = system_prompt_override or model_config.get("system_prompt")
         if not system_prompt:
-            system_prompt = "You are a helpful coding assistant. Continue the code provided."
+            system_prompt = "You are a helpful coding assistant. If given partial code, continue it. If given a description or request, write the appropriate implementation."

         # Build messages list
         messages = [