gary-boon
Claude Opus 4.5
committed
Commit · c6f4cc5
1 Parent(s): e20ccaf
Add tokenSections boundaries and update system prompt
- Return tokenSections in research endpoint response with boundaries
for system prompt, user prompt, and output sections
- Estimate system prompt boundary for Devstral using MistralTokenizer
- Update default system prompt to handle both code completion and
instruction-style prompts
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <[email protected]>
- backend/model_config.py +1 -1
- backend/model_service.py +49 -0
- backend/prompt_formatter.py +1 -1
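For orientation, the tokenSections field added by this commit is a map of three contiguous, half-open [start, end) token ranges covering the system prompt, the user prompt, and the generated output. Below is a minimal sketch of the shape a caller might see in the research endpoint response; the key names match the diff further down, but the numbers and text snippets are invented for illustration.

# Illustrative only -- values are made up; key names follow the diff below.
token_sections_example = {
    "systemPrompt": {"start": 0,  "end": 14, "text": "You are an expert Python programmer. ...", "tokenCount": 14},
    "userPrompt":   {"start": 14, "end": 53, "text": "def fibonacci(n):",                        "tokenCount": 39},
    "output":       {"start": 53, "end": 90, "text": "    if n < 2:\n        return n\n    ...", "tokenCount": 37},
}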
backend/model_config.py
CHANGED
@@ -88,7 +88,7 @@ SUPPORTED_MODELS: Dict[str, ModelConfig] = {
         "recommended_dtype": "bf16",  # Devstral requires bfloat16
         "uses_chat_template": True,  # Instruction-tuned, requires chat format
         "prompt_style": "instruction",  # Requires system + user messages
-        "system_prompt": "You are an expert Python programmer.
+        "system_prompt": "You are an expert Python programmer. If given partial code, continue it. If given a description or request, write the appropriate implementation. Output only valid Python code, no explanations or markdown.",
         "recommended_temperature": 0.15  # Devstral recommended temperature
     }
 }
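Because this entry sets prompt_style to "instruction" and uses_chat_template to True, the system_prompt above is sent as a separate system message rather than prepended to the code. A rough sketch of how a config entry like this could be turned into a chat-formatted prompt, assuming a Hugging Face transformers tokenizer with apply_chat_template; the helper function itself is hypothetical and not part of this repository:

# Hypothetical helper, not from this repo: shows how the config keys above
# ("system_prompt", "uses_chat_template") are typically consumed with a
# transformers tokenizer that exposes apply_chat_template.
def build_instruction_prompt(model_config: dict, user_prompt: str, tokenizer) -> str:
    messages = [
        {"role": "system", "content": model_config["system_prompt"]},
        {"role": "user", "content": user_prompt},
    ]
    # tokenize=False returns the formatted string; add_generation_prompt=True
    # appends the assistant header so generation starts in the right place.
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )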
backend/model_service.py
CHANGED
@@ -1801,6 +1801,54 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: boo
 
     generation_time = time.time() - start_time
 
+    # Calculate token section boundaries for UI display
+    total_tokens = prompt_length + len(generated_token_ids)
+    system_prompt_text = system_prompt_override or (model_config.get("system_prompt") if model_config else None)
+
+    # For instruction models, estimate where system prompt ends
+    # This is approximate due to control tokens in chat templates
+    system_prompt_end = 0
+    if prompt_style == "instruction" and system_prompt_text:
+        if manager.model_id == "devstral-small" and manager.mistral_tokenizer is not None:
+            # For Devstral, try encoding with empty system to estimate boundary
+            try:
+                no_system_tokens = manager.mistral_tokenizer.encode_chat("", prompt)
+                system_prompt_end = prompt_length - len(no_system_tokens)
+                # Ensure non-negative and within bounds
+                system_prompt_end = max(0, min(system_prompt_end, prompt_length))
+                logger.info(f"Estimated system prompt boundary: {system_prompt_end} tokens")
+            except Exception as e:
+                logger.warning(f"Could not estimate system prompt boundary: {e}")
+                system_prompt_end = 0
+        else:
+            # For other instruction models, rough estimate based on character ratio
+            # This is very approximate but provides some visual separation
+            total_chars = len(system_prompt_text or "") + len(prompt)
+            if total_chars > 0:
+                system_ratio = len(system_prompt_text or "") / total_chars
+                system_prompt_end = int(prompt_length * system_ratio)
+
+    token_sections = {
+        "systemPrompt": {
+            "start": 0,
+            "end": system_prompt_end,
+            "text": system_prompt_text,
+            "tokenCount": system_prompt_end
+        },
+        "userPrompt": {
+            "start": system_prompt_end,
+            "end": prompt_length,
+            "text": prompt,
+            "tokenCount": prompt_length - system_prompt_end
+        },
+        "output": {
+            "start": prompt_length,
+            "end": total_tokens,
+            "text": "".join(generated_tokens),
+            "tokenCount": len(generated_token_ids)
+        }
+    }
+
     # Build response
     response = {
         "prompt": prompt,
@@ -1808,6 +1856,7 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: boo
                             for tid, t in zip(prompt_token_ids, prompt_tokens)],
         "generatedTokens": [{"text": t, "idx": tid, "bytes": len(t.encode('utf-8')), "type": "generated"}
                             for tid, t in zip(generated_token_ids, generated_tokens)],
+        "tokenSections": token_sections,  # Section boundaries for UI coloring
         "tokenAlternatives": token_alternatives_by_step,  # Top-k alternatives for each token
         "layersDataByStep": layer_data_by_token,  # Layer data for ALL generation steps
         "layersData": layer_data_by_token[-1] if layer_data_by_token else [],  # Keep for backward compatibility
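To make the fallback branch above concrete: when no tokenizer-based estimate is available, the boundary is guessed proportionally from character counts. Here is that arithmetic restated as a standalone function with a worked example; the function name is mine, but the logic mirrors the added lines.

def estimate_boundary_by_char_ratio(system_prompt_text: str, user_prompt: str,
                                    prompt_length: int) -> int:
    """Approximate where the system prompt ends, mirroring the fallback branch above."""
    total_chars = len(system_prompt_text or "") + len(user_prompt)
    if total_chars == 0:
        return 0
    system_ratio = len(system_prompt_text or "") / total_chars
    return int(prompt_length * system_ratio)

# Worked example (invented numbers): a 120-character system prompt plus a
# 60-character user prompt tokenized to 90 prompt tokens gives
# int(90 * 120 / 180) = 60, so tokens 0-59 are labelled "systemPrompt".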
backend/prompt_formatter.py
CHANGED
@@ -74,7 +74,7 @@ class PromptFormatter:
         # Get system prompt (override > model default > generic fallback)
         system_prompt = system_prompt_override or model_config.get("system_prompt")
         if not system_prompt:
-            system_prompt = "You are a helpful coding assistant.
+            system_prompt = "You are a helpful coding assistant. If given partial code, continue it. If given a description or request, write the appropriate implementation."
 
         # Build messages list
         messages = [
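Finally, a purely illustrative sketch of how a consumer of the research endpoint might use the new tokenSections ranges to label each token for display. The "generatedTokens" and "tokenSections" keys appear in the diff above; "promptTokens" is an assumed name for the prompt-side list, and nothing here is part of this commit.

# Illustrative consumer code, not part of this commit.
def label_tokens(response: dict) -> list:
    """Pair each token's text with the section name its index falls into."""
    sections = response["tokenSections"]

    def section_for_index(idx: int) -> str:
        # Ranges are half-open [start, end), so each index falls in exactly one section.
        for name, bounds in sections.items():
            if bounds["start"] <= idx < bounds["end"]:
                return name
        return "unknown"

    # "promptTokens" is an assumed key; only "generatedTokens" is visible in the diff.
    all_tokens = response.get("promptTokens", []) + response["generatedTokens"]
    return [(tok["text"], section_for_index(i)) for i, tok in enumerate(all_tokens)]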