alex4cip Claude committed on
Commit
7cf114c
·
1 Parent(s): b44b68c

fix: Add attention_mask to improve generation reliability

Browse files

Warning: The attention mask is not set and cannot be inferred from input

🔧 Fix:
- Use tokenizer() instead of tokenizer.encode()
- Explicitly create and pass attention_mask to generate()
- Add padding=True to ensure proper mask generation
- Pass attention_mask parameter to model.generate()

✨ Benefits:
- More reliable text generation
- Eliminates attention mask warning
- Proper handling of padded sequences
- Better model behavior with variable length inputs

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>

Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -106,18 +106,22 @@ def generate_response_impl(message, history):
106
 
107
  conversation += f"사용자: {message}\n어시스턴트:"
108
 
109
- # Tokenize
110
- inputs = current_tokenizer.encode(
111
  conversation,
112
  return_tensors="pt",
113
  truncation=True,
114
  max_length=512,
115
- ).to(device)
 
 
 
116
 
117
  # Generate response
118
  with torch.no_grad():
119
  outputs = current_model.generate(
120
  inputs,
 
121
  max_new_tokens=MODEL_CONFIG["max_length"],
122
  temperature=0.7,
123
  top_p=0.9,
 
106
 
107
  conversation += f"사용자: {message}\n어시스턴트:"
108
 
109
+ # Tokenize with attention_mask
110
+ encoded = current_tokenizer(
111
  conversation,
112
  return_tensors="pt",
113
  truncation=True,
114
  max_length=512,
115
+ padding=True,
116
+ )
117
+ inputs = encoded['input_ids'].to(device)
118
+ attention_mask = encoded['attention_mask'].to(device)
119
 
120
  # Generate response
121
  with torch.no_grad():
122
  outputs = current_model.generate(
123
  inputs,
124
+ attention_mask=attention_mask,
125
  max_new_tokens=MODEL_CONFIG["max_length"],
126
  temperature=0.7,
127
  top_p=0.9,