lulavc committed on
Commit
b8252ed
·
verified ·
1 Parent(s): 13efc9b

Improve: better inpainting (NS algo, larger radius), natural translations (romaji names, honorifics), better text positioning

Browse files
Files changed (1) hide show
  1. app.py +55 -27
app.py CHANGED
@@ -258,10 +258,18 @@ For EACH text region found:
258
  2. original: the exact {source_lang} text
259
  3. translated: natural {target_lang} translation
260
 
 
 
 
 
 
 
 
 
261
  Return a JSON array. Example:
262
  [
263
- {{"bbox": [100, 50, 200, 80], "original": "キャラクター名", "translated": "Character Name"}},
264
- {{"bbox": [300, 100, 400, 130], "original": "説明文", "translated": "Description"}}
265
  ]
266
 
267
  CRITICAL: Find at least 20-50 text regions. This image has many text elements. Scan every corner carefully. Include ALL small labels and character descriptions."""
@@ -324,7 +332,7 @@ CRITICAL: Find at least 20-50 text regions. This image has many text elements. S
324
  # INPAINTING (Optimized for 8 vCPU)
325
  # ============================================================
326
 
327
- def create_text_mask(image: Image.Image, detections: list, padding: int = 8) -> Image.Image:
328
  """Create a mask for inpainting based on detected text regions."""
329
  mask = Image.new('L', image.size, 0)
330
  draw = ImageDraw.Draw(mask)
@@ -333,6 +341,7 @@ def create_text_mask(image: Image.Image, detections: list, padding: int = 8) ->
333
  bbox = det.get('bbox', [])
334
  if len(bbox) == 4:
335
  x1, y1, x2, y2 = [int(v) for v in bbox]
 
336
  x1 = max(0, x1 - padding)
337
  y1 = max(0, y1 - padding)
338
  x2 = min(image.width, x2 + padding)
@@ -342,12 +351,13 @@ def create_text_mask(image: Image.Image, detections: list, padding: int = 8) ->
342
  return mask
343
 
344
  def inpaint_image(image: Image.Image, mask: Image.Image) -> Image.Image:
345
- """Fast inpainting using OpenCV (CPU-optimized, multi-threaded)."""
346
  img_array = np.array(image.convert('RGB'))
347
  mask_array = np.array(mask)
348
 
349
- # Use TELEA algorithm - fast and good quality for text removal
350
- result = cv2.inpaint(img_array, mask_array, inpaintRadius=7, flags=cv2.INPAINT_TELEA)
 
351
  return Image.fromarray(result)
352
 
353
  # ============================================================
@@ -376,7 +386,7 @@ def wrap_text(text: str, font: ImageFont.FreeTypeFont, max_width: int, draw: Ima
376
  return lines if lines else [text]
377
 
378
  def add_translated_text(image: Image.Image, detections: list) -> Image.Image:
379
- """Add translated text to the inpainted image with smart sizing."""
380
  result = image.copy()
381
  draw = ImageDraw.Draw(result)
382
 
@@ -389,47 +399,65 @@ def add_translated_text(image: Image.Image, detections: list) -> Image.Image:
389
  box_width = x2 - x1
390
  box_height = y2 - y1
391
 
392
- # Calculate optimal font size
 
 
 
 
 
 
 
393
  text_len = max(len(translated), 1)
394
- estimated_size = min(
395
- box_height // 3,
396
- int(box_width / text_len * 1.8),
397
- 36
398
- )
 
 
 
 
 
399
  estimated_size = max(10, estimated_size)
400
  font = get_font(estimated_size)
401
 
402
  # Word wrap for long text
403
- lines = wrap_text(translated, font, box_width - 10, draw)
404
 
405
  # Calculate total text height
406
- line_height = estimated_size + 4
407
  total_height = len(lines) * line_height
408
 
409
- # If text doesn't fit, reduce font size
410
- if total_height > box_height - 10:
411
- estimated_size = max(8, int(estimated_size * (box_height - 10) / total_height))
412
  font = get_font(estimated_size)
413
- lines = wrap_text(translated, font, box_width - 10, draw)
414
- line_height = estimated_size + 4
415
  total_height = len(lines) * line_height
416
 
417
- # Center vertically
418
- start_y = y1 + (box_height - total_height) // 2
419
 
420
- # Draw each line
421
  for i, line in enumerate(lines):
422
  text_bbox = draw.textbbox((0, 0), line, font=font)
423
  text_width = text_bbox[2] - text_bbox[0]
424
- text_x = x1 + (box_width - text_width) // 2
425
  text_y = start_y + i * line_height
426
 
427
- # Draw outline for readability
428
- for dx in [-1, 0, 1]:
429
- for dy in [-1, 0, 1]:
 
 
 
 
 
430
  if dx != 0 or dy != 0:
431
  draw.text((text_x + dx, text_y + dy), line, font=font, fill="black")
432
 
 
433
  draw.text((text_x, text_y), line, font=font, fill="white")
434
 
435
  return result
 
258
  2. original: the exact {source_lang} text
259
  3. translated: natural {target_lang} translation
260
 
261
+ TRANSLATION GUIDELINES:
262
+ - Keep character names in ROMAJI (e.g., 田中太郎 → "Tanaka Tarou", not "Rice Field Middle Fat Man")
263
+ - Keep honorifics: -san, -kun, -chan, -sama, -sensei
264
+ - Sound effects: Keep original + add meaning (e.g., "ドキドキ" → "Dokidoki (heart pounding)")
265
+ - Make dialogue natural and conversational, not literal
266
+ - Preserve emotional tone and nuance
267
+ - For titles/roles, translate the meaning (e.g., 社長 → "President", 先生 → "Teacher")
268
+
269
  Return a JSON array. Example:
270
  [
271
+ {{"bbox": [100, 50, 200, 80], "original": "山田花子", "translated": "Yamada Hanako"}},
272
+ {{"bbox": [300, 100, 400, 130], "original": "よろしくお願いします", "translated": "Nice to meet you"}}
273
  ]
274
 
275
  CRITICAL: Find at least 20-50 text regions. This image has many text elements. Scan every corner carefully. Include ALL small labels and character descriptions."""
 
332
  # INPAINTING (Optimized for 8 vCPU)
333
  # ============================================================
334
 
335
+ def create_text_mask(image: Image.Image, detections: list, padding: int = 12) -> Image.Image:
336
  """Create a mask for inpainting based on detected text regions."""
337
  mask = Image.new('L', image.size, 0)
338
  draw = ImageDraw.Draw(mask)
 
341
  bbox = det.get('bbox', [])
342
  if len(bbox) == 4:
343
  x1, y1, x2, y2 = [int(v) for v in bbox]
344
+ # Larger padding for cleaner inpainting
345
  x1 = max(0, x1 - padding)
346
  y1 = max(0, y1 - padding)
347
  x2 = min(image.width, x2 + padding)
 
351
  return mask
352
 
353
  def inpaint_image(image: Image.Image, mask: Image.Image) -> Image.Image:
354
+ """High-quality inpainting using OpenCV (CPU-optimized)."""
355
  img_array = np.array(image.convert('RGB'))
356
  mask_array = np.array(mask)
357
 
358
+ # Use NS (Navier-Stokes) algorithm for better quality on larger areas
359
+ # Increased radius for cleaner text removal
360
+ result = cv2.inpaint(img_array, mask_array, inpaintRadius=12, flags=cv2.INPAINT_NS)
361
  return Image.fromarray(result)
362
 
363
  # ============================================================
 
386
  return lines if lines else [text]
387
 
388
  def add_translated_text(image: Image.Image, detections: list) -> Image.Image:
389
+ """Add translated text to the inpainted image with smart sizing and positioning."""
390
  result = image.copy()
391
  draw = ImageDraw.Draw(result)
392
 
 
399
  box_width = x2 - x1
400
  box_height = y2 - y1
401
 
402
+ # Skip very small boxes
403
+ if box_width < 20 or box_height < 10:
404
+ continue
405
+
406
+ # Detect if vertical text (tall narrow box with short text)
407
+ is_vertical = box_height > box_width * 2 and len(translated) < 10
408
+
409
+ # Calculate optimal font size based on box dimensions
410
  text_len = max(len(translated), 1)
411
+ if is_vertical:
412
+ # Vertical: size based on width
413
+ estimated_size = min(box_width - 4, 24)
414
+ else:
415
+ # Horizontal: balance between height and text length
416
+ estimated_size = min(
417
+ box_height - 4,
418
+ int((box_width / text_len) * 1.5),
419
+ 28
420
+ )
421
  estimated_size = max(10, estimated_size)
422
  font = get_font(estimated_size)
423
 
424
  # Word wrap for long text
425
+ lines = wrap_text(translated, font, box_width - 8, draw)
426
 
427
  # Calculate total text height
428
+ line_height = estimated_size + 2
429
  total_height = len(lines) * line_height
430
 
431
+ # If text doesn't fit, reduce font size progressively
432
+ while total_height > box_height - 6 and estimated_size > 8:
433
+ estimated_size -= 1
434
  font = get_font(estimated_size)
435
+ lines = wrap_text(translated, font, box_width - 8, draw)
436
+ line_height = estimated_size + 2
437
  total_height = len(lines) * line_height
438
 
439
+ # Center vertically and horizontally
440
+ start_y = y1 + max(2, (box_height - total_height) // 2)
441
 
442
+ # Draw each line centered
443
  for i, line in enumerate(lines):
444
  text_bbox = draw.textbbox((0, 0), line, font=font)
445
  text_width = text_bbox[2] - text_bbox[0]
446
+ text_x = x1 + max(2, (box_width - text_width) // 2)
447
  text_y = start_y + i * line_height
448
 
449
+ # Ensure text stays within bounds
450
+ text_x = max(x1 + 2, min(text_x, x2 - text_width - 2))
451
+ text_y = max(y1 + 2, min(text_y, y2 - estimated_size - 2))
452
+
453
+ # Draw outline for readability (thicker outline)
454
+ outline_range = [-1, 0, 1]
455
+ for dx in outline_range:
456
+ for dy in outline_range:
457
  if dx != 0 or dy != 0:
458
  draw.text((text_x + dx, text_y + dy), line, font=font, fill="black")
459
 
460
+ # Draw main text in white
461
  draw.text((text_x, text_y), line, font=font, fill="white")
462
 
463
  return result