Spaces:

lulavc
/

BubbleScribe

Runtime error

App Files Community

lulavc committed on Dec 10, 2025

Commit

b8252ed

verified ·

1 Parent(s): 13efc9b

Improve: better inpainting (NS algo, larger radius), natural translations (romaji names, honorifics), better text positioning

Browse files

Files changed (1) hide show

app.py +55 -27

app.py CHANGED Viewed

@@ -258,10 +258,18 @@ For EACH text region found:
 2. original: the exact {source_lang} text
 3. translated: natural {target_lang} translation
 Return a JSON array. Example:
 [
-  {{"bbox": [100, 50, 200, 80], "original": "キャラクター名", "translated": "Character Name"}},
-  {{"bbox": [300, 100, 400, 130], "original": "説明文", "translated": "Description"}}
 ]
 CRITICAL: Find at least 20-50 text regions. This image has many text elements. Scan every corner carefully. Include ALL small labels and character descriptions."""
@@ -324,7 +332,7 @@ CRITICAL: Find at least 20-50 text regions. This image has many text elements. S
 # INPAINTING (Optimized for 8 vCPU)
 # ============================================================
-def create_text_mask(image: Image.Image, detections: list, padding: int = 8) -> Image.Image:
     """Create a mask for inpainting based on detected text regions."""
     mask = Image.new('L', image.size, 0)
     draw = ImageDraw.Draw(mask)
@@ -333,6 +341,7 @@ def create_text_mask(image: Image.Image, detections: list, padding: int = 8) ->
         bbox = det.get('bbox', [])
         if len(bbox) == 4:
             x1, y1, x2, y2 = [int(v) for v in bbox]
             x1 = max(0, x1 - padding)
             y1 = max(0, y1 - padding)
             x2 = min(image.width, x2 + padding)
@@ -342,12 +351,13 @@ def create_text_mask(image: Image.Image, detections: list, padding: int = 8) ->
     return mask
 def inpaint_image(image: Image.Image, mask: Image.Image) -> Image.Image:
-    """Fast inpainting using OpenCV (CPU-optimized, multi-threaded)."""
     img_array = np.array(image.convert('RGB'))
     mask_array = np.array(mask)
-    # Use TELEA algorithm - fast and good quality for text removal
-    result = cv2.inpaint(img_array, mask_array, inpaintRadius=7, flags=cv2.INPAINT_TELEA)
     return Image.fromarray(result)
 # ============================================================
@@ -376,7 +386,7 @@ def wrap_text(text: str, font: ImageFont.FreeTypeFont, max_width: int, draw: Ima
     return lines if lines else [text]
 def add_translated_text(image: Image.Image, detections: list) -> Image.Image:
-    """Add translated text to the inpainted image with smart sizing."""
     result = image.copy()
     draw = ImageDraw.Draw(result)
@@ -389,47 +399,65 @@ def add_translated_text(image: Image.Image, detections: list) -> Image.Image:
             box_width = x2 - x1
             box_height = y2 - y1
-            # Calculate optimal font size
             text_len = max(len(translated), 1)
-            estimated_size = min(
-                box_height // 3,
-                int(box_width / text_len * 1.8),
-                36
-            )
             estimated_size = max(10, estimated_size)
             font = get_font(estimated_size)
             # Word wrap for long text
-            lines = wrap_text(translated, font, box_width - 10, draw)
             # Calculate total text height
-            line_height = estimated_size + 4
             total_height = len(lines) * line_height
-            # If text doesn't fit, reduce font size
-            if total_height > box_height - 10:
-                estimated_size = max(8, int(estimated_size * (box_height - 10) / total_height))
                 font = get_font(estimated_size)
-                lines = wrap_text(translated, font, box_width - 10, draw)
-                line_height = estimated_size + 4
                 total_height = len(lines) * line_height
-            # Center vertically
-            start_y = y1 + (box_height - total_height) // 2
-            # Draw each line
             for i, line in enumerate(lines):
                 text_bbox = draw.textbbox((0, 0), line, font=font)
                 text_width = text_bbox[2] - text_bbox[0]
-                text_x = x1 + (box_width - text_width) // 2
                 text_y = start_y + i * line_height
-                # Draw outline for readability
-                for dx in [-1, 0, 1]:
-                    for dy in [-1, 0, 1]:
                         if dx != 0 or dy != 0:
                             draw.text((text_x + dx, text_y + dy), line, font=font, fill="black")
                 draw.text((text_x, text_y), line, font=font, fill="white")
     return result

 2. original: the exact {source_lang} text
 3. translated: natural {target_lang} translation
+TRANSLATION GUIDELINES:
+- Keep character names in ROMAJI (e.g., 田中太郎 → "Tanaka Tarou", not "Rice Field Middle Fat Man")
+- Keep honorifics: -san, -kun, -chan, -sama, -sensei
+- Sound effects: Keep original + add meaning (e.g., "ドキドキ" → "Dokidoki (heart pounding)")
+- Make dialogue natural and conversational, not literal
+- Preserve emotional tone and nuance
+- For titles/roles, translate the meaning (e.g., 社長 → "President", 先生 → "Teacher")
 Return a JSON array. Example:
 [
+  {{"bbox": [100, 50, 200, 80], "original": "山田花子", "translated": "Yamada Hanako"}},
+  {{"bbox": [300, 100, 400, 130], "original": "よろしくお願いします", "translated": "Nice to meet you"}}
 ]
 CRITICAL: Find at least 20-50 text regions. This image has many text elements. Scan every corner carefully. Include ALL small labels and character descriptions."""
 # INPAINTING (Optimized for 8 vCPU)
 # ============================================================
+def create_text_mask(image: Image.Image, detections: list, padding: int = 12) -> Image.Image:
     """Create a mask for inpainting based on detected text regions."""
     mask = Image.new('L', image.size, 0)
     draw = ImageDraw.Draw(mask)
         bbox = det.get('bbox', [])
         if len(bbox) == 4:
             x1, y1, x2, y2 = [int(v) for v in bbox]
+            # Larger padding for cleaner inpainting
             x1 = max(0, x1 - padding)
             y1 = max(0, y1 - padding)
             x2 = min(image.width, x2 + padding)
     return mask
 def inpaint_image(image: Image.Image, mask: Image.Image) -> Image.Image:
+    """High-quality inpainting using OpenCV (CPU-optimized)."""
     img_array = np.array(image.convert('RGB'))
     mask_array = np.array(mask)
+    # Use NS (Navier-Stokes) algorithm for better quality on larger areas
+    # Increased radius for cleaner text removal
+    result = cv2.inpaint(img_array, mask_array, inpaintRadius=12, flags=cv2.INPAINT_NS)
     return Image.fromarray(result)
 # ============================================================
     return lines if lines else [text]
 def add_translated_text(image: Image.Image, detections: list) -> Image.Image:
+    """Add translated text to the inpainted image with smart sizing and positioning."""
     result = image.copy()
     draw = ImageDraw.Draw(result)
             box_width = x2 - x1
             box_height = y2 - y1
+            # Skip very small boxes
+            if box_width < 20 or box_height < 10:
+                continue
+            # Detect if vertical text (tall narrow box with short text)
+            is_vertical = box_height > box_width * 2 and len(translated) < 10
+            # Calculate optimal font size based on box dimensions
             text_len = max(len(translated), 1)
+            if is_vertical:
+                # Vertical: size based on width
+                estimated_size = min(box_width - 4, 24)
+            else:
+                # Horizontal: balance between height and text length
+                estimated_size = min(
+                    box_height - 4,
+                    int((box_width / text_len) * 1.5),
+                    28
+                )
             estimated_size = max(10, estimated_size)
             font = get_font(estimated_size)
             # Word wrap for long text
+            lines = wrap_text(translated, font, box_width - 8, draw)
             # Calculate total text height
+            line_height = estimated_size + 2
             total_height = len(lines) * line_height
+            # If text doesn't fit, reduce font size progressively
+            while total_height > box_height - 6 and estimated_size > 8:
+                estimated_size -= 1
                 font = get_font(estimated_size)
+                lines = wrap_text(translated, font, box_width - 8, draw)
+                line_height = estimated_size + 2
                 total_height = len(lines) * line_height
+            # Center vertically and horizontally
+            start_y = y1 + max(2, (box_height - total_height) // 2)
+            # Draw each line centered
             for i, line in enumerate(lines):
                 text_bbox = draw.textbbox((0, 0), line, font=font)
                 text_width = text_bbox[2] - text_bbox[0]
+                text_x = x1 + max(2, (box_width - text_width) // 2)
                 text_y = start_y + i * line_height
+                # Ensure text stays within bounds
+                text_x = max(x1 + 2, min(text_x, x2 - text_width - 2))
+                text_y = max(y1 + 2, min(text_y, y2 - estimated_size - 2))
+                # Draw outline for readability (thicker outline)
+                outline_range = [-1, 0, 1]
+                for dx in outline_range:
+                    for dy in outline_range:
                         if dx != 0 or dy != 0:
                             draw.text((text_x + dx, text_y + dy), line, font=font, fill="black")
+                # Draw main text in white
                 draw.text((text_x, text_y), line, font=font, fill="white")
     return result