| | import logging |
| | from pathlib import Path |
| | from typing import Any, Iterable, List, Optional, Union |
| |
|
| | from docling_core.types.doc import ( |
| | DoclingDocument, |
| | NodeItem, |
| | PictureClassificationClass, |
| | PictureItem, |
| | ) |
| | from docling_core.types.doc.document import ( |
| | PictureDescriptionData, |
| | ) |
| | from PIL import Image |
| |
|
| | from docling.datamodel.pipeline_options import PictureDescriptionBaseOptions |
| | from docling.models.base_model import ( |
| | BaseItemAndImageEnrichmentModel, |
| | ItemAndImageEnrichmentElement, |
| | ) |
| |
|
| |
|
class PictureDescriptionBaseModel(BaseItemAndImageEnrichmentModel):
    """Common scaffolding for enrichment models that describe pictures.

    This class batches the picture items it receives, hands their images to
    ``_annotate_images`` and attaches each returned text to the matching
    item as a ``PictureDescriptionData`` annotation. ``_annotate_images``
    raises ``NotImplementedError`` here, so a concrete subclass has to
    provide it.
    """

    # NOTE(review): not read anywhere in this class; presumably consumed by
    # the enrichment base class when producing element images -- confirm.
    images_scale: float = 2.0

    def __init__(
        self,
        enabled: bool,
        options: PictureDescriptionBaseOptions,
    ):
        """Remember the on/off switch and the description options.

        Args:
            enabled: When false, ``is_processable`` rejects every element
                and ``__call__`` passes items through unchanged.
            options: Picture description options; stored on the instance
                and not read by this class itself.
        """
        self.enabled = enabled
        self.options = options
        # Placeholder provenance written into every annotation created by
        # ``__call__``; presumably overridden by subclasses -- confirm.
        self.provenance = "not-implemented"

    def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool:
        """Tell whether ``element`` should be handled by this model.

        Only picture items qualify, and only while the model is enabled.
        ``doc`` is accepted but not inspected.
        """
        is_picture = isinstance(element, PictureItem)
        return self.enabled and is_picture

    def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:
        """Produce one description per image; must be overridden.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    def __call__(
        self,
        doc: DoclingDocument,
        element_batch: Iterable[ItemAndImageEnrichmentElement],
    ) -> Iterable[NodeItem]:
        """Describe every picture in ``element_batch`` and yield the items.

        This is a generator: nothing happens until the result is iterated.
        When the model is disabled the items are yielded untouched.
        Otherwise the whole batch is collected first, ``_annotate_images``
        is called once with all images, and each item is yielded right
        after its description has been appended to ``item.annotations``.

        Items and descriptions are paired with ``zip``, so pairing stops at
        the shorter of the two sequences.
        """
        if not self.enabled:
            # Disabled: pass the wrapped items straight through.
            yield from (entry.item for entry in element_batch)
            return

        pictures: List[PictureItem] = []
        crops: List[Image.Image] = []
        for entry in element_batch:
            picture = entry.item
            # Narrows the type; is_processable only admits PictureItem.
            assert isinstance(picture, PictureItem)
            pictures.append(picture)
            crops.append(entry.image)

        descriptions = self._annotate_images(crops)

        for picture, description in zip(pictures, descriptions):
            annotation = PictureDescriptionData(
                text=description, provenance=self.provenance
            )
            picture.annotations.append(annotation)
            yield picture
| |
|