| | from io import BytesIO |
| | from pathlib import Path |
| | from typing import Union |
| |
|
| | from docling_core.types.doc import DoclingDocument |
| | from typing_extensions import override |
| |
|
| | from docling.backend.abstract_backend import DeclarativeDocumentBackend |
| | from docling.datamodel.base_models import InputFormat |
| | from docling.datamodel.document import InputDocument |
| |
|
| |
|
| | class DoclingJSONBackend(DeclarativeDocumentBackend): |
| | @override |
| | def __init__( |
| | self, in_doc: InputDocument, path_or_stream: Union[BytesIO, Path] |
| | ) -> None: |
| | super().__init__(in_doc, path_or_stream) |
| |
|
| | |
| | |
| | |
| | self._doc_or_err = self._get_doc_or_err() |
| |
|
| | @override |
| | def is_valid(self) -> bool: |
| | return isinstance(self._doc_or_err, DoclingDocument) |
| |
|
| | @classmethod |
| | @override |
| | def supports_pagination(cls) -> bool: |
| | return False |
| |
|
| | @classmethod |
| | @override |
| | def supported_formats(cls) -> set[InputFormat]: |
| | return {InputFormat.JSON_DOCLING} |
| |
|
| | def _get_doc_or_err(self) -> Union[DoclingDocument, Exception]: |
| | try: |
| | json_data: Union[str, bytes] |
| | if isinstance(self.path_or_stream, Path): |
| | with open(self.path_or_stream, encoding="utf-8") as f: |
| | json_data = f.read() |
| | elif isinstance(self.path_or_stream, BytesIO): |
| | json_data = self.path_or_stream.getvalue() |
| | else: |
| | raise RuntimeError(f"Unexpected: {type(self.path_or_stream)=}") |
| | return DoclingDocument.model_validate_json(json_data=json_data) |
| | except Exception as e: |
| | return e |
| |
|
| | @override |
| | def convert(self) -> DoclingDocument: |
| | if isinstance(self._doc_or_err, DoclingDocument): |
| | return self._doc_or_err |
| | else: |
| | raise self._doc_or_err |
| |
|