mirror of
https://github.com/docling-project/docling-serve.git
synced 2025-11-29 08:33:50 +00:00
7
.flake8
Normal file
7
.flake8
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
[flake8]
|
||||||
|
max-line-length = 88
|
||||||
|
exclude = tests/*
|
||||||
|
max-complexity = 18
|
||||||
|
docstring-convention = google
|
||||||
|
ignore = W503,E203
|
||||||
|
classmethod-decorators = classmethod,validator
|
||||||
41
.pre-commit-config.yaml
Normal file
41
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
fail_fast: true
|
||||||
|
repos:
|
||||||
|
- repo: local
|
||||||
|
hooks:
|
||||||
|
- id: system
|
||||||
|
name: Black
|
||||||
|
entry: poetry run black docling_serve tests
|
||||||
|
pass_filenames: false
|
||||||
|
language: system
|
||||||
|
files: '\.py$'
|
||||||
|
- repo: local
|
||||||
|
hooks:
|
||||||
|
- id: system
|
||||||
|
name: isort
|
||||||
|
entry: poetry run isort docling_serve tests
|
||||||
|
pass_filenames: false
|
||||||
|
language: system
|
||||||
|
files: '\.py$'
|
||||||
|
- repo: local
|
||||||
|
hooks:
|
||||||
|
- id: system
|
||||||
|
name: flake8
|
||||||
|
entry: poetry run flake8 docling_serve
|
||||||
|
pass_filenames: false
|
||||||
|
language: system
|
||||||
|
files: '\.py$'
|
||||||
|
- repo: local
|
||||||
|
hooks:
|
||||||
|
- id: system
|
||||||
|
name: MyPy
|
||||||
|
entry: poetry run mypy docling_serve
|
||||||
|
pass_filenames: false
|
||||||
|
language: system
|
||||||
|
files: '\.py$'
|
||||||
|
- repo: local
|
||||||
|
hooks:
|
||||||
|
- id: system
|
||||||
|
name: Poetry check
|
||||||
|
entry: poetry check --lock
|
||||||
|
pass_filenames: false
|
||||||
|
language: system
|
||||||
129
CODE_OF_CONDUCT.md
Normal file
129
CODE_OF_CONDUCT.md
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
# Contributor Covenant Code of Conduct
|
||||||
|
|
||||||
|
## Our Pledge
|
||||||
|
|
||||||
|
We as members, contributors, and leaders pledge to make participation in our
|
||||||
|
community a harassment-free experience for everyone, regardless of age, body
|
||||||
|
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
||||||
|
identity and expression, level of experience, education, socio-economic status,
|
||||||
|
nationality, personal appearance, race, religion, or sexual identity
|
||||||
|
and orientation.
|
||||||
|
|
||||||
|
We pledge to act and interact in ways that contribute to an open, welcoming,
|
||||||
|
diverse, inclusive, and healthy community.
|
||||||
|
|
||||||
|
## Our Standards
|
||||||
|
|
||||||
|
Examples of behavior that contributes to a positive environment for our
|
||||||
|
community include:
|
||||||
|
|
||||||
|
* Demonstrating empathy and kindness toward other people
|
||||||
|
* Being respectful of differing opinions, viewpoints, and experiences
|
||||||
|
* Giving and gracefully accepting constructive feedback
|
||||||
|
* Accepting responsibility and apologizing to those affected by our mistakes,
|
||||||
|
and learning from the experience
|
||||||
|
* Focusing on what is best not just for us as individuals, but for the
|
||||||
|
overall community
|
||||||
|
|
||||||
|
Examples of unacceptable behavior include:
|
||||||
|
|
||||||
|
* The use of sexualized language or imagery, and sexual attention or
|
||||||
|
advances of any kind
|
||||||
|
* Trolling, insulting or derogatory comments, and personal or political attacks
|
||||||
|
* Public or private harassment
|
||||||
|
* Publishing others' private information, such as a physical or email
|
||||||
|
address, without their explicit permission
|
||||||
|
* Other conduct which could reasonably be considered inappropriate in a
|
||||||
|
professional setting
|
||||||
|
|
||||||
|
## Enforcement Responsibilities
|
||||||
|
|
||||||
|
Community leaders are responsible for clarifying and enforcing our standards of
|
||||||
|
acceptable behavior and will take appropriate and fair corrective action in
|
||||||
|
response to any behavior that they deem inappropriate, threatening, offensive,
|
||||||
|
or harmful.
|
||||||
|
|
||||||
|
Community leaders have the right and responsibility to remove, edit, or reject
|
||||||
|
comments, commits, code, wiki edits, issues, and other contributions that are
|
||||||
|
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
||||||
|
decisions when appropriate.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
This Code of Conduct applies within all community spaces, and also applies when
|
||||||
|
an individual is officially representing the community in public spaces.
|
||||||
|
Examples of representing our community include using an official e-mail address,
|
||||||
|
posting via an official social media account, or acting as an appointed
|
||||||
|
representative at an online or offline event.
|
||||||
|
|
||||||
|
## Enforcement
|
||||||
|
|
||||||
|
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
||||||
|
reported to the community leaders responsible for enforcement using
|
||||||
|
[deepsearch-core@zurich.ibm.com](mailto:deepsearch-core@zurich.ibm.com).
|
||||||
|
|
||||||
|
All complaints will be reviewed and investigated promptly and fairly.
|
||||||
|
|
||||||
|
All community leaders are obligated to respect the privacy and security of the
|
||||||
|
reporter of any incident.
|
||||||
|
|
||||||
|
## Enforcement Guidelines
|
||||||
|
|
||||||
|
Community leaders will follow these Community Impact Guidelines in determining
|
||||||
|
the consequences for any action they deem in violation of this Code of Conduct:
|
||||||
|
|
||||||
|
### 1. Correction
|
||||||
|
|
||||||
|
**Community Impact**: Use of inappropriate language or other behavior deemed
|
||||||
|
unprofessional or unwelcome in the community.
|
||||||
|
|
||||||
|
**Consequence**: A private, written warning from community leaders, providing
|
||||||
|
clarity around the nature of the violation and an explanation of why the
|
||||||
|
behavior was inappropriate. A public apology may be requested.
|
||||||
|
|
||||||
|
### 2. Warning
|
||||||
|
|
||||||
|
**Community Impact**: A violation through a single incident or series
|
||||||
|
of actions.
|
||||||
|
|
||||||
|
**Consequence**: A warning with consequences for continued behavior. No
|
||||||
|
interaction with the people involved, including unsolicited interaction with
|
||||||
|
those enforcing the Code of Conduct, for a specified period of time. This
|
||||||
|
includes avoiding interactions in community spaces as well as external channels
|
||||||
|
like social media. Violating these terms may lead to a temporary or
|
||||||
|
permanent ban.
|
||||||
|
|
||||||
|
### 3. Temporary Ban
|
||||||
|
|
||||||
|
**Community Impact**: A serious violation of community standards, including
|
||||||
|
sustained inappropriate behavior.
|
||||||
|
|
||||||
|
**Consequence**: A temporary ban from any sort of interaction or public
|
||||||
|
communication with the community for a specified period of time. No public or
|
||||||
|
private interaction with the people involved, including unsolicited interaction
|
||||||
|
with those enforcing the Code of Conduct, is allowed during this period.
|
||||||
|
Violating these terms may lead to a permanent ban.
|
||||||
|
|
||||||
|
### 4. Permanent Ban
|
||||||
|
|
||||||
|
**Community Impact**: Demonstrating a pattern of violation of community
|
||||||
|
standards, including sustained inappropriate behavior, harassment of an
|
||||||
|
individual, or aggression toward or disparagement of classes of individuals.
|
||||||
|
|
||||||
|
**Consequence**: A permanent ban from any sort of public interaction within
|
||||||
|
the community.
|
||||||
|
|
||||||
|
## Attribution
|
||||||
|
|
||||||
|
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
||||||
|
version 2.0, available at
|
||||||
|
[https://www.contributor-covenant.org/version/2/0/code_of_conduct.html](https://www.contributor-covenant.org/version/2/0/code_of_conduct.html).
|
||||||
|
|
||||||
|
Community Impact Guidelines were inspired by [Mozilla's code of conduct
|
||||||
|
enforcement ladder](https://github.com/mozilla/diversity).
|
||||||
|
|
||||||
|
Homepage: [https://www.contributor-covenant.org](https://www.contributor-covenant.org)
|
||||||
|
|
||||||
|
For answers to common questions about this code of conduct, see the FAQ at
|
||||||
|
[https://www.contributor-covenant.org/faq](https://www.contributor-covenant.org/faq). Translations are available at
|
||||||
|
[https://www.contributor-covenant.org/translations](https://www.contributor-covenant.org/translations).
|
||||||
164
CONTRIBUTING.md
Normal file
164
CONTRIBUTING.md
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
## Contributing In General
|
||||||
|
Our project welcomes external contributions. If you have an itch, please feel
|
||||||
|
free to scratch it.
|
||||||
|
|
||||||
|
To contribute code or documentation, please submit a [pull request](https://github.com/DS4SD/docling-serve/pulls).
|
||||||
|
|
||||||
|
A good way to familiarize yourself with the codebase and contribution process is
|
||||||
|
to look for and tackle low-hanging fruit in the [issue tracker](https://github.com/DS4SD/docling-serve/issues).
|
||||||
|
Before embarking on a more ambitious contribution, please quickly [get in touch](#communication) with us.
|
||||||
|
|
||||||
|
For general questions or support requests, please refer to the [discussion section](https://github.com/DS4SD/docling-serve/discussions).
|
||||||
|
|
||||||
|
**Note: We appreciate your effort, and want to avoid a situation where a contribution
|
||||||
|
requires extensive rework (by you or by us), sits in backlog for a long time, or
|
||||||
|
cannot be accepted at all!**
|
||||||
|
|
||||||
|
### Proposing new features
|
||||||
|
|
||||||
|
If you would like to implement a new feature, please [raise an issue](https://github.com/DS4SD/docling-serve/issues)
|
||||||
|
before sending a pull request so the feature can be discussed. This is to avoid
|
||||||
|
you wasting your valuable time working on a feature that the project developers
|
||||||
|
are not interested in accepting into the code base.
|
||||||
|
|
||||||
|
### Fixing bugs
|
||||||
|
|
||||||
|
If you would like to fix a bug, please [raise an issue](https://github.com/DS4SD/docling-serve/issues) before sending a
|
||||||
|
pull request so it can be tracked.
|
||||||
|
|
||||||
|
### Merge approval
|
||||||
|
|
||||||
|
The project maintainers use LGTM (Looks Good To Me) in comments on the code
|
||||||
|
review to indicate acceptance. A change requires LGTMs from two of the
|
||||||
|
maintainers of each component affected.
|
||||||
|
|
||||||
|
For a list of the maintainers, see the [MAINTAINERS.md](MAINTAINERS.md) page.
|
||||||
|
|
||||||
|
|
||||||
|
## Legal
|
||||||
|
|
||||||
|
Each source file must include a license header for the MIT
|
||||||
|
License. Using the SPDX format is the simplest approach.
|
||||||
|
e.g.
|
||||||
|
|
||||||
|
```
|
||||||
|
/*
|
||||||
|
Copyright IBM Inc. All rights reserved.
|
||||||
|
|
||||||
|
SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
```
|
||||||
|
|
||||||
|
We have tried to make it as easy as possible to make contributions. This
|
||||||
|
applies to how we handle the legal aspects of contribution. We use the
|
||||||
|
same approach - the [Developer's Certificate of Origin 1.1 (DCO)](https://github.com/hyperledger/fabric/blob/master/docs/source/DCO1.1.txt) - that the Linux® Kernel [community](https://elinux.org/Developer_Certificate_Of_Origin)
|
||||||
|
uses to manage code contributions.
|
||||||
|
|
||||||
|
We simply ask that when submitting a patch for review, the developer
|
||||||
|
include a sign-off statement in the commit message.
|
||||||
|
|
||||||
|
Here is an example Signed-off-by line, which indicates that the
|
||||||
|
submitter accepts the DCO:
|
||||||
|
|
||||||
|
```
|
||||||
|
Signed-off-by: John Doe <john.doe@example.com>
|
||||||
|
```
|
||||||
|
|
||||||
|
You can include this automatically when you commit a change to your
|
||||||
|
local git repository using the following command:
|
||||||
|
|
||||||
|
```
|
||||||
|
git commit -s
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Communication
|
||||||
|
|
||||||
|
Please feel free to connect with us using the [discussion section](https://github.com/DS4SD/docling-serve/discussions).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Developing
|
||||||
|
|
||||||
|
### Usage of Poetry
|
||||||
|
|
||||||
|
We use Poetry to manage dependencies.
|
||||||
|
|
||||||
|
|
||||||
|
#### Install
|
||||||
|
|
||||||
|
To install, see the documentation here: https://python-poetry.org/docs/master/#installing-with-the-official-installer
|
||||||
|
|
||||||
|
1. Install Poetry globally on your machine
|
||||||
|
```bash
|
||||||
|
curl -sSL https://install.python-poetry.org | python3 -
|
||||||
|
```
|
||||||
|
The installation script will print the installation bin folder `POETRY_BIN` which you need in the next steps.
|
||||||
|
|
||||||
|
2. Make sure Poetry is in your `$PATH`
|
||||||
|
- for `zsh`
|
||||||
|
```sh
|
||||||
|
echo 'export PATH="POETRY_BIN:$PATH"' >> ~/.zshrc
|
||||||
|
```
|
||||||
|
- for `bash`
|
||||||
|
```sh
|
||||||
|
echo 'export PATH="POETRY_BIN:$PATH"' >> ~/.bashrc
|
||||||
|
```
|
||||||
|
|
||||||
|
3. The official guidelines linked above include useful details on the configuration of autocomplete for most shell environments, e.g. Bash and Zsh.
|
||||||
|
|
||||||
|
|
||||||
|
#### Create a Virtual Environment and Install Dependencies
|
||||||
|
|
||||||
|
To activate the Virtual Environment, run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
poetry shell
|
||||||
|
```
|
||||||
|
|
||||||
|
This spawns a shell with the Virtual Environment activated. If the Virtual Environment doesn't exist, Poetry will create one for you. Then, to install dependencies, run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
poetry install
|
||||||
|
```
|
||||||
|
|
||||||
|
**(Advanced) Use a Specific Python Version**
|
||||||
|
|
||||||
|
If for whatever reason you need to work in a specific (older) version of Python, run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
poetry env use $(which python3.10)
|
||||||
|
```
|
||||||
|
|
||||||
|
This creates a Virtual Environment with Python 3.10. For other versions, replace `$(which python3.10)` with the path to the interpreter (e.g., `/usr/bin/python3.11`) or use `$(which pythonX.Y)`. Note that the project requires Python 3.10 or newer.
|
||||||
|
|
||||||
|
|
||||||
|
#### Add a new dependency
|
||||||
|
|
||||||
|
```bash
|
||||||
|
poetry add NAME
|
||||||
|
```
|
||||||
|
|
||||||
|
## Coding style guidelines
|
||||||
|
|
||||||
|
We use the following tools to enforce code style:
|
||||||
|
|
||||||
|
- isort, to sort imports
|
||||||
|
- Black, to format code
|
||||||
|
|
||||||
|
|
||||||
|
We run a series of checks on the code base on every commit, using `pre-commit`. To install the hooks, run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pre-commit install
|
||||||
|
```
|
||||||
|
|
||||||
|
To run the checks on-demand, run:
|
||||||
|
|
||||||
|
```
|
||||||
|
pre-commit run --all-files
|
||||||
|
```
|
||||||
|
|
||||||
|
Note: Checks like `Black` and `isort` will "fail" if they modify files. This is because `pre-commit` treats a hook that modifies files as a failed hook. In these cases, `git add` the modified files and `git commit` again.
|
||||||
|
|
||||||
|
|
||||||
1
Containerfile
Normal file
1
Containerfile
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# TODO
|
||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2024 International Business Machines
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
11
MAINTAINERS.md
Normal file
11
MAINTAINERS.md
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# MAINTAINERS
|
||||||
|
|
||||||
|
- Christoph Auer - [@cau-git](https://github.com/cau-git)
|
||||||
|
- Michele Dolfi - [@dolfim-ibm](https://github.com/dolfim-ibm)
|
||||||
|
- Maxim Lysak - [@maxmnemonic](https://github.com/maxmnemonic)
|
||||||
|
- Nikos Livathinos - [@nikos-livathinos](https://github.com/nikos-livathinos)
|
||||||
|
- Ahmed Nassar - [@nassarofficial](https://github.com/nassarofficial)
|
||||||
|
- Panos Vagenas - [@vagenas](https://github.com/vagenas)
|
||||||
|
- Peter Staar - [@PeterStaar-IBM](https://github.com/PeterStaar-IBM)
|
||||||
|
|
||||||
|
Maintainers can be contacted at [deepsearch-core@zurich.ibm.com](mailto:deepsearch-core@zurich.ibm.com).
|
||||||
33
README.md
Normal file
33
README.md
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
# Docling Serve
|
||||||
|
|
||||||
|
Running [Docling](https://github.com/DS4SD/docling) as an API service.
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> This is an unstable draft implementation which will quickly evolve.
|
||||||
|
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
Install the dependencies and run the server:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
# Install dependencies
|
||||||
|
poetry install
|
||||||
|
|
||||||
|
# Run the server
|
||||||
|
poetry run uvicorn docling_serve.app:app --reload
|
||||||
|
```
|
||||||
|
|
||||||
|
Example payload (http source):
|
||||||
|
|
||||||
|
```sh
|
||||||
|
curl -X 'POST' \
|
||||||
|
'http://127.0.0.1:8000/convert' \
|
||||||
|
-H 'accept: application/json' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{
|
||||||
|
"http_source": {
|
||||||
|
"url": "https://arxiv.org/pdf/2206.01062"
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
0
docling_serve/__init__.py
Normal file
0
docling_serve/__init__.py
Normal file
96
docling_serve/app.py
Normal file
96
docling_serve/app.py
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
import base64
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
from io import BytesIO
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, Union
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from docling.datamodel.base_models import (
|
||||||
|
ConversionStatus,
|
||||||
|
DocumentStream,
|
||||||
|
PipelineOptions,
|
||||||
|
)
|
||||||
|
from docling.datamodel.document import ConversionResult, DocumentConversionInput
|
||||||
|
from docling.document_converter import DocumentConverter
|
||||||
|
from fastapi import FastAPI, HTTPException
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from docling_serve.settings import Settings
|
||||||
|
|
||||||
|
|
||||||
|
# Describes a remote document: where to download it from and which request
# headers to send while doing so.
class HttpSource(BaseModel):
    # Address the document is downloaded from.
    url: str
    # Optional headers forwarded with the download request; empty by default.
    headers: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
|
||||||
|
# Describes a document that is shipped inline with the request.
class FileSource(BaseModel):
    # Document bytes, base64-encoded.
    base64_string: str
    # File name passed to the converter together with the decoded bytes.
    filename: str
|
||||||
|
|
||||||
|
|
||||||
|
# Request body variant for converting a document that is fetched over HTTP.
class ConvertDocumentHttpSourceRequest(BaseModel):
    # Where (and with which headers) to download the document.
    http_source: HttpSource
|
||||||
|
|
||||||
|
|
||||||
|
# Request body variant for converting a document sent inline as base64.
class ConvertDocumentFileSourceRequest(BaseModel):
    # The encoded document together with its file name.
    file_source: FileSource
|
||||||
|
|
||||||
|
|
||||||
|
# Response body returned by the /convert endpoint.
class ConvertDocumentResponse(BaseModel):
    # Markdown rendering of the converted document.
    content_md: str
|
||||||
|
|
||||||
|
|
||||||
|
# A /convert request body is either an inline (base64) file or an HTTP source.
ConvertDocumentRequest = Union[
    ConvertDocumentFileSourceRequest, ConvertDocumentHttpSourceRequest
]


# Module-level registry of shared objects. `lifespan` stores the document
# converter under the "converter" key at startup and clears the dict on
# shutdown. The explicit annotation is needed because the dict is only
# populated from inside another function, so a type checker cannot infer the
# value type from this empty literal.
models: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create the shared document converter for the lifetime of the app.

    Before the ``yield`` a ``DocumentConverter`` is built from the values in
    ``Settings`` and stored in ``models["converter"]``; after the ``yield``
    the ``models`` registry is emptied again.

    Args:
        app: The application this lifespan is attached to (not used here).
    """
    # Converter: OCR and table-structure recognition are toggled via settings.
    settings = Settings()
    pipeline_options = PipelineOptions()
    pipeline_options.do_ocr = settings.do_ocr
    pipeline_options.do_table_structure = settings.do_table_structure
    models["converter"] = DocumentConverter(pipeline_options=pipeline_options)
    try:
        yield
    finally:
        # Release the converter even if an exception is thrown into the
        # generator at the yield point; without `finally` the cleanup would
        # be skipped in that case.
        models.clear()
|
||||||
|
|
||||||
|
|
||||||
|
# The ASGI application; `lifespan` sets up and tears down the shared converter.
app = FastAPI(title="Docling Serve", lifespan=lifespan)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/convert")
def convert_pdf_document(
    body: ConvertDocumentRequest,
) -> ConvertDocumentResponse:
    """Convert a single document to Markdown.

    The document bytes come either from an inline base64 payload
    (``file_source``) or from a download of ``http_source.url``. They are
    wrapped in an in-memory stream and handed to the converter stored in
    ``models["converter"]``.

    Args:
        body: Either a file-source or an HTTP-source conversion request.

    Returns:
        The converted document rendered as Markdown.

    Raises:
        HTTPException: With status 400 if the base64 payload cannot be decoded
            or the document cannot be downloaded; with status 500 if the
            converter yields no result or the conversion does not end with
            ``ConversionStatus.SUCCESS``.
    """
    filename: str
    buf: BytesIO

    if isinstance(body, ConvertDocumentFileSourceRequest):
        try:
            raw = base64.b64decode(body.file_source.base64_string)
        except ValueError as exc:  # binascii.Error is a ValueError subclass
            raise HTTPException(
                status_code=400, detail=f"Invalid base64 payload: {exc}"
            ) from exc
        buf = BytesIO(raw)
        filename = body.file_source.filename
    elif isinstance(body, ConvertDocumentHttpSourceRequest):
        try:
            http_res = httpx.get(
                body.http_source.url, headers=body.http_source.headers
            )
            # Without this check an error page (e.g. a 404 body) would be
            # passed to the converter as if it were the document.
            http_res.raise_for_status()
        except (httpx.HTTPError, httpx.InvalidURL) as exc:
            raise HTTPException(
                status_code=400, detail=f"Could not fetch document: {exc}"
            ) from exc
        buf = BytesIO(http_res.content)
        filename = Path(
            body.http_source.url
        ).name  # TODO: use better way to detect filename, e.g. from Content-Disposition

    docs_input = DocumentConversionInput.from_streams(
        [DocumentStream(filename=filename, stream=buf)]
    )
    result: Union[ConversionResult, None] = next(
        models["converter"].convert(docs_input), None
    )

    # Handle "no result" separately: reading `result.errors` on None would
    # raise AttributeError instead of returning a proper error response.
    if result is None:
        raise HTTPException(
            status_code=500,
            detail={"errors": ["The converter did not return a result."]},
        )
    if result.status != ConversionStatus.SUCCESS:
        raise HTTPException(status_code=500, detail={"errors": result.errors})

    return ConvertDocumentResponse(content_md=result.render_as_markdown())
|
||||||
0
docling_serve/py.typed
Normal file
0
docling_serve/py.typed
Normal file
8
docling_serve/settings.py
Normal file
8
docling_serve/settings.py
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
|
|
||||||
|
|
||||||
|
class Settings(BaseSettings):
    """Service configuration for the document conversion pipeline.

    Values are loaded by ``BaseSettings`` using the ``DOCLING_`` environment
    variable prefix configured below.
    """

    model_config = SettingsConfigDict(env_prefix="DOCLING_")

    # Copied onto PipelineOptions.do_ocr when the converter is created.
    do_ocr: bool = True
    # Copied onto PipelineOptions.do_table_structure likewise.
    do_table_structure: bool = True
|
||||||
4774
poetry.lock
generated
Normal file
4774
poetry.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
77
pyproject.toml
Normal file
77
pyproject.toml
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
[tool.poetry]
|
||||||
|
name = "docling-serve"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Running Docling as a service"
|
||||||
|
license = "MIT"
|
||||||
|
authors = [
|
||||||
|
"Michele Dolfi <dol@zurich.ibm.com>",
|
||||||
|
"Christoph Auer <cau@zurich.ibm.com>",
|
||||||
|
"Panos Vagenas <pva@zurich.ibm.com>",
|
||||||
|
"Cesar Berrospi Ramis <ceb@zurich.ibm.com>",
|
||||||
|
"Peter Staar <taa@zurich.ibm.com>",
|
||||||
|
]
|
||||||
|
maintainers = [
|
||||||
|
"Peter Staar <taa@zurich.ibm.com>",
|
||||||
|
"Christoph Auer <cau@zurich.ibm.com>",
|
||||||
|
"Michele Dolfi <dol@zurich.ibm.com>",
|
||||||
|
"Cesar Berrospi Ramis <ceb@zurich.ibm.com>",
|
||||||
|
"Panos Vagenas <pva@zurich.ibm.com>",
|
||||||
|
]
|
||||||
|
readme = "README.md"
|
||||||
|
repository = "https://github.com/DS4SD/docling-serve"
|
||||||
|
homepage = "https://github.com/DS4SD/docling-serve"
|
||||||
|
classifiers = [
|
||||||
|
"License :: OSI Approved :: MIT License",
|
||||||
|
"Operating System :: OS Independent",
|
||||||
|
# "Development Status :: 5 - Production/Stable",
|
||||||
|
"Intended Audience :: Developers",
|
||||||
|
"Typing :: Typed",
|
||||||
|
"Programming Language :: Python :: 3"
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
[tool.poetry.dependencies]
|
||||||
|
python = "^3.10"
|
||||||
|
docling = "^1.9.0"
|
||||||
|
fastapi = {version = "^0.110.2", extras = ["standard"]}
|
||||||
|
uvicorn = "^0.30.6"
|
||||||
|
pydantic-settings = "^2.4.0"
|
||||||
|
httpx = "^0.27.2"
|
||||||
|
|
||||||
|
|
||||||
|
[tool.poetry.group.dev.dependencies]
|
||||||
|
black = "^24.8.0"
|
||||||
|
isort = "^5.13.2"
|
||||||
|
pre-commit = "^3.8.0"
|
||||||
|
autoflake = "^2.3.1"
|
||||||
|
flake8 = "^7.1.1"
|
||||||
|
pytest = "^8.3.2"
|
||||||
|
mypy = "^1.11.2"
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
||||||
|
|
||||||
|
|
||||||
|
[tool.black]
|
||||||
|
line-length = 88
|
||||||
|
target-version = ["py310"]
|
||||||
|
include = '\.pyi?$'
|
||||||
|
|
||||||
|
[tool.isort]
|
||||||
|
profile = "black"
|
||||||
|
line_length = 88
|
||||||
|
py_version=311
|
||||||
|
|
||||||
|
[tool.autoflake]
|
||||||
|
in-place = true
|
||||||
|
remove-all-unused-imports = true
|
||||||
|
remove-unused-variables = true
|
||||||
|
expand-star-imports = true
|
||||||
|
recursive = true
|
||||||
|
|
||||||
|
[[tool.mypy.overrides]]
|
||||||
|
module = [
|
||||||
|
"docling.*",
|
||||||
|
]
|
||||||
|
ignore_missing_imports = true
|
||||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
Reference in New Issue
Block a user