mirror of
https://github.com/docling-project/docling-serve.git
synced 2025-11-29 08:33:50 +00:00
fix: avoid missing specialized keys in the options hash (#166)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -42,15 +42,12 @@ _log = logging.getLogger(__name__)
|
||||
# Custom serializer for PdfFormatOption
|
||||
# (model_dump_json does not work with some classes)
|
||||
def _hash_pdf_format_option(pdf_format_option: PdfFormatOption) -> bytes:
|
||||
data = pdf_format_option.model_dump()
|
||||
data = pdf_format_option.model_dump(serialize_as_any=True)
|
||||
|
||||
# pipeline_options are not fully serialized by model_dump, dedicated pass
|
||||
if pdf_format_option.pipeline_options:
|
||||
data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump()
|
||||
|
||||
# Replace `artifacts_path` with a string representation
|
||||
data["pipeline_options"]["artifacts_path"] = repr(
|
||||
data["pipeline_options"]["artifacts_path"]
|
||||
data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump(
|
||||
serialize_as_any=True, mode="json"
|
||||
)
|
||||
|
||||
# Replace `pipeline_cls` with a string representation
|
||||
@@ -59,12 +56,6 @@ def _hash_pdf_format_option(pdf_format_option: PdfFormatOption) -> bytes:
|
||||
# Replace `backend` with a string representation
|
||||
data["backend"] = repr(data["backend"])
|
||||
|
||||
# Handle `device` in `accelerator_options`
|
||||
if "accelerator_options" in data and "device" in data["accelerator_options"]:
|
||||
data["accelerator_options"]["device"] = repr(
|
||||
data["accelerator_options"]["device"]
|
||||
)
|
||||
|
||||
# Serialize the dictionary to JSON with sorted keys to have consistent hashes
|
||||
serialized_data = json.dumps(data, sort_keys=True)
|
||||
options_hash = hashlib.sha1(serialized_data.encode()).digest()
|
||||
|
||||
54
tests/test_options_serialization.py
Normal file
54
tests/test_options_serialization.py
Normal file
@@ -0,0 +1,54 @@
|
||||
from docling_serve.datamodel.convert import (
|
||||
ConvertDocumentsOptions,
|
||||
PictureDescriptionApi,
|
||||
)
|
||||
from docling_serve.docling_conversion import (
|
||||
_hash_pdf_format_option,
|
||||
get_pdf_pipeline_opts,
|
||||
)
|
||||
|
||||
|
||||
def test_options_cache_key():
    """Check that each change to the conversion options yields a new hash.

    The options are mutated step by step (defaults, picture description
    enabled, a picture description API configuration, a different ``model``
    parameter, a different prompt).  After every step the PDF format option
    is rebuilt with ``get_pdf_pipeline_opts`` and hashed with
    ``_hash_pdf_format_option``; the resulting hash must differ from every
    hash produced by the earlier steps.
    """
    seen_hashes = set()

    def assert_new_hash(options, step):
        # Rebuild the format option from the current state of `options`,
        # hash it, and require that this hash has not been produced before.
        pdf_format_option = get_pdf_pipeline_opts(options)
        options_hash = _hash_pdf_format_option(pdf_format_option)
        assert options_hash not in seen_hashes, f"hash not unique after: {step}"
        seen_hashes.add(options_hash)

    opts = ConvertDocumentsOptions()
    assert_new_hash(opts, "default options")

    opts.do_picture_description = True
    assert_new_hash(opts, "do_picture_description enabled")

    opts.picture_description_api = PictureDescriptionApi(
        url="http://localhost",
        params={"model": "mymodel"},
        prompt="Hello 1",
    )
    assert_new_hash(opts, "picture_description_api set")

    # Same URL and prompt as the previous step, only the nested `params`
    # value differs.
    opts.picture_description_api = PictureDescriptionApi(
        url="http://localhost",
        params={"model": "your-model"},
        prompt="Hello 1",
    )
    assert_new_hash(opts, "picture_description_api params changed")

    # In-place change of a single field on the existing API options object.
    opts.picture_description_api.prompt = "World"
    assert_new_hash(opts, "picture_description_api prompt changed")
|
||||
Reference in New Issue
Block a user