From d51c9c8ff6beb70e260917bb89fe2435b874ee85 Mon Sep 17 00:00:00 2001
From: Yury Kossakovsky
Date: Fri, 29 Aug 2025 17:31:47 -0600
Subject: [PATCH] Add document preprocessing submodules to OCR configuration

- Introduced new `SubPipelines` section in `ocr_config.yaml` to include `DocPreprocessor` settings.
- Added submodules for document orientation classification and unwarping, enhancing the OCR pipeline's ability to process various document formats and improve accuracy.
---
 paddlex/ocr_config.yaml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/paddlex/ocr_config.yaml b/paddlex/ocr_config.yaml
index 1725f14..e922ef3 100644
--- a/paddlex/ocr_config.yaml
+++ b/paddlex/ocr_config.yaml
@@ -14,3 +14,18 @@ SubModules:
     model_dir: null
     batch_size: 1
     score_thresh: 0
+
+SubPipelines:
+  DocPreprocessor:
+    pipeline_name: doc_preprocessor
+    use_doc_orientation_classify: true
+    use_doc_unwarping: true
+    SubModules:
+      DocOrientationClassify:
+        module_name: doc_text_orientation
+        model_name: PP-LCNet_x1_0_doc_ori
+        model_dir: null
+      DocUnwarping:
+        module_name: image_unwarping
+        model_name: UVDoc
+        model_dir: null