Linting

* validate python formatting on every build with Ruff
* fix lint warnings
2026-02-07 14:51:52 +00:00 · 2023-05-13 10:36:17 +02:00
parent 168648e789
commit 962becb9a5
35 changed files with 277 additions and 252 deletions
--- a/scripts/parser/file/base.py
+++ b/scripts/parser/file/base.py
@@ -3,7 +3,6 @@ from abc import abstractmethod
 from typing import Any, List

 from langchain.docstore.document import Document as LCDocument
-
 from parser.schema.base import Document


--- a/scripts/parser/file/html_parser.py
+++ b/scripts/parser/file/html_parser.py
@@ -24,12 +24,11 @@ class HTMLParser(BaseParser):
            Union[str, List[str]]: a string or a List of strings.
        """
        try:
-            import unstructured
+            from unstructured.partition.html import partition_html
+            from unstructured.staging.base import convert_to_isd
+            from unstructured.cleaners.core import clean
        except ImportError:
            raise ValueError("unstructured package is required to parse HTML files.")
-        from unstructured.partition.html import partition_html
-        from unstructured.staging.base import convert_to_isd
-        from unstructured.cleaners.core import clean

        # Using the unstructured library to convert the html to isd format
        # isd sample : isd = [
@@ -70,7 +69,8 @@ class HTMLParser(BaseParser):
                Chunks.append([])
            Chunks[-1].append(isd_el['text'])

-        # Removing all the chunks with sum of lenth of all the strings in the chunk < 25 #TODO: This value can be an user defined variable
+        # Removing all the chunks whose total string length is < 25
+        # TODO: This value can be a user-defined variable
        for chunk in Chunks:
            # sum of lenth of all the strings in the chunk
            sum = 0
--- a/scripts/parser/file/markdown_parser.py
+++ b/scripts/parser/file/markdown_parser.py
@@ -7,8 +7,8 @@ import re
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Union, cast

-from parser.file.base_parser import BaseParser
 import tiktoken
+from parser.file.base_parser import BaseParser


 class MarkdownParser(BaseParser):
--- a/scripts/parser/file/rst_parser.py
+++ b/scripts/parser/file/rst_parser.py
@@ -5,10 +5,9 @@ Contains parser for md files.
 """
 import re
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union, cast
+from typing import Any, Dict, List, Optional, Tuple, Union

 from parser.file.base_parser import BaseParser
-import tiktoken


 class RstParser(BaseParser):