From 168648e7891082855f0cbf15eec48e522bf646b8 Mon Sep 17 00:00:00 2001
From: Anton Larin <larinam@gmail.com>
Date: Fri, 12 May 2023 12:02:25 +0200
Subject: [PATCH] Proper PEP8 formatting

---
 application/app.py                     |  5 +++-
 application/parser/file/bulk.py        | 28 ++++++++++-----------
 scripts/code_docs_gen.py               | 10 +-------
 scripts/ingest.py                      | 32 ++++++++++++------------
 scripts/parser/file/bulk.py            | 28 ++++++++++-----------
 scripts/parser/file/html_parser.py     | 21 ++++++++--------
 scripts/parser/file/markdown_parser.py | 23 ++++++++---------
 scripts/parser/file/rst_parser.py      | 33 +++++++++++++------------
 scripts/parser/file/tabular_parser.py  | 14 +++++------
 scripts/parser/java2doc.py             |  6 ++++-
 scripts/parser/js2doc.py               |  3 +++
 scripts/parser/open_ai_func.py         | 34 ++++++++++++++------------
 scripts/parser/py2doc.py               | 19 +++++++++-----
 scripts/parser/token_func.py           |  6 ++++-
 14 files changed, 139 insertions(+), 123 deletions(-)

diff --git a/application/app.py b/application/app.py
index cd0784e0..19d5f0c3 100644
--- a/application/app.py
+++ b/application/app.py
@@ -90,10 +90,12 @@ mongo = MongoClient(app.config['MONGO_URI'])
 db = mongo["docsgpt"]
 vectors_collection = db["vectors"]
 
+
 async def async_generate(chain, question, chat_history):
     result = await chain.arun({"question": question, "chat_history": chat_history})
     return result
 
+
 def run_async_chain(chain, question, chat_history):
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)
@@ -105,6 +107,7 @@ def run_async_chain(chain, question, chat_history):
     result["answer"] = answer
     return result
 
+
 @celery.task(bind=True)
 def ingest(self, directory, formats, name_job, filename, user):
     resp = ingest_worker(self, directory, formats, name_job, filename, user)
@@ -206,7 +209,7 @@ def api_answer():
                 combine_docs_chain=doc_chain,
             )
             chat_history = []
-            #result = chain({"question": question, "chat_history": chat_history})
+            # result = chain({"question": question, "chat_history": chat_history})
             # generate async with async generate method
             result = run_async_chain(chain, question, chat_history)
         else:
diff --git a/application/parser/file/bulk.py b/application/parser/file/bulk.py
index b22f16b3..2be8e328 100644
--- a/application/parser/file/bulk.py
+++ b/application/parser/file/bulk.py
@@ -52,17 +52,17 @@ class SimpleDirectoryReader(BaseReader):
     """
 
     def __init__(
-        self,
-        input_dir: Optional[str] = None,
-        input_files: Optional[List] = None,
-        exclude_hidden: bool = True,
-        errors: str = "ignore",
-        recursive: bool = True,
-        required_exts: Optional[List[str]] = None,
-        file_extractor: Optional[Dict[str, BaseParser]] = None,
-        num_files_limit: Optional[int] = None,
-        file_metadata: Optional[Callable[[str], Dict]] = None,
-        chunk_size_max: int = 2048,
+            self,
+            input_dir: Optional[str] = None,
+            input_files: Optional[List] = None,
+            exclude_hidden: bool = True,
+            errors: str = "ignore",
+            recursive: bool = True,
+            required_exts: Optional[List[str]] = None,
+            file_extractor: Optional[Dict[str, BaseParser]] = None,
+            num_files_limit: Optional[int] = None,
+            file_metadata: Optional[Callable[[str], Dict]] = None,
+            chunk_size_max: int = 2048,
     ) -> None:
         """Initialize with parameters."""
         super().__init__()
@@ -102,8 +102,8 @@ class SimpleDirectoryReader(BaseReader):
             elif self.exclude_hidden and input_file.name.startswith("."):
                 continue
             elif (
-                self.required_exts is not None
-                and input_file.suffix not in self.required_exts
+                    self.required_exts is not None
+                    and input_file.suffix not in self.required_exts
             ):
                 continue
             else:
@@ -114,7 +114,7 @@ class SimpleDirectoryReader(BaseReader):
             new_input_files.extend(sub_input_files)
 
         if self.num_files_limit is not None and self.num_files_limit > 0:
-            new_input_files = new_input_files[0 : self.num_files_limit]
+            new_input_files = new_input_files[0: self.num_files_limit]
 
         # print total number of files added
         logging.debug(
diff --git a/scripts/code_docs_gen.py b/scripts/code_docs_gen.py
index a2170eaa..3b057506 100644
--- a/scripts/code_docs_gen.py
+++ b/scripts/code_docs_gen.py
@@ -11,10 +11,10 @@ import tiktoken
 import sys
 from argparse import ArgumentParser
 import ast
+import json
 
 dotenv.load_dotenv()
 
-
 ps = list(Path("inputs").glob("**/*.py"))
 data = []
 sources = []
@@ -24,7 +24,6 @@ for p in ps:
     sources.append(p)
 
 
-
 # with open('inputs/client.py', 'r') as f:
 #     tree = ast.parse(f.read())
 
@@ -64,11 +63,9 @@ for code in data:
     c1 += 1
 
 # save the structure dict as json
-import json
 with open('structure_dict.json', 'w') as f:
     json.dump(structure_dict, f)
 
-
 # llm = OpenAI(temperature=0)
 # prompt = PromptTemplate(
 #     input_variables=["code"],
@@ -119,8 +116,3 @@ for source, classes in structure_dict.items():
             else:
                 with open(f"outputs/{source_w}", "a") as f:
                     f.write(f"\n\nFunction: {functions[function]}, \nDocumentation: {response}")
-
-
-
-
-
diff --git a/scripts/ingest.py b/scripts/ingest.py
index e8082c5d..72f497f3 100644
--- a/scripts/ingest.py
+++ b/scripts/ingest.py
@@ -16,7 +16,6 @@ from parser.js2doc import extract_functions_and_classes as extract_js
 from parser.java2doc import extract_functions_and_classes as extract_java
 from parser.token_func import group_split
 
-
 dotenv.load_dotenv()
 
 app = typer.Typer(add_completion=False)
@@ -25,28 +24,28 @@ nltk.download('punkt', quiet=True)
 nltk.download('averaged_perceptron_tagger', quiet=True)
 
 
-#Splits all files in specified folder to documents
+# Splits all files in specified folder to documents
 @app.command()
 def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
-                                                   help="Whether to skip price confirmation"),
+                                    help="Whether to skip price confirmation"),
            dir: Optional[List[str]] = typer.Option(["inputs"],
                                                    help="""List of paths to directory for index creation.
                                                         E.g. --dir inputs --dir inputs2"""),
            file: Optional[List[str]] = typer.Option(None,
-                                                   help="""File paths to use (Optional; overrides dir).
+                                                    help="""File paths to use (Optional; overrides dir).
                                                         E.g. --file inputs/1.md --file inputs/2.md"""),
            recursive: Optional[bool] = typer.Option(True, help="Whether to recursively search in subdirectories."),
            limit: Optional[int] = typer.Option(None, help="Maximum number of files to read."),
            formats: Optional[List[str]] = typer.Option([".rst", ".md"],
-                                                   help="""List of required extensions (list with .)
+                                                       help="""List of required extensions (list with .)
                                                         Currently supported: .rst, .md, .pdf, .docx, .csv, .epub, .html, .mdx"""),
            exclude: Optional[bool] = typer.Option(True, help="Whether to exclude hidden files (dotfiles)."),
-           sample: Optional[bool] = typer.Option(False, help="Whether to output sample of the first 5 split documents."),
+           sample: Optional[bool] = typer.Option(False,
+                                                 help="Whether to output sample of the first 5 split documents."),
            token_check: Optional[bool] = typer.Option(True, help="Whether to group small documents and split large."),
            min_tokens: Optional[int] = typer.Option(150, help="Minimum number of tokens to not group."),
            max_tokens: Optional[int] = typer.Option(2000, help="Maximum number of tokens to not split."),
            ):
-
     """
         Creates index from specified location or files.
         By default /inputs folder is used, .rst and .md are parsed.
@@ -59,19 +58,19 @@ def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
 
         # Here we split the documents, as needed, into smaller chunks.
         # We do this due to the context limits of the LLMs.
-        raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)
-        #Old method
+        raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens,
+                               token_check=token_check)
+        # Old method
         # text_splitter = RecursiveCharacterTextSplitter()
         # docs = text_splitter.split_documents(raw_docs)
 
-        #Sample feature
+        # Sample feature
         if sample == True:
             for i in range(min(5, len(raw_docs))):
                 print(raw_docs[i].text)
 
         docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
 
-
         # Here we check for command line arguments for bot calls.
         # If no argument exists or the yes is not True, then the
         # user permission is requested to call the API.
@@ -98,12 +97,11 @@ def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
 
 @app.command()
 def convert(dir: Optional[str] = typer.Option("inputs",
-                                                   help="""Path to directory to make documentation for.
+                                              help="""Path to directory to make documentation for.
                                                         E.g. --dir inputs """),
             formats: Optional[str] = typer.Option("py",
-                                                        help="""Required language. 
+                                                  help="""Required language. 
                                                         py, js, java supported for now""")):
-
     """
             Creates documentation linked to original functions from specified location.
             By default /inputs folder is used, .py is parsed.
@@ -117,7 +115,7 @@ def convert(dir: Optional[str] = typer.Option("inputs",
     else:
         raise Exception("Sorry, language not supported yet")
     transform_to_docs(functions_dict, classes_dict, formats, dir)
+
+
 if __name__ == "__main__":
-  app()
-
-
+    app()
diff --git a/scripts/parser/file/bulk.py b/scripts/parser/file/bulk.py
index 871123ce..8a963104 100644
--- a/scripts/parser/file/bulk.py
+++ b/scripts/parser/file/bulk.py
@@ -52,17 +52,17 @@ class SimpleDirectoryReader(BaseReader):
     """
 
     def __init__(
-        self,
-        input_dir: Optional[str] = None,
-        input_files: Optional[List] = None,
-        exclude_hidden: bool = True,
-        errors: str = "ignore",
-        recursive: bool = True,
-        required_exts: Optional[List[str]] = None,
-        file_extractor: Optional[Dict[str, BaseParser]] = None,
-        num_files_limit: Optional[int] = None,
-        file_metadata: Optional[Callable[[str], Dict]] = None,
-        chunk_size_max: int = 2048,
+            self,
+            input_dir: Optional[str] = None,
+            input_files: Optional[List] = None,
+            exclude_hidden: bool = True,
+            errors: str = "ignore",
+            recursive: bool = True,
+            required_exts: Optional[List[str]] = None,
+            file_extractor: Optional[Dict[str, BaseParser]] = None,
+            num_files_limit: Optional[int] = None,
+            file_metadata: Optional[Callable[[str], Dict]] = None,
+            chunk_size_max: int = 2048,
     ) -> None:
         """Initialize with parameters."""
         super().__init__()
@@ -103,8 +103,8 @@ class SimpleDirectoryReader(BaseReader):
             elif self.exclude_hidden and input_file.name.startswith("."):
                 continue
             elif (
-                self.required_exts is not None
-                and input_file.suffix not in self.required_exts
+                    self.required_exts is not None
+                    and input_file.suffix not in self.required_exts
             ):
                 continue
             else:
@@ -115,7 +115,7 @@ class SimpleDirectoryReader(BaseReader):
             new_input_files.extend(sub_input_files)
 
         if self.num_files_limit is not None and self.num_files_limit > 0:
-            new_input_files = new_input_files[0 : self.num_files_limit]
+            new_input_files = new_input_files[0: self.num_files_limit]
 
         # print total number of files added
         logging.debug(
diff --git a/scripts/parser/file/html_parser.py b/scripts/parser/file/html_parser.py
index 53d7492f..73ce97d3 100644
--- a/scripts/parser/file/html_parser.py
+++ b/scripts/parser/file/html_parser.py
@@ -9,6 +9,7 @@ from typing import Dict, Union
 
 from parser.file.base_parser import BaseParser
 
+
 class HTMLParser(BaseParser):
     """HTML parser."""
 
@@ -32,12 +33,12 @@ class HTMLParser(BaseParser):
 
         # Using the unstructured library to convert the html to isd format
         # isd sample : isd = [
-                            #   {"text": "My Title", "type": "Title"},
-                            #   {"text": "My Narrative", "type": "NarrativeText"}
-                            # ]
+        #   {"text": "My Title", "type": "Title"},
+        #   {"text": "My Narrative", "type": "NarrativeText"}
+        # ]
         with open(file, "r", encoding="utf-8") as fp:
             elements = partition_html(file=fp)
-            isd = convert_to_isd(elements)  
+            isd = convert_to_isd(elements)
 
         # Removing non ascii charactwers from isd_el['text']
         for isd_el in isd:
@@ -46,15 +47,15 @@ class HTMLParser(BaseParser):
         # Removing all the \n characters from isd_el['text'] using regex and replace with single space
         # Removing all the extra spaces  from isd_el['text'] using regex and replace with single space
         for isd_el in isd:
-            isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE|re.DOTALL)
-            isd_el['text'] = re.sub(r"\s{2,}"," ", isd_el['text'], flags=re.MULTILINE|re.DOTALL)
+            isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE | re.DOTALL)
+            isd_el['text'] = re.sub(r"\s{2,}", " ", isd_el['text'], flags=re.MULTILINE | re.DOTALL)
 
         # more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation
         for isd_el in isd:
-            clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True )
+            clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True)
 
         # Creating a list of all the indexes of isd_el['type'] = 'Title'
-        title_indexes = [i for i,isd_el in enumerate(isd) if isd_el['type'] == 'Title']
+        title_indexes = [i for i, isd_el in enumerate(isd) if isd_el['type'] == 'Title']
 
         # Creating 'Chunks' - List of lists of strings 
         # each list starting with with isd_el['type'] = 'Title' and all the data till the next 'Title'
@@ -64,7 +65,7 @@ class HTMLParser(BaseParser):
         Chunks = [[]]
         final_chunks = list(list())
 
-        for i,isd_el in enumerate(isd):
+        for i, isd_el in enumerate(isd):
             if i in title_indexes:
                 Chunks.append([])
             Chunks[-1].append(isd_el['text'])
@@ -76,7 +77,7 @@ class HTMLParser(BaseParser):
             sum += len(str(chunk))
             if sum < 25:
                 Chunks.remove(chunk)
-            else :         
+            else:
                 # appending all the approved chunks to final_chunks as a single string       
                 final_chunks.append(" ".join([str(item) for item in chunk]))
         return final_chunks
diff --git a/scripts/parser/file/markdown_parser.py b/scripts/parser/file/markdown_parser.py
index 2dd9e430..0b767a63 100644
--- a/scripts/parser/file/markdown_parser.py
+++ b/scripts/parser/file/markdown_parser.py
@@ -20,13 +20,13 @@ class MarkdownParser(BaseParser):
     """
 
     def __init__(
-        self,
-        *args: Any,
-        remove_hyperlinks: bool = True,
-        remove_images: bool = True,
-        max_tokens: int = 2048,
-        # remove_tables: bool = True,
-        **kwargs: Any,
+            self,
+            *args: Any,
+            remove_hyperlinks: bool = True,
+            remove_images: bool = True,
+            max_tokens: int = 2048,
+            # remove_tables: bool = True,
+            **kwargs: Any,
     ) -> None:
         """Init params."""
         super().__init__(*args, **kwargs)
@@ -35,8 +35,8 @@ class MarkdownParser(BaseParser):
         self._max_tokens = max_tokens
         # self._remove_tables = remove_tables
 
-
-    def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str], current_text: str):
+    def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str],
+                          current_text: str):
         """Append to tups chunk."""
         num_tokens = len(tiktoken.get_encoding("cl100k_base").encode(current_text))
         if num_tokens > self._max_tokens:
@@ -46,6 +46,7 @@ class MarkdownParser(BaseParser):
         else:
             tups.append((current_header, current_text))
         return tups
+
     def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
         """Convert a markdown file to a dictionary.
 
@@ -115,7 +116,7 @@ class MarkdownParser(BaseParser):
         return {}
 
     def parse_tups(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
     ) -> List[Tuple[Optional[str], str]]:
         """Parse file into tuples."""
         with open(filepath, "r") as f:
@@ -130,7 +131,7 @@ class MarkdownParser(BaseParser):
         return markdown_tups
 
     def parse_file(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
     ) -> Union[str, List[str]]:
         """Parse file into string."""
         tups = self.parse_tups(filepath, errors=errors)
diff --git a/scripts/parser/file/rst_parser.py b/scripts/parser/file/rst_parser.py
index 1719b84c..4e8fd630 100644
--- a/scripts/parser/file/rst_parser.py
+++ b/scripts/parser/file/rst_parser.py
@@ -10,6 +10,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union, cast
 from parser.file.base_parser import BaseParser
 import tiktoken
 
+
 class RstParser(BaseParser):
     """reStructuredText parser.
 
@@ -19,17 +20,17 @@ class RstParser(BaseParser):
     """
 
     def __init__(
-        self,
-        *args: Any,
-        remove_hyperlinks: bool = True,
-        remove_images: bool = True,
-        remove_table_excess: bool = True,
-        remove_interpreters: bool = True,
-        remove_directives: bool = True,
-        remove_whitespaces_excess: bool = True,
-        #Be carefull with remove_characters_excess, might cause data loss
-        remove_characters_excess: bool = True,
-        **kwargs: Any,
+            self,
+            *args: Any,
+            remove_hyperlinks: bool = True,
+            remove_images: bool = True,
+            remove_table_excess: bool = True,
+            remove_interpreters: bool = True,
+            remove_directives: bool = True,
+            remove_whitespaces_excess: bool = True,
+            # Be careful with remove_characters_excess, might cause data loss
+            remove_characters_excess: bool = True,
+            **kwargs: Any,
     ) -> None:
         """Init params."""
         super().__init__(*args, **kwargs)
@@ -41,7 +42,6 @@ class RstParser(BaseParser):
         self._remove_whitespaces_excess = remove_whitespaces_excess
         self._remove_characters_excess = remove_characters_excess
 
-
     def rst_to_tups(self, rst_text: str) -> List[Tuple[Optional[str], str]]:
         """Convert a reStructuredText file to a dictionary.
 
@@ -56,7 +56,8 @@ class RstParser(BaseParser):
 
         for i, line in enumerate(lines):
             header_match = re.match(r"^[^\S\n]*[-=]+[^\S\n]*$", line)
-            if header_match and i > 0 and (len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
+            if header_match and i > 0 and (
+                    len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
                 if current_header is not None:
                     if current_text == "" or None:
                         continue
@@ -72,7 +73,7 @@ class RstParser(BaseParser):
 
         rst_tups.append((current_header, current_text))
 
-        #TODO: Format for rst
+        # TODO: Format for rst
         #
         # if current_header is not None:
         #     # pass linting, assert keys are defined
@@ -136,7 +137,7 @@ class RstParser(BaseParser):
         return {}
 
     def parse_tups(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
     ) -> List[Tuple[Optional[str], str]]:
         """Parse file into tuples."""
         with open(filepath, "r") as f:
@@ -159,7 +160,7 @@ class RstParser(BaseParser):
         return rst_tups
 
     def parse_file(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
     ) -> Union[str, List[str]]:
         """Parse file into string."""
         tups = self.parse_tups(filepath, errors=errors)
diff --git a/scripts/parser/file/tabular_parser.py b/scripts/parser/file/tabular_parser.py
index bbb875e1..d7c6402a 100644
--- a/scripts/parser/file/tabular_parser.py
+++ b/scripts/parser/file/tabular_parser.py
@@ -77,13 +77,13 @@ class PandasCSVParser(BaseParser):
     """
 
     def __init__(
-        self,
-        *args: Any,
-        concat_rows: bool = True,
-        col_joiner: str = ", ",
-        row_joiner: str = "\n",
-        pandas_config: dict = {},
-        **kwargs: Any
+            self,
+            *args: Any,
+            concat_rows: bool = True,
+            col_joiner: str = ", ",
+            row_joiner: str = "\n",
+            pandas_config: dict = {},
+            **kwargs: Any
     ) -> None:
         """Init params."""
         super().__init__(*args, **kwargs)
diff --git a/scripts/parser/java2doc.py b/scripts/parser/java2doc.py
index c1701c5d..7f97750d 100644
--- a/scripts/parser/java2doc.py
+++ b/scripts/parser/java2doc.py
@@ -1,6 +1,7 @@
 import os
 import javalang
 
+
 def find_files(directory):
     files_list = []
     for root, dirs, files in os.walk(directory):
@@ -9,6 +10,7 @@ def find_files(directory):
                 files_list.append(os.path.join(root, file))
     return files_list
 
+
 def extract_functions(file_path):
     with open(file_path, "r") as file:
         java_code = file.read()
@@ -28,6 +30,7 @@ def extract_functions(file_path):
             methods[method_name] = method_source_code
     return methods
 
+
 def extract_classes(file_path):
     with open(file_path, 'r') as file:
         source_code = file.read()
@@ -47,6 +50,7 @@ def extract_classes(file_path):
             classes[class_name] = class_string
     return classes
 
+
 def extract_functions_and_classes(directory):
     files = find_files(directory)
     functions_dict = {}
@@ -58,4 +62,4 @@ def extract_functions_and_classes(directory):
         classes = extract_classes(file)
         if classes:
             classes_dict[file] = classes
-    return functions_dict, classes_dict
\ No newline at end of file
+    return functions_dict, classes_dict
diff --git a/scripts/parser/js2doc.py b/scripts/parser/js2doc.py
index d434ab23..3c99a0a6 100644
--- a/scripts/parser/js2doc.py
+++ b/scripts/parser/js2doc.py
@@ -11,6 +11,7 @@ def find_files(directory):
                 files_list.append(os.path.join(root, file))
     return files_list
 
+
 def extract_functions(file_path):
     with open(file_path, 'r') as file:
         source_code = file.read()
@@ -38,6 +39,7 @@ def extract_functions(file_path):
                                 functions[func_name] = escodegen.generate(declaration.init)
         return functions
 
+
 def extract_classes(file_path):
     with open(file_path, 'r') as file:
         source_code = file.read()
@@ -53,6 +55,7 @@ def extract_classes(file_path):
                 classes[class_name] = ", ".join(function_names)
     return classes
 
+
 def extract_functions_and_classes(directory):
     files = find_files(directory)
     functions_dict = {}
diff --git a/scripts/parser/open_ai_func.py b/scripts/parser/open_ai_func.py
index 368e0d56..ef6ea597 100644
--- a/scripts/parser/open_ai_func.py
+++ b/scripts/parser/open_ai_func.py
@@ -5,28 +5,29 @@ import tiktoken
 from langchain.vectorstores import FAISS
 from langchain.embeddings import OpenAIEmbeddings
 
-#from langchain.embeddings import HuggingFaceEmbeddings
-#from langchain.embeddings import HuggingFaceInstructEmbeddings
-#from langchain.embeddings import CohereEmbeddings
+# from langchain.embeddings import HuggingFaceEmbeddings
+# from langchain.embeddings import HuggingFaceInstructEmbeddings
+# from langchain.embeddings import CohereEmbeddings
 
 from retry import retry
 
 
-
 def num_tokens_from_string(string: str, encoding_name: str) -> int:
-# Function to convert string to tokens and estimate user cost.
+    # Function to convert string to tokens and estimate user cost.
     encoding = tiktoken.get_encoding(encoding_name)
     num_tokens = len(encoding.encode(string))
-    total_price = ((num_tokens/1000) * 0.0004)
+    total_price = ((num_tokens / 1000) * 0.0004)
     return num_tokens, total_price
 
+
 @retry(tries=10, delay=60)
 def store_add_texts_with_retry(store, i):
     store.add_texts([i.page_content], metadatas=[i.metadata])
-    #store_pine.add_texts([i.page_content], metadatas=[i.metadata])
+    # store_pine.add_texts([i.page_content], metadatas=[i.metadata])
+
 
 def call_openai_api(docs, folder_name):
-# Function to create a vector store from the documents and save it to disk.
+    # Function to create a vector store from the documents and save it to disk.
 
     # create output folder if it doesn't exist
     if not os.path.exists(f"outputs/{folder_name}"):
@@ -37,21 +38,22 @@ def call_openai_api(docs, folder_name):
     # remove the first element from docs
     docs.pop(0)
     # cut first n docs if you want to restart
-    #docs = docs[:n]
+    # docs = docs[:n]
     c1 = 0
     # pinecone.init(
     #     api_key="",  # find at app.pinecone.io
     #     environment="us-east1-gcp"  # next to api key in console
     # )
-    #index_name = "pandas"
+    # index_name = "pandas"
     store = FAISS.from_documents(docs_test, OpenAIEmbeddings())
-    #store_pine = Pinecone.from_documents(docs_test, OpenAIEmbeddings(), index_name=index_name)
+    # store_pine = Pinecone.from_documents(docs_test, OpenAIEmbeddings(), index_name=index_name)
 
     # Uncomment for MPNet embeddings
     # model_name = "sentence-transformers/all-mpnet-base-v2"
     # hf = HuggingFaceEmbeddings(model_name=model_name)
     # store = FAISS.from_documents(docs_test, hf)
-    for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs), bar_format='{l_bar}{bar}| Time Left: {remaining}'):
+    for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs),
+                  bar_format='{l_bar}{bar}| Time Left: {remaining}'):
         try:
             store_add_texts_with_retry(store, i)
         except Exception as e:
@@ -64,20 +66,20 @@ def call_openai_api(docs, folder_name):
         c1 += 1
     store.save_local(f"outputs/{folder_name}")
 
+
 def get_user_permission(docs, folder_name):
-# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
+    # Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
     # Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
-    #docs_content = (" ".join(docs))
+    # docs_content = (" ".join(docs))
     docs_content = ""
     for doc in docs:
         docs_content += doc.page_content
 
-
     tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
     # Here we print the number of tokens and the approx user cost with some visually appealing formatting.
     print(f"Number of Tokens = {format(tokens, ',d')}")
     print(f"Approx Cost = ${format(total_price, ',.2f')}")
-    #Here we check for user permission before calling the API.
+    # Here we check for user permission before calling the API.
     user_input = input("Price Okay? (Y/N) \n").lower()
     if user_input == "y":
         call_openai_api(docs, folder_name)
diff --git a/scripts/parser/py2doc.py b/scripts/parser/py2doc.py
index 4ac73cd9..1443e46e 100644
--- a/scripts/parser/py2doc.py
+++ b/scripts/parser/py2doc.py
@@ -5,6 +5,7 @@ from pathlib import Path
 from langchain.llms import OpenAI
 from langchain.prompts import PromptTemplate
 
+
 def find_files(directory):
     files_list = []
     for root, dirs, files in os.walk(directory):
@@ -13,6 +14,7 @@ def find_files(directory):
                 files_list.append(os.path.join(root, file))
     return files_list
 
+
 def extract_functions(file_path):
     with open(file_path, 'r') as file:
         source_code = file.read()
@@ -25,6 +27,7 @@ def extract_functions(file_path):
                 functions[func_name] = func_def
     return functions
 
+
 def extract_classes(file_path):
     with open(file_path, 'r') as file:
         source_code = file.read()
@@ -40,6 +43,7 @@ def extract_classes(file_path):
                 classes[class_name] = ", ".join(function_names)
     return classes
 
+
 def extract_functions_and_classes(directory):
     files = find_files(directory)
     functions_dict = {}
@@ -53,11 +57,12 @@ def extract_functions_and_classes(directory):
             classes_dict[file] = classes
     return functions_dict, classes_dict
 
+
 def parse_functions(functions_dict, formats, dir):
     c1 = len(functions_dict)
     for i, (source, functions) in enumerate(functions_dict.items(), start=1):
         print(f"Processing file {i}/{c1}")
-        source_w = source.replace(dir+"/", "").replace("."+formats, ".md")
+        source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
         subfolders = "/".join(source_w.split("/")[:-1])
         Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
         for j, (name, function) in enumerate(functions.items(), start=1):
@@ -70,18 +75,19 @@ def parse_functions(functions_dict, formats, dir):
             response = llm(prompt.format(code=function))
             mode = "a" if Path(f"outputs/{source_w}").exists() else "w"
             with open(f"outputs/{source_w}", mode) as f:
-                f.write(f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")
+                f.write(
+                    f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")
 
 
 def parse_classes(classes_dict, formats, dir):
     c1 = len(classes_dict)
     for i, (source, classes) in enumerate(classes_dict.items()):
-        print(f"Processing file {i+1}/{c1}")
-        source_w = source.replace(dir+"/", "").replace("."+formats, ".md")
+        print(f"Processing file {i + 1}/{c1}")
+        source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
         subfolders = "/".join(source_w.split("/")[:-1])
         Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
         for name, function_names in classes.items():
-            print(f"Processing Class {i+1}/{c1}")
+            print(f"Processing Class {i + 1}/{c1}")
             prompt = PromptTemplate(
                 input_variables=["class_name", "functions_names"],
                 template="Class name: {class_name} \nFunctions: {functions_names}, \nDocumentation: ",
@@ -92,6 +98,7 @@ def parse_classes(classes_dict, formats, dir):
             with open(f"outputs/{source_w}", "a" if Path(f"outputs/{source_w}").exists() else "w") as f:
                 f.write(f"\n\n# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}")
 
+
 def transform_to_docs(functions_dict, classes_dict, formats, dir):
     docs_content = ''.join([str(key) + str(value) for key, value in functions_dict.items()])
     docs_content += ''.join([str(key) + str(value) for key, value in classes_dict.items()])
@@ -110,4 +117,4 @@ def transform_to_docs(functions_dict, classes_dict, formats, dir):
         parse_classes(classes_dict, formats, dir)
         print("All done!")
     else:
-        print("The API was not called. No money was spent.")
\ No newline at end of file
+        print("The API was not called. No money was spent.")
diff --git a/scripts/parser/token_func.py b/scripts/parser/token_func.py
index 95b318b9..d5435f6b 100644
--- a/scripts/parser/token_func.py
+++ b/scripts/parser/token_func.py
@@ -13,6 +13,7 @@ def separate_header_and_body(text):
     body = text[len(header):]
     return header, body
 
+
 def group_documents(documents: List[Document], min_tokens: int, max_tokens: int) -> List[Document]:
     docs = []
     current_group = None
@@ -23,7 +24,8 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
         if current_group is None:
             current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
                                      extra_info=doc.extra_info)
-        elif len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
+        elif len(tiktoken.get_encoding("cl100k_base").encode(
+                current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
             current_group.text += " " + doc.text
         else:
             docs.append(current_group)
@@ -35,6 +37,7 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
 
     return docs
 
+
 def split_documents(documents: List[Document], max_tokens: int) -> List[Document]:
     docs = []
     for doc in documents:
@@ -54,6 +57,7 @@ def split_documents(documents: List[Document], max_tokens: int) -> List[Document
                 docs.append(new_doc)
     return docs
 
+
 def group_split(documents: List[Document], max_tokens: int = 2000, min_tokens: int = 150, token_check: bool = True):
     if token_check == False:
         return documents