From 38753c4395484932362749fe4c14533bb3cc5629 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 16 Dec 2024 20:09:34 +0000
Subject: [PATCH 01/26] build(deps): bump yarl from 1.11.1 to 1.18.3 in
 /application

Bumps [yarl](https://github.com/aio-libs/yarl) from 1.11.1 to 1.18.3.
- [Release notes](https://github.com/aio-libs/yarl/releases)
- [Changelog](https://github.com/aio-libs/yarl/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/yarl/compare/v1.11.1...v1.18.3)

---
updated-dependencies:
- dependency-name: yarl
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 application/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/requirements.txt b/application/requirements.txt
index 232b8508..13e9a30a 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -86,4 +86,4 @@ urllib3==2.2.3
 vine==5.1.0
 wcwidth==0.2.13
 werkzeug==3.1.3
-yarl==1.11.1
\ No newline at end of file
+yarl==1.18.3
\ No newline at end of file

From c4f3dc4434402b7223e01feb8a8f2dde63018ac4 Mon Sep 17 00:00:00 2001
From: Pavel <pabin@yandex.ru>
Date: Fri, 20 Dec 2024 18:41:47 +0300
Subject: [PATCH 02/26] test version: replace token_func/open_ai_func with
 Chunker and embedding_pipeline

---
 application/parser/chunking.py           | 118 +++++++++++++++++++++++
 application/parser/embedding_pipeline.py |  86 +++++++++++++++++
 application/parser/open_ai_func.py       |  75 --------------
 application/parser/token_func.py         |  79 ---------------
 application/worker.py                    |  30 +++---
 5 files changed, 221 insertions(+), 167 deletions(-)
 create mode 100644 application/parser/chunking.py
 create mode 100755 application/parser/embedding_pipeline.py
 delete mode 100755 application/parser/open_ai_func.py
 delete mode 100644 application/parser/token_func.py

diff --git a/application/parser/chunking.py b/application/parser/chunking.py
new file mode 100644
index 00000000..26f05dba
--- /dev/null
+++ b/application/parser/chunking.py
@@ -0,0 +1,118 @@
+import re
+from typing import List, Tuple, Union
+import logging
+from application.parser.schema.base import Document
+from application.utils import get_encoding
+
+logger = logging.getLogger(__name__)
+
+class Chunker:
+    def __init__(
+        self,
+        chunking_strategy: str = "classic_chunk",
+        max_tokens: int = 2000,
+        min_tokens: int = 150,
+        duplicate_headers: bool = False,
+    ):
+        if chunking_strategy not in ["classic_chunk"]:
+            raise ValueError(f"Unsupported chunking strategy: {chunking_strategy}")
+        self.chunking_strategy = chunking_strategy
+        self.max_tokens = max_tokens
+        self.min_tokens = min_tokens
+        self.duplicate_headers = duplicate_headers
+        self.encoding = get_encoding()
+
+    def separate_header_and_body(self, text: str) -> Tuple[str, str]:
+        header_pattern = r"^(.*?\n){3}"
+        match = re.match(header_pattern, text)
+        if match:
+            header = match.group(0)
+            body = text[len(header):]
+        else:
+            header, body = "", text  # No header, treat entire text as body
+        return header, body
+
+    def combine_documents(self, doc: Document, next_doc: Document) -> Document:
+        combined_text = doc.text + " " + next_doc.text
+        combined_token_count = len(self.encoding.encode(combined_text))
+        new_doc = Document(
+            text=combined_text,
+            doc_id=doc.doc_id,
+            embedding=doc.embedding,
+            extra_info={**(doc.extra_info or {}), "token_count": combined_token_count}
+        )
+        return new_doc
+    
+    def split_document(self, doc: Document) -> List[Document]:
+        split_docs = []
+        header, body = self.separate_header_and_body(doc.text)
+        header_tokens = self.encoding.encode(header) if header else []
+        body_tokens = self.encoding.encode(body)
+
+        current_position = 0
+        part_index = 0
+        while current_position < len(body_tokens):
+            end_position = current_position + self.max_tokens - len(header_tokens)
+            chunk_tokens = (header_tokens + body_tokens[current_position:end_position]
+                            if self.duplicate_headers or part_index == 0 else body_tokens[current_position:end_position])
+            chunk_text = self.encoding.decode(chunk_tokens)
+            new_doc = Document(
+                text=chunk_text,
+                doc_id=f"{doc.doc_id}-{part_index}",
+                embedding=doc.embedding,
+                extra_info={**(doc.extra_info or {}), "token_count": len(chunk_tokens)}
+            )
+            split_docs.append(new_doc)
+            current_position = end_position
+            part_index += 1
+            header_tokens = []
+        return split_docs
+
+    def classic_chunk(self, documents: List[Document]) -> List[Document]:
+        processed_docs = []
+        i = 0
+        while i < len(documents):
+            doc = documents[i]
+            tokens = self.encoding.encode(doc.text)
+            token_count = len(tokens)
+
+            if self.min_tokens <= token_count <= self.max_tokens:
+                doc.extra_info = doc.extra_info or {}
+                doc.extra_info["token_count"] = token_count
+                processed_docs.append(doc)
+                i += 1
+            elif token_count < self.min_tokens:
+                if i + 1 < len(documents):
+                    next_doc = documents[i + 1]
+                    next_tokens = self.encoding.encode(next_doc.text)
+                    if token_count + len(next_tokens) <= self.max_tokens:
+                        # Combine small documents
+                        combined_doc = self.combine_documents(doc, next_doc)
+                        processed_docs.append(combined_doc)
+                        i += 2
+                    else:
+                        # Keep the small document as is if adding next_doc would exceed max_tokens
+                        doc.extra_info = doc.extra_info or {}
+                        doc.extra_info["token_count"] = token_count
+                        processed_docs.append(doc)
+                        i += 1
+                else:
+                    # No next document to combine with; add the small document as is
+                    doc.extra_info = doc.extra_info or {}
+                    doc.extra_info["token_count"] = token_count
+                    processed_docs.append(doc)
+                    i += 1
+            else:
+                # Split large documents
+                processed_docs.extend(self.split_document(doc))
+                i += 1
+        return processed_docs
+
+    def chunk(
+        self,
+        documents: List[Document]
+    ) -> List[Document]:
+        if self.chunking_strategy == "classic_chunk":
+            return self.classic_chunk(documents)
+        else:
+            raise ValueError("Unsupported chunking strategy")
diff --git a/application/parser/embedding_pipeline.py b/application/parser/embedding_pipeline.py
new file mode 100755
index 00000000..6cf40048
--- /dev/null
+++ b/application/parser/embedding_pipeline.py
@@ -0,0 +1,86 @@
+import os
+import logging
+from retry import retry
+from tqdm import tqdm
+from application.core.settings import settings
+from application.vectorstore.vector_creator import VectorCreator
+
+
+@retry(tries=10, delay=60)
+def add_text_to_store_with_retry(store, doc, source_id):
+    """
+    Add a document's text and metadata to the vector store with retry logic.
+    Args:
+        store: The vector store object.
+        doc: The document to be added.
+        source_id: Unique identifier for the source.
+    """
+    try:
+        doc.metadata["source_id"] = str(source_id)
+        store.add_texts([doc.page_content], metadatas=[doc.metadata])
+    except Exception as e:
+        logging.error(f"Failed to add document with retry: {e}")
+        raise
+
+
+def embed_and_store_documents(docs, folder_name, source_id, task_status):
+    """
+    Embeds documents and stores them in a vector store.
+
+    Args:
+        docs (list): List of documents to be embedded and stored.
+        folder_name (str): Directory to save the vector store.
+        source_id (str): Unique identifier for the source.
+        task_status: Task state manager for progress updates.
+
+    Returns:
+        None
+    """
+    # Ensure the folder exists
+    if not os.path.exists(folder_name):
+        os.makedirs(folder_name)
+
+    # Initialize vector store
+    if settings.VECTOR_STORE == "faiss":
+        docs_init = [docs.pop(0)]
+        store = VectorCreator.create_vectorstore(
+            settings.VECTOR_STORE,
+            docs_init=docs_init,
+            source_id=folder_name,
+            embeddings_key=os.getenv("EMBEDDINGS_KEY"),
+        )
+    else:
+        store = VectorCreator.create_vectorstore(
+            settings.VECTOR_STORE,
+            source_id=source_id,
+            embeddings_key=os.getenv("EMBEDDINGS_KEY"),
+        )
+        store.delete_index()
+
+    total_docs = len(docs)
+
+    # Process and embed documents
+    for idx, doc in tqdm(
+        docs,
+        desc="Embedding 🦖",
+        unit="docs",
+        total=total_docs,
+        bar_format="{l_bar}{bar}| Time Left: {remaining}",
+    ):
+        try:
+            # Update task status for progress tracking
+            progress = int((idx / total_docs) * 100)
+            task_status.update_state(state="PROGRESS", meta={"current": progress})
+
+            # Add document to vector store
+            add_text_to_store_with_retry(store, doc, source_id)
+        except Exception as e:
+            logging.error(f"Error embedding document {idx}: {e}")
+            logging.info(f"Saving progress at document {idx} out of {total_docs}")
+            store.save_local(folder_name)
+            break
+
+    # Save the vector store
+    if settings.VECTOR_STORE == "faiss":
+        store.save_local(folder_name)
+    logging.info("Vector store saved successfully.")
diff --git a/application/parser/open_ai_func.py b/application/parser/open_ai_func.py
deleted file mode 100755
index 3109f583..00000000
--- a/application/parser/open_ai_func.py
+++ /dev/null
@@ -1,75 +0,0 @@
-import os
-
-from retry import retry
-
-from application.core.settings import settings
-
-from application.vectorstore.vector_creator import VectorCreator
-
-
-# from langchain_community.embeddings import HuggingFaceEmbeddings
-# from langchain_community.embeddings import HuggingFaceInstructEmbeddings
-# from langchain_community.embeddings import CohereEmbeddings
-
-
-@retry(tries=10, delay=60)
-def store_add_texts_with_retry(store, i, id):
-    # add source_id to the metadata
-    i.metadata["source_id"] = str(id)
-    store.add_texts([i.page_content], metadatas=[i.metadata])
-    # store_pine.add_texts([i.page_content], metadatas=[i.metadata])
-
-
-def call_openai_api(docs, folder_name, id, task_status):
-    # Function to create a vector store from the documents and save it to disk
-
-    if not os.path.exists(f"{folder_name}"):
-        os.makedirs(f"{folder_name}")
-
-    from tqdm import tqdm
-
-    c1 = 0
-    if settings.VECTOR_STORE == "faiss":
-        docs_init = [docs[0]]
-        docs.pop(0)
-
-        store = VectorCreator.create_vectorstore(
-            settings.VECTOR_STORE,
-            docs_init=docs_init,
-            source_id=f"{folder_name}",
-            embeddings_key=os.getenv("EMBEDDINGS_KEY"),
-        )
-    else:
-        store = VectorCreator.create_vectorstore(
-            settings.VECTOR_STORE,
-            source_id=str(id),
-            embeddings_key=os.getenv("EMBEDDINGS_KEY"),
-        )
-        store.delete_index()
-    # Uncomment for MPNet embeddings
-    # model_name = "sentence-transformers/all-mpnet-base-v2"
-    # hf = HuggingFaceEmbeddings(model_name=model_name)
-    # store = FAISS.from_documents(docs_test, hf)
-    s1 = len(docs)
-    for i in tqdm(
-        docs,
-        desc="Embedding 🦖",
-        unit="docs",
-        total=len(docs),
-        bar_format="{l_bar}{bar}| Time Left: {remaining}",
-    ):
-        try:
-            task_status.update_state(
-                state="PROGRESS", meta={"current": int((c1 / s1) * 100)}
-            )
-            store_add_texts_with_retry(store, i, id)
-        except Exception as e:
-            print(e)
-            print("Error on ", i)
-            print("Saving progress")
-            print(f"stopped at {c1} out of {len(docs)}")
-            store.save_local(f"{folder_name}")
-            break
-        c1 += 1
-    if settings.VECTOR_STORE == "faiss":
-        store.save_local(f"{folder_name}")
diff --git a/application/parser/token_func.py b/application/parser/token_func.py
deleted file mode 100644
index 7511cde0..00000000
--- a/application/parser/token_func.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import re
-from math import ceil
-from typing import List
-
-import tiktoken
-from application.parser.schema.base import Document
-
-
-def separate_header_and_body(text):
-    header_pattern = r"^(.*?\n){3}"
-    match = re.match(header_pattern, text)
-    header = match.group(0)
-    body = text[len(header):]
-    return header, body
-
-
-def group_documents(documents: List[Document], min_tokens: int, max_tokens: int) -> List[Document]:
-    docs = []
-    current_group = None
-
-    for doc in documents:
-        doc_len = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))
-
-        # Check if current group is empty or if the document can be added based on token count and matching metadata
-        if (current_group is None or 
-            (len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and 
-             doc_len < min_tokens and 
-             current_group.extra_info == doc.extra_info)):
-            if current_group is None:
-                current_group = doc  # Use the document directly to retain its metadata
-            else:
-                current_group.text += " " + doc.text  # Append text to the current group
-        else:
-            docs.append(current_group)
-            current_group = doc  # Start a new group with the current document
-
-    if current_group is not None:
-        docs.append(current_group)
-
-    return docs
-
-
-def split_documents(documents: List[Document], max_tokens: int) -> List[Document]:
-    docs = []
-    for doc in documents:
-        token_length = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))
-        if token_length <= max_tokens:
-            docs.append(doc)
-        else:
-            header, body = separate_header_and_body(doc.text)
-            if len(tiktoken.get_encoding("cl100k_base").encode(header)) > max_tokens:
-                body = doc.text
-                header = ""
-            num_body_parts = ceil(token_length / max_tokens)
-            part_length = ceil(len(body) / num_body_parts)
-            body_parts = [body[i:i + part_length] for i in range(0, len(body), part_length)]
-            for i, body_part in enumerate(body_parts):
-                new_doc = Document(text=header + body_part.strip(),
-                                   doc_id=f"{doc.doc_id}-{i}",
-                                   embedding=doc.embedding,
-                                   extra_info=doc.extra_info)
-                docs.append(new_doc)
-    return docs
-
-
-def group_split(documents: List[Document], max_tokens: int = 2000, min_tokens: int = 150, token_check: bool = True):
-    if not token_check:
-        return documents
-    print("Grouping small documents")
-    try:
-        documents = group_documents(documents=documents, min_tokens=min_tokens, max_tokens=max_tokens)
-    except Exception:
-        print("Grouping failed, try running without token_check")
-    print("Separating large documents")
-    try:
-        documents = split_documents(documents=documents, max_tokens=max_tokens)
-    except Exception:
-        print("Grouping failed, try running without token_check")
-    return documents
diff --git a/application/worker.py b/application/worker.py
index 33cd90e5..0edb46ff 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -12,10 +12,10 @@ from bson.objectid import ObjectId
 from application.core.mongo_db import MongoDB
 from application.core.settings import settings
 from application.parser.file.bulk import SimpleDirectoryReader
-from application.parser.open_ai_func import call_openai_api
+from application.parser.embedding_pipeline import embed_and_store_documents
 from application.parser.remote.remote_creator import RemoteCreator
 from application.parser.schema.base import Document
-from application.parser.token_func import group_split
+from application.parser.chunking import Chunker
 from application.utils import count_tokens_docs
 
 mongo = MongoDB.get_client()
@@ -153,17 +153,19 @@ def ingest_worker(
         exclude_hidden=exclude,
         file_metadata=metadata_from_filename,
     ).load_data()
-    raw_docs = group_split(
-        documents=raw_docs,
-        min_tokens=MIN_TOKENS,
+
+    chunker = Chunker(
+        chunking_strategy="classic_chunk",
         max_tokens=MAX_TOKENS,
-        token_check=token_check,
+        min_tokens=MIN_TOKENS,
+        duplicate_headers=False
     )
+    raw_docs = chunker.chunk(documents=raw_docs)
 
     docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
     id = ObjectId()
 
-    call_openai_api(docs, full_path, id, self)
+    embed_and_store_documents(docs, full_path, id, self)
     tokens = count_tokens_docs(docs)
     self.update_state(state="PROGRESS", meta={"current": 100})
 
@@ -217,21 +219,23 @@ def remote_worker(
     remote_loader = RemoteCreator.create_loader(loader)
     raw_docs = remote_loader.load_data(source_data)
 
-    docs = group_split(
-        documents=raw_docs,
-        min_tokens=MIN_TOKENS,
+    chunker = Chunker(
+        chunking_strategy="classic_chunk",
         max_tokens=MAX_TOKENS,
-        token_check=token_check,
+        min_tokens=MIN_TOKENS,
+        duplicate_headers=False
     )
+    docs = chunker.chunk(documents=raw_docs)
+
     tokens = count_tokens_docs(docs)
     if operation_mode == "upload":
         id = ObjectId()
-        call_openai_api(docs, full_path, id, self)
+        embed_and_store_documents(docs, full_path, id, self)
     elif operation_mode == "sync":
         if not doc_id or not ObjectId.is_valid(doc_id):
             raise ValueError("doc_id must be provided for sync operation.")
         id = ObjectId(doc_id)
-        call_openai_api(docs, full_path, id, self)
+        embed_and_store_documents(docs, full_path, id, self)
     self.update_state(state="PROGRESS", meta={"current": 100})
 
     file_data = {

From b41a989051b437112f7a05ee36d8cb5df877d84b Mon Sep 17 00:00:00 2001
From: Pavel <pabin@yandex.ru>
Date: Fri, 20 Dec 2024 18:41:47 +0300
Subject: [PATCH 03/26] test version: replace token_func/open_ai_func with
 Chunker and embedding_pipeline

---
 application/parser/chunking.py           | 118 +++++++++++++++++++++++
 application/parser/embedding_pipeline.py |  86 +++++++++++++++++
 application/parser/open_ai_func.py       |  75 --------------
 application/parser/token_func.py         |  79 ---------------
 application/worker.py                    |  30 +++---
 5 files changed, 221 insertions(+), 167 deletions(-)
 create mode 100644 application/parser/chunking.py
 create mode 100755 application/parser/embedding_pipeline.py
 delete mode 100755 application/parser/open_ai_func.py
 delete mode 100644 application/parser/token_func.py

diff --git a/application/parser/chunking.py b/application/parser/chunking.py
new file mode 100644
index 00000000..26f05dba
--- /dev/null
+++ b/application/parser/chunking.py
@@ -0,0 +1,118 @@
+import re
+from typing import List, Tuple, Union
+import logging
+from application.parser.schema.base import Document
+from application.utils import get_encoding
+
+logger = logging.getLogger(__name__)
+
+class Chunker:
+    def __init__(
+        self,
+        chunking_strategy: str = "classic_chunk",
+        max_tokens: int = 2000,
+        min_tokens: int = 150,
+        duplicate_headers: bool = False,
+    ):
+        if chunking_strategy not in ["classic_chunk"]:
+            raise ValueError(f"Unsupported chunking strategy: {chunking_strategy}")
+        self.chunking_strategy = chunking_strategy
+        self.max_tokens = max_tokens
+        self.min_tokens = min_tokens
+        self.duplicate_headers = duplicate_headers
+        self.encoding = get_encoding()
+
+    def separate_header_and_body(self, text: str) -> Tuple[str, str]:
+        header_pattern = r"^(.*?\n){3}"
+        match = re.match(header_pattern, text)
+        if match:
+            header = match.group(0)
+            body = text[len(header):]
+        else:
+            header, body = "", text  # No header, treat entire text as body
+        return header, body
+
+    def combine_documents(self, doc: Document, next_doc: Document) -> Document:
+        combined_text = doc.text + " " + next_doc.text
+        combined_token_count = len(self.encoding.encode(combined_text))
+        new_doc = Document(
+            text=combined_text,
+            doc_id=doc.doc_id,
+            embedding=doc.embedding,
+            extra_info={**(doc.extra_info or {}), "token_count": combined_token_count}
+        )
+        return new_doc
+    
+    def split_document(self, doc: Document) -> List[Document]:
+        split_docs = []
+        header, body = self.separate_header_and_body(doc.text)
+        header_tokens = self.encoding.encode(header) if header else []
+        body_tokens = self.encoding.encode(body)
+
+        current_position = 0
+        part_index = 0
+        while current_position < len(body_tokens):
+            end_position = current_position + self.max_tokens - len(header_tokens)
+            chunk_tokens = (header_tokens + body_tokens[current_position:end_position]
+                            if self.duplicate_headers or part_index == 0 else body_tokens[current_position:end_position])
+            chunk_text = self.encoding.decode(chunk_tokens)
+            new_doc = Document(
+                text=chunk_text,
+                doc_id=f"{doc.doc_id}-{part_index}",
+                embedding=doc.embedding,
+                extra_info={**(doc.extra_info or {}), "token_count": len(chunk_tokens)}
+            )
+            split_docs.append(new_doc)
+            current_position = end_position
+            part_index += 1
+            header_tokens = []
+        return split_docs
+
+    def classic_chunk(self, documents: List[Document]) -> List[Document]:
+        processed_docs = []
+        i = 0
+        while i < len(documents):
+            doc = documents[i]
+            tokens = self.encoding.encode(doc.text)
+            token_count = len(tokens)
+
+            if self.min_tokens <= token_count <= self.max_tokens:
+                doc.extra_info = doc.extra_info or {}
+                doc.extra_info["token_count"] = token_count
+                processed_docs.append(doc)
+                i += 1
+            elif token_count < self.min_tokens:
+                if i + 1 < len(documents):
+                    next_doc = documents[i + 1]
+                    next_tokens = self.encoding.encode(next_doc.text)
+                    if token_count + len(next_tokens) <= self.max_tokens:
+                        # Combine small documents
+                        combined_doc = self.combine_documents(doc, next_doc)
+                        processed_docs.append(combined_doc)
+                        i += 2
+                    else:
+                        # Keep the small document as is if adding next_doc would exceed max_tokens
+                        doc.extra_info = doc.extra_info or {}
+                        doc.extra_info["token_count"] = token_count
+                        processed_docs.append(doc)
+                        i += 1
+                else:
+                    # No next document to combine with; add the small document as is
+                    doc.extra_info = doc.extra_info or {}
+                    doc.extra_info["token_count"] = token_count
+                    processed_docs.append(doc)
+                    i += 1
+            else:
+                # Split large documents
+                processed_docs.extend(self.split_document(doc))
+                i += 1
+        return processed_docs
+
+    def chunk(
+        self,
+        documents: List[Document]
+    ) -> List[Document]:
+        if self.chunking_strategy == "classic_chunk":
+            return self.classic_chunk(documents)
+        else:
+            raise ValueError("Unsupported chunking strategy")
diff --git a/application/parser/embedding_pipeline.py b/application/parser/embedding_pipeline.py
new file mode 100755
index 00000000..6cf40048
--- /dev/null
+++ b/application/parser/embedding_pipeline.py
@@ -0,0 +1,86 @@
+import os
+import logging
+from retry import retry
+from tqdm import tqdm
+from application.core.settings import settings
+from application.vectorstore.vector_creator import VectorCreator
+
+
+@retry(tries=10, delay=60)
+def add_text_to_store_with_retry(store, doc, source_id):
+    """
+    Add a document's text and metadata to the vector store with retry logic.
+    Args:
+        store: The vector store object.
+        doc: The document to be added.
+        source_id: Unique identifier for the source.
+    """
+    try:
+        doc.metadata["source_id"] = str(source_id)
+        store.add_texts([doc.page_content], metadatas=[doc.metadata])
+    except Exception as e:
+        logging.error(f"Failed to add document with retry: {e}")
+        raise
+
+
+def embed_and_store_documents(docs, folder_name, source_id, task_status):
+    """
+    Embeds documents and stores them in a vector store.
+
+    Args:
+        docs (list): List of documents to be embedded and stored.
+        folder_name (str): Directory to save the vector store.
+        source_id (str): Unique identifier for the source.
+        task_status: Task state manager for progress updates.
+
+    Returns:
+        None
+    """
+    # Ensure the folder exists
+    if not os.path.exists(folder_name):
+        os.makedirs(folder_name)
+
+    # Initialize vector store
+    if settings.VECTOR_STORE == "faiss":
+        docs_init = [docs.pop(0)]
+        store = VectorCreator.create_vectorstore(
+            settings.VECTOR_STORE,
+            docs_init=docs_init,
+            source_id=folder_name,
+            embeddings_key=os.getenv("EMBEDDINGS_KEY"),
+        )
+    else:
+        store = VectorCreator.create_vectorstore(
+            settings.VECTOR_STORE,
+            source_id=source_id,
+            embeddings_key=os.getenv("EMBEDDINGS_KEY"),
+        )
+        store.delete_index()
+
+    total_docs = len(docs)
+
+    # Process and embed documents
+    for idx, doc in tqdm(
+        docs,
+        desc="Embedding 🦖",
+        unit="docs",
+        total=total_docs,
+        bar_format="{l_bar}{bar}| Time Left: {remaining}",
+    ):
+        try:
+            # Update task status for progress tracking
+            progress = int((idx / total_docs) * 100)
+            task_status.update_state(state="PROGRESS", meta={"current": progress})
+
+            # Add document to vector store
+            add_text_to_store_with_retry(store, doc, source_id)
+        except Exception as e:
+            logging.error(f"Error embedding document {idx}: {e}")
+            logging.info(f"Saving progress at document {idx} out of {total_docs}")
+            store.save_local(folder_name)
+            break
+
+    # Save the vector store
+    if settings.VECTOR_STORE == "faiss":
+        store.save_local(folder_name)
+    logging.info("Vector store saved successfully.")
diff --git a/application/parser/open_ai_func.py b/application/parser/open_ai_func.py
deleted file mode 100755
index 3109f583..00000000
--- a/application/parser/open_ai_func.py
+++ /dev/null
@@ -1,75 +0,0 @@
-import os
-
-from retry import retry
-
-from application.core.settings import settings
-
-from application.vectorstore.vector_creator import VectorCreator
-
-
-# from langchain_community.embeddings import HuggingFaceEmbeddings
-# from langchain_community.embeddings import HuggingFaceInstructEmbeddings
-# from langchain_community.embeddings import CohereEmbeddings
-
-
-@retry(tries=10, delay=60)
-def store_add_texts_with_retry(store, i, id):
-    # add source_id to the metadata
-    i.metadata["source_id"] = str(id)
-    store.add_texts([i.page_content], metadatas=[i.metadata])
-    # store_pine.add_texts([i.page_content], metadatas=[i.metadata])
-
-
-def call_openai_api(docs, folder_name, id, task_status):
-    # Function to create a vector store from the documents and save it to disk
-
-    if not os.path.exists(f"{folder_name}"):
-        os.makedirs(f"{folder_name}")
-
-    from tqdm import tqdm
-
-    c1 = 0
-    if settings.VECTOR_STORE == "faiss":
-        docs_init = [docs[0]]
-        docs.pop(0)
-
-        store = VectorCreator.create_vectorstore(
-            settings.VECTOR_STORE,
-            docs_init=docs_init,
-            source_id=f"{folder_name}",
-            embeddings_key=os.getenv("EMBEDDINGS_KEY"),
-        )
-    else:
-        store = VectorCreator.create_vectorstore(
-            settings.VECTOR_STORE,
-            source_id=str(id),
-            embeddings_key=os.getenv("EMBEDDINGS_KEY"),
-        )
-        store.delete_index()
-    # Uncomment for MPNet embeddings
-    # model_name = "sentence-transformers/all-mpnet-base-v2"
-    # hf = HuggingFaceEmbeddings(model_name=model_name)
-    # store = FAISS.from_documents(docs_test, hf)
-    s1 = len(docs)
-    for i in tqdm(
-        docs,
-        desc="Embedding 🦖",
-        unit="docs",
-        total=len(docs),
-        bar_format="{l_bar}{bar}| Time Left: {remaining}",
-    ):
-        try:
-            task_status.update_state(
-                state="PROGRESS", meta={"current": int((c1 / s1) * 100)}
-            )
-            store_add_texts_with_retry(store, i, id)
-        except Exception as e:
-            print(e)
-            print("Error on ", i)
-            print("Saving progress")
-            print(f"stopped at {c1} out of {len(docs)}")
-            store.save_local(f"{folder_name}")
-            break
-        c1 += 1
-    if settings.VECTOR_STORE == "faiss":
-        store.save_local(f"{folder_name}")
diff --git a/application/parser/token_func.py b/application/parser/token_func.py
deleted file mode 100644
index 7511cde0..00000000
--- a/application/parser/token_func.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import re
-from math import ceil
-from typing import List
-
-import tiktoken
-from application.parser.schema.base import Document
-
-
-def separate_header_and_body(text):
-    header_pattern = r"^(.*?\n){3}"
-    match = re.match(header_pattern, text)
-    header = match.group(0)
-    body = text[len(header):]
-    return header, body
-
-
-def group_documents(documents: List[Document], min_tokens: int, max_tokens: int) -> List[Document]:
-    docs = []
-    current_group = None
-
-    for doc in documents:
-        doc_len = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))
-
-        # Check if current group is empty or if the document can be added based on token count and matching metadata
-        if (current_group is None or 
-            (len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and 
-             doc_len < min_tokens and 
-             current_group.extra_info == doc.extra_info)):
-            if current_group is None:
-                current_group = doc  # Use the document directly to retain its metadata
-            else:
-                current_group.text += " " + doc.text  # Append text to the current group
-        else:
-            docs.append(current_group)
-            current_group = doc  # Start a new group with the current document
-
-    if current_group is not None:
-        docs.append(current_group)
-
-    return docs
-
-
-def split_documents(documents: List[Document], max_tokens: int) -> List[Document]:
-    docs = []
-    for doc in documents:
-        token_length = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))
-        if token_length <= max_tokens:
-            docs.append(doc)
-        else:
-            header, body = separate_header_and_body(doc.text)
-            if len(tiktoken.get_encoding("cl100k_base").encode(header)) > max_tokens:
-                body = doc.text
-                header = ""
-            num_body_parts = ceil(token_length / max_tokens)
-            part_length = ceil(len(body) / num_body_parts)
-            body_parts = [body[i:i + part_length] for i in range(0, len(body), part_length)]
-            for i, body_part in enumerate(body_parts):
-                new_doc = Document(text=header + body_part.strip(),
-                                   doc_id=f"{doc.doc_id}-{i}",
-                                   embedding=doc.embedding,
-                                   extra_info=doc.extra_info)
-                docs.append(new_doc)
-    return docs
-
-
-def group_split(documents: List[Document], max_tokens: int = 2000, min_tokens: int = 150, token_check: bool = True):
-    if not token_check:
-        return documents
-    print("Grouping small documents")
-    try:
-        documents = group_documents(documents=documents, min_tokens=min_tokens, max_tokens=max_tokens)
-    except Exception:
-        print("Grouping failed, try running without token_check")
-    print("Separating large documents")
-    try:
-        documents = split_documents(documents=documents, max_tokens=max_tokens)
-    except Exception:
-        print("Grouping failed, try running without token_check")
-    return documents
diff --git a/application/worker.py b/application/worker.py
index 33cd90e5..0edb46ff 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -12,10 +12,10 @@ from bson.objectid import ObjectId
 from application.core.mongo_db import MongoDB
 from application.core.settings import settings
 from application.parser.file.bulk import SimpleDirectoryReader
-from application.parser.open_ai_func import call_openai_api
+from application.parser.embedding_pipeline import embed_and_store_documents
 from application.parser.remote.remote_creator import RemoteCreator
 from application.parser.schema.base import Document
-from application.parser.token_func import group_split
+from application.parser.chunking import Chunker
 from application.utils import count_tokens_docs
 
 mongo = MongoDB.get_client()
@@ -153,17 +153,19 @@ def ingest_worker(
         exclude_hidden=exclude,
         file_metadata=metadata_from_filename,
     ).load_data()
-    raw_docs = group_split(
-        documents=raw_docs,
-        min_tokens=MIN_TOKENS,
+
+    chunker = Chunker(
+        chunking_strategy="classic_chunk",
         max_tokens=MAX_TOKENS,
-        token_check=token_check,
+        min_tokens=MIN_TOKENS,
+        duplicate_headers=False
     )
+    raw_docs = chunker.chunk(documents=raw_docs)
 
     docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
     id = ObjectId()
 
-    call_openai_api(docs, full_path, id, self)
+    embed_and_store_documents(docs, full_path, id, self)
     tokens = count_tokens_docs(docs)
     self.update_state(state="PROGRESS", meta={"current": 100})
 
@@ -217,21 +219,23 @@ def remote_worker(
     remote_loader = RemoteCreator.create_loader(loader)
     raw_docs = remote_loader.load_data(source_data)
 
-    docs = group_split(
-        documents=raw_docs,
-        min_tokens=MIN_TOKENS,
+    chunker = Chunker(
+        chunking_strategy="classic_chunk",
         max_tokens=MAX_TOKENS,
-        token_check=token_check,
+        min_tokens=MIN_TOKENS,
+        duplicate_headers=False
     )
+    docs = chunker.chunk(documents=raw_docs)
+
     tokens = count_tokens_docs(docs)
     if operation_mode == "upload":
         id = ObjectId()
-        call_openai_api(docs, full_path, id, self)
+        embed_and_store_documents(docs, full_path, id, self)
     elif operation_mode == "sync":
         if not doc_id or not ObjectId.is_valid(doc_id):
             raise ValueError("doc_id must be provided for sync operation.")
         id = ObjectId(doc_id)
-        call_openai_api(docs, full_path, id, self)
+        embed_and_store_documents(docs, full_path, id, self)
     self.update_state(state="PROGRESS", meta={"current": 100})
 
     file_data = {

From 90962ee056df8d97d7d9284665d28fd0fe655ff8 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 23 Dec 2024 17:41:13 +0000
Subject: [PATCH 04/26] fix: debugger in launch json

---
 .vscode/launch.json | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 5be1f711..5083d977 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -13,7 +13,7 @@
       ]
     },
     {
-        "name": "Python Debugger: Flask",
+        "name": "Flask Debugger",
         "type": "debugpy",
         "request": "launch",
         "module": "flask",
@@ -32,5 +32,23 @@
         ],
         "cwd": "${workspaceFolder}",
     },
+    {
+      "name": "Celery Debugger",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "celery",
+      "env": {
+        "PYTHONPATH": "${workspaceFolder}",
+      },
+      "args": [
+        "-A",
+        "application.app.celery",
+        "worker",
+        "-l",
+        "INFO",
+        "--pool=solo"
+      ],
+      "cwd": "${workspaceFolder}"
+    }
   ]
 }
\ No newline at end of file

From 41b4c28430ae29eded99b62e82e1ad4b863e99a3 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 23 Dec 2024 17:41:44 +0000
Subject: [PATCH 05/26] fix: linting

---
 application/parser/chunking.py           | 2 +-
 application/parser/embedding_pipeline.py | 4 ++--
 application/worker.py                    | 2 --
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/application/parser/chunking.py b/application/parser/chunking.py
index 26f05dba..aae14898 100644
--- a/application/parser/chunking.py
+++ b/application/parser/chunking.py
@@ -1,5 +1,5 @@
 import re
-from typing import List, Tuple, Union
+from typing import List, Tuple
 import logging
 from application.parser.schema.base import Document
 from application.utils import get_encoding
diff --git a/application/parser/embedding_pipeline.py b/application/parser/embedding_pipeline.py
index 6cf40048..0435cd14 100755
--- a/application/parser/embedding_pipeline.py
+++ b/application/parser/embedding_pipeline.py
@@ -61,7 +61,7 @@ def embed_and_store_documents(docs, folder_name, source_id, task_status):
 
     # Process and embed documents
     for idx, doc in tqdm(
-        docs,
+        enumerate(docs),
         desc="Embedding 🦖",
         unit="docs",
         total=total_docs,
@@ -69,7 +69,7 @@ def embed_and_store_documents(docs, folder_name, source_id, task_status):
     ):
         try:
             # Update task status for progress tracking
-            progress = int((idx / total_docs) * 100)
+            progress = int(((idx + 1) / total_docs) * 100)
             task_status.update_state(state="PROGRESS", meta={"current": progress})
 
             # Add document to vector store
diff --git a/application/worker.py b/application/worker.py
index 0edb46ff..f4f181e5 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -126,7 +126,6 @@ def ingest_worker(
     limit = None
     exclude = True
     sample = False
-    token_check = True
     full_path = os.path.join(directory, user, name_job)
 
     logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": name_job})
@@ -205,7 +204,6 @@ def remote_worker(
     operation_mode="upload",
     doc_id=None,
 ):
-    token_check = True
     full_path = os.path.join(directory, user, name_job)
 
     if not os.path.exists(full_path):

From b2a013c02739d818fcd2f16513286815b7c04905 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 23 Dec 2024 18:11:15 +0000
Subject: [PATCH 06/26] fix: remove reqs from scripts folder

---
 scripts/requirements.txt | 22 ----------------------
 1 file changed, 22 deletions(-)
 delete mode 100644 scripts/requirements.txt

diff --git a/scripts/requirements.txt b/scripts/requirements.txt
deleted file mode 100644
index d90af2c3..00000000
--- a/scripts/requirements.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-dataclasses_json==0.6.3
-docx2txt==0.8
-EbookLib==0.18
-escodegen==1.0.11
-esprima==4.0.1
-faiss_cpu==1.7.4
-html2text==2020.1.16
-javalang==0.13.0
-langchain==0.2.10
-langchain_community==0.2.9
-langchain-openai==0.0.5
-nltk==3.9
-openapi3_parser==1.1.16
-pandas==2.2.0
-PyPDF2==3.0.1
-python-dotenv==1.0.1
-retry==0.9.2
-Sphinx==7.2.6
-tiktoken==0.5.2
-tqdm==4.66.3
-typer==0.9.0
-unstructured==0.12.2

From 474298c969e0f08779dbaddf6bfae7a8b2dff261 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 23 Dec 2024 18:12:13 +0000
Subject: [PATCH 07/26] build(deps): bump jinja2 from 3.1.4 to 3.1.5 in
 /application

Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.4 to 3.1.5.
- [Release notes](https://github.com/pallets/jinja/releases)
- [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst)
- [Commits](https://github.com/pallets/jinja/compare/3.1.4...3.1.5)

---
updated-dependencies:
- dependency-name: jinja2
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 application/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/requirements.txt b/application/requirements.txt
index b9d2c33c..015eb545 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -18,7 +18,7 @@ gTTS==2.3.2
 gunicorn==23.0.0
 html2text==2024.2.26
 javalang==0.13.0
-jinja2==3.1.4
+jinja2==3.1.5
 jiter==0.5.0
 jmespath==1.0.1
 joblib==1.4.2

From 7760e779aeee4cffceb4cc157ac75e32cfc8650a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 23 Dec 2024 20:14:33 +0000
Subject: [PATCH 08/26] build(deps): bump i18next from 23.15.1 to 24.2.0 in
 /frontend

Bumps [i18next](https://github.com/i18next/i18next) from 23.15.1 to 24.2.0.
- [Release notes](https://github.com/i18next/i18next/releases)
- [Changelog](https://github.com/i18next/i18next/blob/master/CHANGELOG.md)
- [Commits](https://github.com/i18next/i18next/compare/v23.15.1...v24.2.0)

---
updated-dependencies:
- dependency-name: i18next
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 frontend/package-lock.json | 20 ++++++++++++++------
 frontend/package.json      |  2 +-
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/frontend/package-lock.json b/frontend/package-lock.json
index f96a17d4..4371d7c3 100644
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -10,7 +10,7 @@
       "dependencies": {
         "@reduxjs/toolkit": "^2.2.7",
         "chart.js": "^4.4.4",
-        "i18next": "^23.15.1",
+        "i18next": "^24.2.0",
         "i18next-browser-languagedetector": "^8.0.0",
         "prop-types": "^15.8.1",
         "react": "^18.2.0",
@@ -1649,7 +1649,7 @@
       "version": "18.3.0",
       "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.3.0.tgz",
       "integrity": "sha512-EhwApuTmMBmXuFOikhQLIBUn6uFg81SwLMOAUgodJF14SOBOCMdU04gDoYi0WOJJHD144TL32z4yDqCW3dnkQg==",
-      "dev": true,
+      "devOptional": true,
       "dependencies": {
         "@types/react": "*"
       }
@@ -4921,9 +4921,9 @@
       }
     },
     "node_modules/i18next": {
-      "version": "23.15.1",
-      "resolved": "https://registry.npmjs.org/i18next/-/i18next-23.15.1.tgz",
-      "integrity": "sha512-wB4abZ3uK7EWodYisHl/asf8UYEhrI/vj/8aoSsrj/ZDxj4/UXPOa1KvFt1Fq5hkUHquNqwFlDprmjZ8iySgYA==",
+      "version": "24.2.0",
+      "resolved": "https://registry.npmjs.org/i18next/-/i18next-24.2.0.tgz",
+      "integrity": "sha512-ArJJTS1lV6lgKH7yEf4EpgNZ7+THl7bsGxxougPYiXRTJ/Fe1j08/TBpV9QsXCIYVfdE/HWG/xLezJ5DOlfBOA==",
       "funding": [
         {
           "type": "individual",
@@ -4940,6 +4940,14 @@
       ],
       "dependencies": {
         "@babel/runtime": "^7.23.2"
+      },
+      "peerDependencies": {
+        "typescript": "^5"
+      },
+      "peerDependenciesMeta": {
+        "typescript": {
+          "optional": true
+        }
       }
     },
     "node_modules/i18next-browser-languagedetector": {
@@ -9250,7 +9258,7 @@
       "version": "5.6.2",
       "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.2.tgz",
       "integrity": "sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==",
-      "dev": true,
+      "devOptional": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
diff --git a/frontend/package.json b/frontend/package.json
index 868a72ae..ca6ca518 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -21,7 +21,7 @@
   "dependencies": {
     "@reduxjs/toolkit": "^2.2.7",
     "chart.js": "^4.4.4",
-    "i18next": "^23.15.1",
+    "i18next": "^24.2.0",
     "i18next-browser-languagedetector": "^8.0.0",
     "prop-types": "^15.8.1",
     "react": "^18.2.0",

From 502d82e1c9e7f003b485e34f81d44221e875677e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 23 Dec 2024 20:58:59 +0000
Subject: [PATCH 09/26] build(deps): bump langchain-openai from 0.2.0 to 0.2.14
 in /application

Bumps [langchain-openai](https://github.com/langchain-ai/langchain) from 0.2.0 to 0.2.14.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain-openai==0.2.0...langchain-openai==0.2.14)

---
updated-dependencies:
- dependency-name: langchain-openai
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 application/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/requirements.txt b/application/requirements.txt
index b9d2c33c..dfd1898e 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -31,7 +31,7 @@ kombu==5.4.2
 langchain==0.3.11
 langchain-community==0.3.11
 langchain-core==0.3.25
-langchain-openai==0.2.0
+langchain-openai==0.2.14
 langchain-text-splitters==0.3.0
 langsmith==0.2.3
 lazy-object-proxy==1.10.0

From 52dd3f798a59932661fdf374460c60ec94a430f5 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 23 Dec 2024 21:06:38 +0000
Subject: [PATCH 10/26] build(deps): bump jinja2 from 3.1.4 to 3.1.5 in
 /application

Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.4 to 3.1.5.
- [Release notes](https://github.com/pallets/jinja/releases)
- [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst)
- [Commits](https://github.com/pallets/jinja/compare/3.1.4...3.1.5)

---
updated-dependencies:
- dependency-name: jinja2
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 application/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/requirements.txt b/application/requirements.txt
index a8b909e5..e4e3f232 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -18,7 +18,7 @@ gTTS==2.3.2
 gunicorn==23.0.0
 html2text==2024.2.26
 javalang==0.13.0
-jinja2==3.1.4
+jinja2==3.1.5
 jiter==0.5.0
 jmespath==1.0.1
 joblib==1.4.2

From ab90a93eec9f7cd5b505b62356a3ab6391f8c1b4 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 23 Dec 2024 21:15:46 +0000
Subject: [PATCH 11/26] build(deps): bump numpy from 1.26.4 to 2.2.1 in
 /application

Bumps [numpy](https://github.com/numpy/numpy) from 1.26.4 to 2.2.1.
- [Release notes](https://github.com/numpy/numpy/releases)
- [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst)
- [Commits](https://github.com/numpy/numpy/compare/v1.26.4...v2.2.1)

---
updated-dependencies:
- dependency-name: numpy
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 application/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/requirements.txt b/application/requirements.txt
index e4e3f232..57da7d9a 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -42,7 +42,7 @@ mpmath==1.3.0
 multidict==6.1.0
 mypy-extensions==1.0.0
 networkx==3.3
-numpy==1.26.4
+numpy==2.2.1
 openai==1.57.0
 openapi-schema-validator==0.6.2
 openapi-spec-validator==0.6.0

From fb2df05e3feb21ec763aa283facce70e45aad9ac Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 23 Dec 2024 21:23:54 +0000
Subject: [PATCH 12/26] feat: upgrade python and bump faiss-cpu

---
 application/Dockerfile       | 14 +++++++-------
 application/requirements.txt |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/application/Dockerfile b/application/Dockerfile
index d076bc41..d422db45 100644
--- a/application/Dockerfile
+++ b/application/Dockerfile
@@ -8,14 +8,14 @@ RUN apt-get update && \
     add-apt-repository ppa:deadsnakes/ppa && \
 # Install necessary packages and Python
     apt-get update && \
-    apt-get install -y --no-install-recommends gcc wget unzip libc6-dev python3.11 python3.11-distutils python3.11-venv && \
+    apt-get install -y --no-install-recommends gcc wget unzip libc6-dev python3.12 python3.12-venv && \
     rm -rf /var/lib/apt/lists/* 
 
 # Verify Python installation and setup symlink
-RUN if [ -f /usr/bin/python3.11 ]; then \
-        ln -s /usr/bin/python3.11 /usr/bin/python; \
+RUN if [ -f /usr/bin/python3.12 ]; then \
+        ln -s /usr/bin/python3.12 /usr/bin/python; \
     else \
-        echo "Python 3.11 not found"; exit 1; \
+        echo "Python 3.12 not found"; exit 1; \
     fi
 
 # Download and unzip the model
@@ -33,7 +33,7 @@ RUN apt-get remove --purge -y wget unzip && apt-get autoremove -y && rm -rf /var
 COPY requirements.txt .
 
 # Setup Python virtual environment
-RUN python3.11 -m venv /venv
+RUN python3.12 -m venv /venv
 
 # Activate virtual environment and install Python packages
 ENV PATH="/venv/bin:$PATH"
@@ -50,8 +50,8 @@ RUN apt-get update && \
     apt-get install -y software-properties-common && \
     add-apt-repository ppa:deadsnakes/ppa && \
 # Install Python
-    apt-get update && apt-get install -y --no-install-recommends python3.11 && \
-    ln -s /usr/bin/python3.11 /usr/bin/python && \
+    apt-get update && apt-get install -y --no-install-recommends python3.12 && \
+    ln -s /usr/bin/python3.12 /usr/bin/python && \
     rm -rf /var/lib/apt/lists/*
 
 # Set working directory
diff --git a/application/requirements.txt b/application/requirements.txt
index 57da7d9a..787be450 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -12,7 +12,7 @@ escodegen==1.0.11
 esprima==4.0.1
 esutils==1.0.1
 Flask==3.0.3
-faiss-cpu==1.8.0.post1
+faiss-cpu==1.9.0.post1
 flask-restx==1.3.0
 gTTS==2.3.2
 gunicorn==23.0.0

From 4927b64d273a34d56f74cebf2bcfb1a25357de99 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 23 Dec 2024 21:26:22 +0000
Subject: [PATCH 13/26] bump pytest

---
 .github/workflows/pytest.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index cf68ff9c..d5b31109 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -6,7 +6,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.11"]
+        python-version: ["3.12"]
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
@@ -23,7 +23,7 @@ jobs:
         run: |
           python -m pytest --cov=application --cov-report=xml
       - name: Upload coverage reports to Codecov
-        if: github.event_name == 'pull_request' && matrix.python-version == '3.11'
+        if: github.event_name == 'pull_request' && matrix.python-version == '3.12'
         uses: codecov/codecov-action@v5
         env:
           CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

From fdd2300517cec29836ec687d9dd49ee2715c3163 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 23 Dec 2024 21:31:04 +0000
Subject: [PATCH 14/26] build(deps): bump langchain-openai from 0.2.0 to 0.2.14
 in /application

Bumps [langchain-openai](https://github.com/langchain-ai/langchain) from 0.2.0 to 0.2.14.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain-openai==0.2.0...langchain-openai==0.2.14)

---
updated-dependencies:
- dependency-name: langchain-openai
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 application/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/requirements.txt b/application/requirements.txt
index 787be450..f189a300 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -31,7 +31,7 @@ kombu==5.4.2
 langchain==0.3.11
 langchain-community==0.3.11
 langchain-core==0.3.25
-langchain-openai==0.2.0
+langchain-openai==0.2.14
 langchain-text-splitters==0.3.0
 langsmith==0.2.3
 lazy-object-proxy==1.10.0

From 36e4398bcb2fef487c17ce0530e5d93b45023abc Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 23 Dec 2024 21:39:33 +0000
Subject: [PATCH 15/26] fix: bump deps

---
 application/requirements.txt | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/application/requirements.txt b/application/requirements.txt
index f189a300..da9d114e 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -28,11 +28,11 @@ jsonschema==4.23.0
 jsonschema-spec==0.2.4
 jsonschema-specifications==2023.7.1
 kombu==5.4.2
-langchain==0.3.11
-langchain-community==0.3.11
-langchain-core==0.3.25
+langchain==0.3.13
+langchain-community==0.3.13
+langchain-core==0.3.28
 langchain-openai==0.2.14
-langchain-text-splitters==0.3.0
+langchain-text-splitters==0.3.4
 langsmith==0.2.3
 lazy-object-proxy==1.10.0
 lxml==5.3.0
@@ -43,7 +43,7 @@ multidict==6.1.0
 mypy-extensions==1.0.0
 networkx==3.3
 numpy==2.2.1
-openai==1.57.0
+openai==1.58.1
 openapi-schema-validator==0.6.2
 openapi-spec-validator==0.6.0
 openapi3-parser==1.1.18

From 2536bd098826a90b9f6582ccf58a98c73ae88e02 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 23 Dec 2024 21:44:39 +0000
Subject: [PATCH 16/26] build(deps): bump elasticsearch from 8.15.1 to 8.17.0
 in /application

Bumps [elasticsearch](https://github.com/elastic/elasticsearch-py) from 8.15.1 to 8.17.0.
- [Release notes](https://github.com/elastic/elasticsearch-py/releases)
- [Commits](https://github.com/elastic/elasticsearch-py/compare/v8.15.1...v8.17.0)

---
updated-dependencies:
- dependency-name: elasticsearch
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 application/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/requirements.txt b/application/requirements.txt
index da9d114e..20e90232 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -7,7 +7,7 @@ docx2txt==0.8
 duckduckgo-search==6.3.0
 ebooklib==0.18
 elastic-transport==8.15.0
-elasticsearch==8.15.1
+elasticsearch==8.17.0
 escodegen==1.0.11
 esprima==4.0.1
 esutils==1.0.1

From e30291966a9761aa39f46296ccb4699bcaf44185 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 23 Dec 2024 21:47:31 +0000
Subject: [PATCH 17/26] fix: bump elastic transport

---
 application/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/requirements.txt b/application/requirements.txt
index 20e90232..08990ab2 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -6,7 +6,7 @@ dataclasses-json==0.6.7
 docx2txt==0.8
 duckduckgo-search==6.3.0
 ebooklib==0.18
-elastic-transport==8.15.0
+elastic-transport==8.15.1
 elasticsearch==8.17.0
 escodegen==1.0.11
 esprima==4.0.1

From ba9e2101bbf81629af4093df36d36b85b3995d0a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 23 Dec 2024 21:52:27 +0000
Subject: [PATCH 18/26] build(deps-dev): bump postcss from 8.4.47 to 8.4.49 in
 /frontend

Bumps [postcss](https://github.com/postcss/postcss) from 8.4.47 to 8.4.49.
- [Release notes](https://github.com/postcss/postcss/releases)
- [Changelog](https://github.com/postcss/postcss/blob/main/CHANGELOG.md)
- [Commits](https://github.com/postcss/postcss/compare/8.4.47...8.4.49)

---
updated-dependencies:
- dependency-name: postcss
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 frontend/package-lock.json | 10 +++++-----
 frontend/package.json      |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/frontend/package-lock.json b/frontend/package-lock.json
index 4371d7c3..4d6759da 100644
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -48,7 +48,7 @@
         "eslint-plugin-unused-imports": "^4.1.4",
         "husky": "^8.0.0",
         "lint-staged": "^15.2.10",
-        "postcss": "^8.4.41",
+        "postcss": "^8.4.49",
         "prettier": "^3.3.3",
         "prettier-plugin-tailwindcss": "^0.6.8",
         "tailwindcss": "^3.4.15",
@@ -7466,9 +7466,9 @@
       }
     },
     "node_modules/postcss": {
-      "version": "8.4.47",
-      "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.47.tgz",
-      "integrity": "sha512-56rxCq7G/XfB4EkXq9Egn5GCqugWvDFjafDOThIdMBsI15iqPqR5r15TfSr1YPYeEI19YeaXMCbY6u88Y76GLQ==",
+      "version": "8.4.49",
+      "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.49.tgz",
+      "integrity": "sha512-OCVPnIObs4N29kxTjzLfUryOkvZEq+pf8jTF0lg8E7uETuWHA+v7j3c/xJmiqpX450191LlmZfUKkXxkTry7nA==",
       "dev": true,
       "funding": [
         {
@@ -7486,7 +7486,7 @@
       ],
       "dependencies": {
         "nanoid": "^3.3.7",
-        "picocolors": "^1.1.0",
+        "picocolors": "^1.1.1",
         "source-map-js": "^1.2.1"
       },
       "engines": {
diff --git a/frontend/package.json b/frontend/package.json
index ca6ca518..220f9759 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -59,7 +59,7 @@
     "eslint-plugin-unused-imports": "^4.1.4",
     "husky": "^8.0.0",
     "lint-staged": "^15.2.10",
-    "postcss": "^8.4.41",
+    "postcss": "^8.4.49",
     "prettier": "^3.3.3",
     "prettier-plugin-tailwindcss": "^0.6.8",
     "tailwindcss": "^3.4.15",

From 636ac2a56c9b1974132fc24e77f0b785dc2b95ad Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 23 Dec 2024 21:58:40 +0000
Subject: [PATCH 19/26] build(deps-dev): bump typescript from 5.6.2 to 5.7.2 in
 /frontend

Bumps [typescript](https://github.com/microsoft/TypeScript) from 5.6.2 to 5.7.2.
- [Release notes](https://github.com/microsoft/TypeScript/releases)
- [Changelog](https://github.com/microsoft/TypeScript/blob/main/azure-pipelines.release.yml)
- [Commits](https://github.com/microsoft/TypeScript/compare/v5.6.2...v5.7.2)

---
updated-dependencies:
- dependency-name: typescript
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 frontend/package-lock.json | 8 ++++----
 frontend/package.json      | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/frontend/package-lock.json b/frontend/package-lock.json
index 4d6759da..d6ea061e 100644
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -52,7 +52,7 @@
         "prettier": "^3.3.3",
         "prettier-plugin-tailwindcss": "^0.6.8",
         "tailwindcss": "^3.4.15",
-        "typescript": "^5.6.2",
+        "typescript": "^5.7.2",
         "vite": "^5.4.11",
         "vite-plugin-svgr": "^4.2.0"
       }
@@ -9255,9 +9255,9 @@
       }
     },
     "node_modules/typescript": {
-      "version": "5.6.2",
-      "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.2.tgz",
-      "integrity": "sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==",
+      "version": "5.7.2",
+      "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.2.tgz",
+      "integrity": "sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==",
       "devOptional": true,
       "bin": {
         "tsc": "bin/tsc",
diff --git a/frontend/package.json b/frontend/package.json
index 220f9759..83237b07 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -63,7 +63,7 @@
     "prettier": "^3.3.3",
     "prettier-plugin-tailwindcss": "^0.6.8",
     "tailwindcss": "^3.4.15",
-    "typescript": "^5.6.2",
+    "typescript": "^5.7.2",
     "vite": "^5.4.11",
     "vite-plugin-svgr": "^4.2.0"
   }

From 868ea1a1e24e0691a88917a3ff6299bf057b1de5 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 23 Dec 2024 22:03:05 +0000
Subject: [PATCH 20/26] build(deps-dev): bump lint-staged from 15.2.10 to
 15.2.11 in /frontend

Bumps [lint-staged](https://github.com/lint-staged/lint-staged) from 15.2.10 to 15.2.11.
- [Release notes](https://github.com/lint-staged/lint-staged/releases)
- [Changelog](https://github.com/lint-staged/lint-staged/blob/master/CHANGELOG.md)
- [Commits](https://github.com/lint-staged/lint-staged/compare/v15.2.10...v15.2.11)

---
updated-dependencies:
- dependency-name: lint-staged
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 frontend/package-lock.json | 138 ++++++++++++++-----------------------
 frontend/package.json      |   2 +-
 2 files changed, 52 insertions(+), 88 deletions(-)

diff --git a/frontend/package-lock.json b/frontend/package-lock.json
index d6ea061e..1fc37133 100644
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -47,7 +47,7 @@
         "eslint-plugin-react": "^7.37.2",
         "eslint-plugin-unused-imports": "^4.1.4",
         "husky": "^8.0.0",
-        "lint-staged": "^15.2.10",
+        "lint-staged": "^15.2.11",
         "postcss": "^8.4.49",
         "prettier": "^3.3.3",
         "prettier-plugin-tailwindcss": "^0.6.8",
@@ -864,18 +864,6 @@
         "url": "https://github.com/chalk/ansi-regex?sponsor=1"
       }
     },
-    "node_modules/@isaacs/cliui/node_modules/ansi-styles": {
-      "version": "6.2.1",
-      "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz",
-      "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==",
-      "dev": true,
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/chalk/ansi-styles?sponsor=1"
-      }
-    },
     "node_modules/@isaacs/cliui/node_modules/emoji-regex": {
       "version": "9.2.2",
       "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz",
@@ -2152,6 +2140,18 @@
         "node": ">=8"
       }
     },
+    "node_modules/ansi-styles": {
+      "version": "6.2.1",
+      "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz",
+      "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==",
+      "dev": true,
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/ansi-styles?sponsor=1"
+      }
+    },
     "node_modules/any-promise": {
       "version": "1.3.0",
       "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz",
@@ -2902,11 +2902,11 @@
       }
     },
     "node_modules/debug": {
-      "version": "4.3.6",
-      "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.6.tgz",
-      "integrity": "sha512-O/09Bd4Z1fBrU4VzkhFqVgpPzaGbw6Sm9FEkBT1A/YBXQFGuuSxa1dN2nxgxS34JmKXqYx8CZAwEVoJFImUXIg==",
+      "version": "4.4.0",
+      "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz",
+      "integrity": "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==",
       "dependencies": {
-        "ms": "2.1.2"
+        "ms": "^2.1.3"
       },
       "engines": {
         "node": ">=6.0"
@@ -3068,9 +3068,9 @@
       "dev": true
     },
     "node_modules/emoji-regex": {
-      "version": "10.3.0",
-      "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.3.0.tgz",
-      "integrity": "sha512-QpLs9D9v9kArv4lfDEgg1X/gN5XLnf/A6l9cs8SPZLRZR3ZkY9+kwIQTxm+fsSej5UMYGE8fdoaZVIBlqG0XTw==",
+      "version": "10.4.0",
+      "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.4.0.tgz",
+      "integrity": "sha512-EC+0oUMY1Rqm4O6LLrgjtYDvcVYTy7chDnM4Q7030tP4Kwj3u/pR6gP9ygnp2CJMK5Gq+9Q2oqmrFJAz01DXjw==",
       "dev": true
     },
     "node_modules/entities": {
@@ -4304,9 +4304,9 @@
       }
     },
     "node_modules/get-east-asian-width": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.2.0.tgz",
-      "integrity": "sha512-2nk+7SIVb14QrgXFHcm84tD4bKQz0RxPuMT8Ag5KPOq7J5fEmAg0UbXdTOSHqNuHSU28k55qnceesxXRZGzKWA==",
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.3.0.tgz",
+      "integrity": "sha512-vpeMIQKxczTD/0s2CdEWHcb0eeJe6TFjxb+J5xgX7hScxqrGuyjmv4c1D4A/gelKfyox0gJJwIHF+fLjeaM8kQ==",
       "dev": true,
       "engines": {
         "node": ">=18"
@@ -5671,21 +5671,21 @@
       "dev": true
     },
     "node_modules/lint-staged": {
-      "version": "15.2.10",
-      "resolved": "https://registry.npmjs.org/lint-staged/-/lint-staged-15.2.10.tgz",
-      "integrity": "sha512-5dY5t743e1byO19P9I4b3x8HJwalIznL5E1FWYnU6OWw33KxNBSLAc6Cy7F2PsFEO8FKnLwjwm5hx7aMF0jzZg==",
+      "version": "15.2.11",
+      "resolved": "https://registry.npmjs.org/lint-staged/-/lint-staged-15.2.11.tgz",
+      "integrity": "sha512-Ev6ivCTYRTGs9ychvpVw35m/bcNDuBN+mnTeObCL5h+boS5WzBEC6LHI4I9F/++sZm1m+J2LEiy0gxL/R9TBqQ==",
       "dev": true,
       "dependencies": {
         "chalk": "~5.3.0",
         "commander": "~12.1.0",
-        "debug": "~4.3.6",
+        "debug": "~4.4.0",
         "execa": "~8.0.1",
-        "lilconfig": "~3.1.2",
-        "listr2": "~8.2.4",
+        "lilconfig": "~3.1.3",
+        "listr2": "~8.2.5",
         "micromatch": "~4.0.8",
         "pidtree": "~0.6.0",
         "string-argv": "~0.3.2",
-        "yaml": "~2.5.0"
+        "yaml": "~2.6.1"
       },
       "bin": {
         "lint-staged": "bin/lint-staged.js"
@@ -5710,9 +5710,9 @@
       }
     },
     "node_modules/lint-staged/node_modules/lilconfig": {
-      "version": "3.1.2",
-      "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.2.tgz",
-      "integrity": "sha512-eop+wDAvpItUys0FWkHIKeC9ybYrTGbU41U5K7+bttZZeohvnY7M9dZ5kB21GNWiFT2q1OoPTvncPCgSOVO5ow==",
+      "version": "3.1.3",
+      "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz",
+      "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==",
       "dev": true,
       "engines": {
         "node": ">=14"
@@ -5722,9 +5722,9 @@
       }
     },
     "node_modules/listr2": {
-      "version": "8.2.4",
-      "resolved": "https://registry.npmjs.org/listr2/-/listr2-8.2.4.tgz",
-      "integrity": "sha512-opevsywziHd3zHCVQGAj8zu+Z3yHNkkoYhWIGnq54RrCVwLz0MozotJEDnKsIBLvkfLGN6BLOyAeRrYI0pKA4g==",
+      "version": "8.2.5",
+      "resolved": "https://registry.npmjs.org/listr2/-/listr2-8.2.5.tgz",
+      "integrity": "sha512-iyAZCeyD+c1gPyE9qpFu8af0Y+MRtmKOncdGoA2S5EY8iFq99dmmvkNnHiWo+pj0s7yH7l3KPIgee77tKpXPWQ==",
       "dev": true,
       "dependencies": {
         "cli-truncate": "^4.0.0",
@@ -5779,9 +5779,9 @@
       }
     },
     "node_modules/log-update/node_modules/ansi-regex": {
-      "version": "6.0.1",
-      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz",
-      "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==",
+      "version": "6.1.0",
+      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz",
+      "integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==",
       "dev": true,
       "engines": {
         "node": ">=12"
@@ -5790,18 +5790,6 @@
         "url": "https://github.com/chalk/ansi-regex?sponsor=1"
       }
     },
-    "node_modules/log-update/node_modules/ansi-styles": {
-      "version": "6.2.1",
-      "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz",
-      "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==",
-      "dev": true,
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/chalk/ansi-styles?sponsor=1"
-      }
-    },
     "node_modules/log-update/node_modules/is-fullwidth-code-point": {
       "version": "5.0.0",
       "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-5.0.0.tgz",
@@ -6968,9 +6956,9 @@
       }
     },
     "node_modules/ms": {
-      "version": "2.1.2",
-      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
-      "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
+      "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
     },
     "node_modules/mz": {
       "version": "2.7.0",
@@ -8576,18 +8564,6 @@
         "url": "https://github.com/chalk/slice-ansi?sponsor=1"
       }
     },
-    "node_modules/slice-ansi/node_modules/ansi-styles": {
-      "version": "6.2.1",
-      "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz",
-      "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==",
-      "dev": true,
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/chalk/ansi-styles?sponsor=1"
-      }
-    },
     "node_modules/snake-case": {
       "version": "3.0.4",
       "resolved": "https://registry.npmjs.org/snake-case/-/snake-case-3.0.4.tgz",
@@ -8679,9 +8655,9 @@
       }
     },
     "node_modules/string-width/node_modules/ansi-regex": {
-      "version": "6.0.1",
-      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz",
-      "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==",
+      "version": "6.1.0",
+      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz",
+      "integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==",
       "dev": true,
       "engines": {
         "node": ">=12"
@@ -9775,9 +9751,9 @@
       }
     },
     "node_modules/wrap-ansi/node_modules/ansi-regex": {
-      "version": "6.0.1",
-      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz",
-      "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==",
+      "version": "6.1.0",
+      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz",
+      "integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==",
       "dev": true,
       "engines": {
         "node": ">=12"
@@ -9786,18 +9762,6 @@
         "url": "https://github.com/chalk/ansi-regex?sponsor=1"
       }
     },
-    "node_modules/wrap-ansi/node_modules/ansi-styles": {
-      "version": "6.2.1",
-      "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz",
-      "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==",
-      "dev": true,
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/chalk/ansi-styles?sponsor=1"
-      }
-    },
     "node_modules/wrap-ansi/node_modules/strip-ansi": {
       "version": "7.1.0",
       "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz",
@@ -9834,9 +9798,9 @@
       "dev": true
     },
     "node_modules/yaml": {
-      "version": "2.5.1",
-      "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.5.1.tgz",
-      "integrity": "sha512-bLQOjaX/ADgQ20isPJRvF0iRUHIxVhYvr53Of7wGcWlO2jvtUlH5m87DsmulFVxRpNLOnI4tB6p/oh8D7kpn9Q==",
+      "version": "2.6.1",
+      "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.6.1.tgz",
+      "integrity": "sha512-7r0XPzioN/Q9kXBro/XPnA6kznR73DHq+GXh5ON7ZozRO6aMjbmiBuKste2wslTFkC5d1dw0GooOCepZXJ2SAg==",
       "dev": true,
       "bin": {
         "yaml": "bin.mjs"
diff --git a/frontend/package.json b/frontend/package.json
index 83237b07..0547d68d 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -58,7 +58,7 @@
     "eslint-plugin-react": "^7.37.2",
     "eslint-plugin-unused-imports": "^4.1.4",
     "husky": "^8.0.0",
-    "lint-staged": "^15.2.10",
+    "lint-staged": "^15.2.11",
     "postcss": "^8.4.49",
     "prettier": "^3.3.3",
     "prettier-plugin-tailwindcss": "^0.6.8",

From 753832d701cc127887adf933b8a6f48ab0818a9d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 23 Dec 2024 22:06:40 +0000
Subject: [PATCH 21/26] build(deps): bump react-router-dom from 6.8.1 to 7.1.1
 in /frontend

Bumps [react-router-dom](https://github.com/remix-run/react-router/tree/HEAD/packages/react-router-dom) from 6.8.1 to 7.1.1.
- [Release notes](https://github.com/remix-run/react-router/releases)
- [Changelog](https://github.com/remix-run/react-router/blob/main/packages/react-router-dom/CHANGELOG.md)
- [Commits](https://github.com/remix-run/react-router/commits/react-router-dom@7.1.1/packages/react-router-dom)

---
updated-dependencies:
- dependency-name: react-router-dom
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 frontend/package-lock.json | 69 +++++++++++++++++++++++++-------------
 frontend/package.json      |  2 +-
 2 files changed, 47 insertions(+), 24 deletions(-)

diff --git a/frontend/package-lock.json b/frontend/package-lock.json
index 1fc37133..9a3dbf31 100644
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -22,7 +22,7 @@
         "react-i18next": "^15.0.2",
         "react-markdown": "^9.0.1",
         "react-redux": "^8.0.5",
-        "react-router-dom": "^6.8.1",
+        "react-router-dom": "^7.1.1",
         "react-syntax-highlighter": "^15.5.0",
         "rehype-katex": "^7.0.1",
         "remark-gfm": "^4.0.0",
@@ -1051,14 +1051,6 @@
         }
       }
     },
-    "node_modules/@remix-run/router": {
-      "version": "1.3.2",
-      "resolved": "https://registry.npmjs.org/@remix-run/router/-/router-1.3.2.tgz",
-      "integrity": "sha512-t54ONhl/h75X94SWsHGQ4G/ZrCEguKSRQr7DrjTciJXW0YU1QhlwYeycvK5JgkzlxmvrK7wq1NB/PLtHxoiDcA==",
-      "engines": {
-        "node": ">=14"
-      }
-    },
     "node_modules/@rollup/pluginutils": {
       "version": "5.1.0",
       "resolved": "https://registry.npmjs.org/@rollup/pluginutils/-/pluginutils-5.1.0.tgz",
@@ -1549,6 +1541,11 @@
         "@babel/types": "^7.20.7"
       }
     },
+    "node_modules/@types/cookie": {
+      "version": "0.6.0",
+      "resolved": "https://registry.npmjs.org/@types/cookie/-/cookie-0.6.0.tgz",
+      "integrity": "sha512-4Kh9a6B2bQciAhf7FSuMRRkUWecJgJu9nPnx3yzpsfXX/c50REIqpHY4C82bXP90qrLtXtkDxTZosYO3UpOwlA=="
+    },
     "node_modules/@types/debug": {
       "version": "4.1.12",
       "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.12.tgz",
@@ -2784,6 +2781,14 @@
       "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==",
       "dev": true
     },
+    "node_modules/cookie": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/cookie/-/cookie-1.0.2.tgz",
+      "integrity": "sha512-9Kr/j4O16ISv8zBBhJoi4bXOYNTkFLOqSL3UDB0njXxCXNezjeyVrJyGOWtgfs/q2km1gwBcfH8q1yEGoMYunA==",
+      "engines": {
+        "node": ">=18"
+      }
+    },
     "node_modules/copy-to-clipboard": {
       "version": "3.3.3",
       "resolved": "https://registry.npmjs.org/copy-to-clipboard/-/copy-to-clipboard-3.3.3.tgz",
@@ -7972,33 +7977,41 @@
       }
     },
     "node_modules/react-router": {
-      "version": "6.8.1",
-      "resolved": "https://registry.npmjs.org/react-router/-/react-router-6.8.1.tgz",
-      "integrity": "sha512-Jgi8BzAJQ8MkPt8ipXnR73rnD7EmZ0HFFb7jdQU24TynGW1Ooqin2KVDN9voSC+7xhqbbCd2cjGUepb6RObnyg==",
+      "version": "7.1.1",
+      "resolved": "https://registry.npmjs.org/react-router/-/react-router-7.1.1.tgz",
+      "integrity": "sha512-39sXJkftkKWRZ2oJtHhCxmoCrBCULr/HAH4IT5DHlgu/Q0FCPV0S4Lx+abjDTx/74xoZzNYDYbOZWlJjruyuDQ==",
       "dependencies": {
-        "@remix-run/router": "1.3.2"
+        "@types/cookie": "^0.6.0",
+        "cookie": "^1.0.1",
+        "set-cookie-parser": "^2.6.0",
+        "turbo-stream": "2.4.0"
       },
       "engines": {
-        "node": ">=14"
+        "node": ">=20.0.0"
       },
       "peerDependencies": {
-        "react": ">=16.8"
+        "react": ">=18",
+        "react-dom": ">=18"
+      },
+      "peerDependenciesMeta": {
+        "react-dom": {
+          "optional": true
+        }
       }
     },
     "node_modules/react-router-dom": {
-      "version": "6.8.1",
-      "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-6.8.1.tgz",
-      "integrity": "sha512-67EXNfkQgf34P7+PSb6VlBuaacGhkKn3kpE51+P6zYSG2kiRoumXEL6e27zTa9+PGF2MNXbgIUHTVlleLbIcHQ==",
+      "version": "7.1.1",
+      "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-7.1.1.tgz",
+      "integrity": "sha512-vSrQHWlJ5DCfyrhgo0k6zViOe9ToK8uT5XGSmnuC2R3/g261IdIMpZVqfjD6vWSXdnf5Czs4VA/V60oVR6/jnA==",
       "dependencies": {
-        "@remix-run/router": "1.3.2",
-        "react-router": "6.8.1"
+        "react-router": "7.1.1"
       },
       "engines": {
-        "node": ">=14"
+        "node": ">=20.0.0"
       },
       "peerDependencies": {
-        "react": ">=16.8",
-        "react-dom": ">=16.8"
+        "react": ">=18",
+        "react-dom": ">=18"
       }
     },
     "node_modules/react-side-effect": {
@@ -8456,6 +8469,11 @@
         "semver": "bin/semver.js"
       }
     },
+    "node_modules/set-cookie-parser": {
+      "version": "2.7.1",
+      "resolved": "https://registry.npmjs.org/set-cookie-parser/-/set-cookie-parser-2.7.1.tgz",
+      "integrity": "sha512-IOc8uWeOZgnb3ptbCURJWNjWUPcO3ZnTTdzsurqERrP6nPyv+paC55vJM0LpOlT2ne+Ix+9+CRG1MNLlyZ4GjQ=="
+    },
     "node_modules/set-function-length": {
       "version": "1.2.2",
       "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz",
@@ -9133,6 +9151,11 @@
         "typescript": ">=2.8.0 || >= 3.2.0-dev || >= 3.3.0-dev || >= 3.4.0-dev || >= 3.5.0-dev || >= 3.6.0-dev || >= 3.6.0-beta || >= 3.7.0-dev || >= 3.7.0-beta"
       }
     },
+    "node_modules/turbo-stream": {
+      "version": "2.4.0",
+      "resolved": "https://registry.npmjs.org/turbo-stream/-/turbo-stream-2.4.0.tgz",
+      "integrity": "sha512-FHncC10WpBd2eOmGwpmQsWLDoK4cqsA/UT/GqNoaKOQnT8uzhtCbg3EoUDMvqpOSAI0S26mr0rkjzbOO6S3v1g=="
+    },
     "node_modules/type-check": {
       "version": "0.4.0",
       "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz",
diff --git a/frontend/package.json b/frontend/package.json
index 0547d68d..ff98e94c 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -33,7 +33,7 @@
     "react-i18next": "^15.0.2",
     "react-markdown": "^9.0.1",
     "react-redux": "^8.0.5",
-    "react-router-dom": "^6.8.1",
+    "react-router-dom": "^7.1.1",
     "react-syntax-highlighter": "^15.5.0",
     "rehype-katex": "^7.0.1",
     "remark-gfm": "^4.0.0",

From e45648b389544fa59f24f65ea1d58efc58c35ef4 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 30 Dec 2024 20:17:15 +0000
Subject: [PATCH 22/26] build(deps): bump langsmith from 0.2.3 to 0.2.6 in
 /application

Bumps [langsmith](https://github.com/langchain-ai/langsmith-sdk) from 0.2.3 to 0.2.6.
- [Release notes](https://github.com/langchain-ai/langsmith-sdk/releases)
- [Commits](https://github.com/langchain-ai/langsmith-sdk/compare/v0.2.3...v0.2.6)

---
updated-dependencies:
- dependency-name: langsmith
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 application/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/requirements.txt b/application/requirements.txt
index 08990ab2..754fb271 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -33,7 +33,7 @@ langchain-community==0.3.13
 langchain-core==0.3.28
 langchain-openai==0.2.14
 langchain-text-splitters==0.3.4
-langsmith==0.2.3
+langsmith==0.2.6
 lazy-object-proxy==1.10.0
 lxml==5.3.0
 markupsafe==2.1.5

From e42fc97d03e88a31f676ad8daffc912b6210e7da Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 30 Dec 2024 20:17:25 +0000
Subject: [PATCH 23/26] build(deps): bump celery from 5.3.6 to 5.4.0 in
 /application

Bumps [celery](https://github.com/celery/celery) from 5.3.6 to 5.4.0.
- [Release notes](https://github.com/celery/celery/releases)
- [Changelog](https://github.com/celery/celery/blob/main/Changelog.rst)
- [Commits](https://github.com/celery/celery/compare/v5.3.6...v5.4.0)

---
updated-dependencies:
- dependency-name: celery
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 application/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/requirements.txt b/application/requirements.txt
index 08990ab2..41297624 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -1,7 +1,7 @@
 anthropic==0.40.0
 boto3==1.34.153
 beautifulsoup4==4.12.3
-celery==5.3.6
+celery==5.4.0
 dataclasses-json==0.6.7
 docx2txt==0.8
 duckduckgo-search==6.3.0

From 0f611eb87bf038154afb20b31350027eecf23ae0 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 31 Dec 2024 14:52:21 +0000
Subject: [PATCH 24/26] build(deps): bump redis from 5.0.1 to 5.2.1 in
 /application

Bumps [redis](https://github.com/redis/redis-py) from 5.0.1 to 5.2.1.
- [Release notes](https://github.com/redis/redis-py/releases)
- [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES)
- [Commits](https://github.com/redis/redis-py/compare/v5.0.1...v5.2.1)

---
updated-dependencies:
- dependency-name: redis
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 application/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/requirements.txt b/application/requirements.txt
index b7660c42..6a318338 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -68,7 +68,7 @@ python-dateutil==2.9.0.post0
 python-dotenv==1.0.1
 python-pptx==1.0.2
 qdrant-client==1.11.0
-redis==5.0.1
+redis==5.2.1
 referencing==0.30.2
 regex==2024.9.11
 requests==2.32.3

From 3daeab5186e41289656b5a5d71b86c38a526a62e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 31 Dec 2024 14:57:29 +0000
Subject: [PATCH 25/26] build(deps): bump tiktoken from 0.7.0 to 0.8.0 in
 /application

Bumps [tiktoken](https://github.com/openai/tiktoken) from 0.7.0 to 0.8.0.
- [Release notes](https://github.com/openai/tiktoken/releases)
- [Changelog](https://github.com/openai/tiktoken/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/tiktoken/compare/0.7.0...0.8.0)

---
updated-dependencies:
- dependency-name: tiktoken
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 application/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/requirements.txt b/application/requirements.txt
index 6a318338..7c9f8101 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -74,7 +74,7 @@ regex==2024.9.11
 requests==2.32.3
 retry==0.9.2
 sentence-transformers==3.3.1
-tiktoken==0.7.0
+tiktoken==0.8.0
 tokenizers==0.21.0
 torch==2.4.1
 tqdm==4.66.5

From efb018d2b068a88dd99b8b32bb08f106517294e4 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 31 Dec 2024 14:58:26 +0000
Subject: [PATCH 26/26] build(deps): bump marshmallow from 3.22.0 to 3.23.2 in
 /application

Bumps [marshmallow](https://github.com/marshmallow-code/marshmallow) from 3.22.0 to 3.23.2.
- [Changelog](https://github.com/marshmallow-code/marshmallow/blob/dev/CHANGELOG.rst)
- [Commits](https://github.com/marshmallow-code/marshmallow/compare/3.22.0...3.23.2)

---
updated-dependencies:
- dependency-name: marshmallow
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 application/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/requirements.txt b/application/requirements.txt
index 7c9f8101..362fdd45 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -37,7 +37,7 @@ langsmith==0.2.6
 lazy-object-proxy==1.10.0
 lxml==5.3.0
 markupsafe==2.1.5
-marshmallow==3.22.0
+marshmallow==3.23.2
 mpmath==1.3.0
 multidict==6.1.0
 mypy-extensions==1.0.0