Mirror of https://github.com/arc53/DocsGPT.git (synced 2026-02-14 10:11:19 +00:00)
Merge branch 'main' into feat/agent-refactor-and-logging
@@ -116,8 +116,9 @@ def is_azure_configured():
 def save_conversation(
-    conversation_id, question, response, source_log_docs, tool_calls, llm, index=None
+    conversation_id, question, response, source_log_docs, tool_calls, llm, index=None, api_key=None
 ):
+    current_time = datetime.datetime.now(datetime.timezone.utc)
     if conversation_id is not None and index is not None:
         conversations_collection.update_one(
             {"_id": ObjectId(conversation_id), f"queries.{index}": {"$exists": True}},
@@ -127,6 +128,7 @@ def save_conversation(
                     f"queries.{index}.response": response,
                     f"queries.{index}.sources": source_log_docs,
                     f"queries.{index}.tool_calls": tool_calls,
+                    f"queries.{index}.timestamp": current_time
                 }
             },
         )
@@ -145,6 +147,7 @@ def save_conversation(
                         "response": response,
                         "sources": source_log_docs,
                         "tool_calls": tool_calls,
+                        "timestamp": current_time
                     }
                 }
             },
@@ -169,21 +172,25 @@ def save_conversation(
         ]

         completion = llm.gen(model=gpt_model, messages=messages_summary, max_tokens=30)
-        conversation_id = conversations_collection.insert_one(
-            {
-                "user": "local",
-                "date": datetime.datetime.utcnow(),
-                "name": completion,
-                "queries": [
-                    {
-                        "prompt": question,
-                        "response": response,
-                        "sources": source_log_docs,
-                        "tool_calls": tool_calls,
-                    }
-                ],
-            }
-        ).inserted_id
+        conversation_data = {
+            "user": "local",
+            "date": datetime.datetime.utcnow(),
+            "name": completion,
+            "queries": [
+                {
+                    "prompt": question,
+                    "response": response,
+                    "sources": source_log_docs,
+                    "tool_calls": tool_calls,
+                    "timestamp": current_time
+                }
+            ],
+        }
+        if api_key:
+            api_key_doc = api_key_collection.find_one({"key": api_key})
+            if api_key_doc:
+                conversation_data["api_key"] = api_key_doc["key"]
+        conversation_id = conversations_collection.insert_one(conversation_data).inserted_id
     return conversation_id


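For orientation, this is the document shape the new insert path produces: every query now carries a timestamp, and conversations created through an API key are tagged with it. A minimal sketch (field names taken from the diff above; values illustrative):

    # Illustrative document written by save_conversation after this change.
    conversation_doc = {
        "user": "local",
        "date": datetime.datetime.utcnow(),
        "name": completion,                    # short LLM-generated title
        "queries": [
            {
                "prompt": question,
                "response": response,
                "sources": source_log_docs,
                "tool_calls": tool_calls,
                "timestamp": current_time,     # new: per-query UTC timestamp
            }
        ],
        # new: present only when api_key was passed and found in api_key_collection
        "api_key": "<key value>",
    }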
@@ -198,7 +205,6 @@ def get_prompt(prompt_id):
     prompt = prompts_collection.find_one({"_id": ObjectId(prompt_id)})["content"]
     return prompt


 def complete_stream(
     question,
-    agent,
@@ -207,8 +213,14 @@ def complete_stream(
-    user_api_key,
-    isNoneDoc=False,
-    index=None,
+    question,
+    retriever,
+    conversation_id,
+    user_api_key,
+    isNoneDoc=False,
+    index=None,
+    should_save_conversation=True
 ):

     try:
         response_full = ""
         source_log_docs = []
@@ -239,9 +251,12 @@ def complete_stream(
                 doc["source"] = "None"

     llm = LLMCreator.create_llm(
-        settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=user_api_key
+        settings.LLM_NAME,
+        api_key=settings.API_KEY,
+        user_api_key=user_api_key
     )
-    if user_api_key is None:
+
+    if should_save_conversation:
         conversation_id = save_conversation(
             conversation_id,
             question,
@@ -250,10 +265,14 @@ def complete_stream(
             tool_calls,
             llm,
             index,
+            api_key=user_api_key
         )
-        # send data.type = "end" to indicate that the stream has ended as json
-        data = json.dumps({"type": "id", "id": str(conversation_id)})
-        yield f"data: {data}\n\n"
+    else:
+        conversation_id = None
+
+    # send data.type = "end" to indicate that the stream has ended as json
+    data = json.dumps({"type": "id", "id": str(conversation_id)})
+    yield f"data: {data}\n\n"

     retriever_params = retriever.get_params()
     user_logs_collection.insert_one(
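Two behavioural details fall out of this block. First, the long-standing inline comment says the event marks the stream end, but the frame type actually sent is "id". Second, the id event is now emitted even when saving is skipped, in which case conversation_id is None and str(None) is serialized. A minimal sketch of the two frames a client can receive (shapes taken from the json.dumps call above; the ObjectId is made up):

    data: {"type": "id", "id": "67c1f2e8a4b5c6d7e8f90123"}   # saving enabled
    data: {"type": "id", "id": "None"}                       # should_save_conversation=False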
@@ -316,6 +335,9 @@ class Stream(Resource):
         "index": fields.Integer(
             required=False, description="The position where query is to be updated"
         ),
+        "save_conversation": fields.Boolean(
+            required=False, default=True, description="Flag to save conversation"
+        ),
     },
 )

@@ -330,6 +352,8 @@ class Stream(Resource):
         if missing_fields:
             return missing_fields

+        save_conv = data.get("save_conversation", True)
+
         try:
             question = data["question"]
             history = limit_chat_history(
@@ -400,6 +424,7 @@ class Stream(Resource):
                 user_api_key=user_api_key,
                 isNoneDoc=data.get("isNoneDoc"),
                 index=index,
+                should_save_conversation=save_conv,
             ),
             mimetype="text/event-stream",
         )

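A minimal sketch of a request exercising the new flag; the endpoint URL and the exact set of required fields are assumptions here, since they sit outside this diff:

    import requests

    resp = requests.post(
        "http://localhost:7091/stream",      # assumed backend URL for the Stream resource
        json={
            "question": "What changed in this release?",
            "history": [],
            "save_conversation": False,      # new flag; treated as True when omitted
        },
        stream=True,
    )
    for line in resp.iter_lines():
        if line:
            print(line.decode())             # SSE frames, including the final "id" event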
@@ -107,11 +107,14 @@ class DeleteAllConversations(Resource):
 @user_ns.route("/api/get_conversations")
 class GetConversations(Resource):
     @api.doc(
-        description="Retrieve a list of the latest 30 conversations",
+        description="Retrieve a list of the latest 30 conversations (excluding API key conversations)",
     )
     def get(self):
         try:
-            conversations = conversations_collection.find().sort("date", -1).limit(30)
+            conversations = conversations_collection.find(
+                {"api_key": {"$exists": False}}
+            ).sort("date", -1).limit(30)

             list_conversations = [
                 {"id": str(conversation["_id"]), "name": conversation["name"]}
                 for conversation in conversations
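Because save_conversation now stamps api_key onto conversations created through API-key traffic, the $exists filter is what keeps those out of the local listing. A minimal sketch of the semantics (documents illustrative):

    # {"name": "local chat"}                        -> no api_key field, included
    # {"name": "widget chat", "api_key": "abc123"}  -> excluded by {"$exists": False}
    conversations_collection.find({"api_key": {"$exists": False}})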
@@ -214,17 +217,34 @@ class SubmitFeedback(Resource):
             return missing_fields

         try:
-            conversations_collection.update_one(
-                {
-                    "_id": ObjectId(data["conversation_id"]),
-                    f"queries.{data['question_index']}": {"$exists": True},
-                },
-                {
-                    "$set": {
-                        f"queries.{data['question_index']}.feedback": data["feedback"]
-                    }
-                },
-            )
+            if data["feedback"] is None:
+                # Remove feedback and feedback_timestamp if feedback is null
+                conversations_collection.update_one(
+                    {
+                        "_id": ObjectId(data["conversation_id"]),
+                        f"queries.{data['question_index']}": {"$exists": True},
+                    },
+                    {
+                        "$unset": {
+                            f"queries.{data['question_index']}.feedback": "",
+                            f"queries.{data['question_index']}.feedback_timestamp": ""
+                        }
+                    },
+                )
+            else:
+                # Set feedback and feedback_timestamp if feedback has a value
+                conversations_collection.update_one(
+                    {
+                        "_id": ObjectId(data["conversation_id"]),
+                        f"queries.{data['question_index']}": {"$exists": True},
+                    },
+                    {
+                        "$set": {
+                            f"queries.{data['question_index']}.feedback": data["feedback"],
+                            f"queries.{data['question_index']}.feedback_timestamp": datetime.datetime.now(datetime.timezone.utc)
+                        }
+                    },
+                )

         except Exception as err:
             current_app.logger.error(f"Error submitting feedback: {err}")
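The branch makes a null feedback value behave as an explicit clear rather than writing feedback: null into the document. A minimal sketch of the two update documents MongoDB receives for question_index 0 (feedback value illustrative):

    # data["feedback"] is None -> both fields are removed from the embedded query
    {"$unset": {"queries.0.feedback": "", "queries.0.feedback_timestamp": ""}}

    # data["feedback"] == "LIKE" -> value and a UTC timestamp are written together
    {"$set": {
        "queries.0.feedback": "LIKE",
        "queries.0.feedback_timestamp": datetime.datetime.now(datetime.timezone.utc),
    }}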
@@ -1186,21 +1206,12 @@ class GetMessageAnalytics(Resource):
     get_message_analytics_model = api.model(
         "GetMessageAnalyticsModel",
         {
-            "api_key_id": fields.String(
-                required=False,
-                description="API Key ID",
-            ),
+            "api_key_id": fields.String(required=False, description="API Key ID"),
             "filter_option": fields.String(
                 required=False,
                 description="Filter option for analytics",
                 default="last_30_days",
-                enum=[
-                    "last_hour",
-                    "last_24_hour",
-                    "last_7_days",
-                    "last_15_days",
-                    "last_30_days",
-                ],
+                enum=["last_hour", "last_24_hour", "last_7_days", "last_15_days", "last_30_days"],
             ),
         },
     )
@@ -1221,42 +1232,21 @@ class GetMessageAnalytics(Resource):
         except Exception as err:
             current_app.logger.error(f"Error getting API key: {err}")
             return make_response(jsonify({"success": False}), 400)

         end_date = datetime.datetime.now(datetime.timezone.utc)

         if filter_option == "last_hour":
             start_date = end_date - datetime.timedelta(hours=1)
             group_format = "%Y-%m-%d %H:%M:00"
-            group_stage = {
-                "$group": {
-                    "_id": {
-                        "minute": {
-                            "$dateToString": {"format": group_format, "date": "$date"}
-                        }
-                    },
-                    "total_messages": {"$sum": 1},
-                }
-            }
-
         elif filter_option == "last_24_hour":
             start_date = end_date - datetime.timedelta(hours=24)
             group_format = "%Y-%m-%d %H:00"
-            group_stage = {
-                "$group": {
-                    "_id": {
-                        "hour": {
-                            "$dateToString": {"format": group_format, "date": "$date"}
-                        }
-                    },
-                    "total_messages": {"$sum": 1},
-                }
-            }
-
         else:
             if filter_option in ["last_7_days", "last_15_days", "last_30_days"]:
                 filter_days = (
-                    6
-                    if filter_option == "last_7_days"
-                    else (14 if filter_option == "last_15_days" else 29)
+                    6 if filter_option == "last_7_days"
+                    else 14 if filter_option == "last_15_days"
+                    else 29
                 )
             else:
                 return make_response(
@@ -1264,36 +1254,44 @@ class GetMessageAnalytics(Resource):
                 )
             start_date = end_date - datetime.timedelta(days=filter_days)
             start_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0)
-            end_date = end_date.replace(
-                hour=23, minute=59, second=59, microsecond=999999
-            )
+            end_date = end_date.replace(hour=23, minute=59, second=59, microsecond=999999)
             group_format = "%Y-%m-%d"
-            group_stage = {
-                "$group": {
-                    "_id": {
-                        "day": {
-                            "$dateToString": {"format": group_format, "date": "$date"}
-                        }
-                    },
-                    "total_messages": {"$sum": 1},
-                }
-            }

         try:
-            match_stage = {
-                "$match": {
-                    "date": {"$gte": start_date, "$lte": end_date},
-                }
-            }
-            if api_key:
-                match_stage["$match"]["api_key"] = api_key
-            message_data = conversations_collection.aggregate(
-                [
-                    match_stage,
-                    group_stage,
-                    {"$sort": {"_id": 1}},
-                ]
-            )
+            pipeline = [
+                # Initial match for API key if provided
+                {
+                    "$match": {
+                        "api_key": api_key if api_key else {"$exists": False}
+                    }
+                },
+                {"$unwind": "$queries"},
+                # Match queries within the time range
+                {
+                    "$match": {
+                        "queries.timestamp": {
+                            "$gte": start_date,
+                            "$lte": end_date
+                        }
+                    }
+                },
+                # Group by formatted timestamp
+                {
+                    "$group": {
+                        "_id": {
+                            "$dateToString": {
+                                "format": group_format,
+                                "date": "$queries.timestamp"
+                            }
+                        },
+                        "count": {"$sum": 1}
+                    }
+                },
+                # Sort by timestamp
+                {"$sort": {"_id": 1}}
+            ]
+
+            message_data = conversations_collection.aggregate(pipeline)

             if filter_option == "last_hour":
                 intervals = generate_minute_range(start_date, end_date)
@@ -1305,12 +1303,7 @@ class GetMessageAnalytics(Resource):
                 daily_messages = {interval: 0 for interval in intervals}

             for entry in message_data:
-                if filter_option == "last_hour":
-                    daily_messages[entry["_id"]["minute"]] = entry["total_messages"]
-                elif filter_option == "last_24_hour":
-                    daily_messages[entry["_id"]["hour"]] = entry["total_messages"]
-                else:
-                    daily_messages[entry["_id"]["day"]] = entry["total_messages"]
+                daily_messages[entry["_id"]] = entry["count"]

         except Exception as err:
             current_app.logger.error(f"Error getting message analytics: {err}")
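The rewritten pipeline counts individual queries by their new timestamp field instead of counting conversations by date, which is why the single dictionary assignment above replaces the three-way branch. A minimal sketch of one conversation flowing through it (document and dates illustrative):

    # Input: {"queries": [{"timestamp": <2025-03-01 ...>}, {"timestamp": <2025-03-02 ...>}]}
    # {"$unwind": "$queries"}  -> two documents, one per query
    # second $match            -> keeps queries with timestamp in [start_date, end_date]
    # $group                   -> {"_id": "2025-03-01", "count": 1}, {"_id": "2025-03-02", "count": 1}
    # entry["_id"] is therefore already the formatted bucket key used by daily_messages.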
@@ -1358,6 +1351,7 @@ class GetTokenAnalytics(Resource):
         except Exception as err:
             current_app.logger.error(f"Error getting API key: {err}")
             return make_response(jsonify({"success": False}), 400)

         end_date = datetime.datetime.now(datetime.timezone.utc)

         if filter_option == "last_hour":
@@ -1378,7 +1372,6 @@ class GetTokenAnalytics(Resource):
                     },
                 }
             }
-
         elif filter_option == "last_24_hour":
             start_date = end_date - datetime.timedelta(hours=24)
             group_format = "%Y-%m-%d %H:00"
@@ -1397,7 +1390,6 @@ class GetTokenAnalytics(Resource):
                     },
                 }
             }
-
         else:
             if filter_option in ["last_7_days", "last_15_days", "last_30_days"]:
                 filter_days = (
@@ -1439,6 +1431,8 @@ class GetTokenAnalytics(Resource):
             }
             if api_key:
                 match_stage["$match"]["api_key"] = api_key
+            else:
+                match_stage["$match"]["api_key"] = {"$exists": False}

             token_usage_data = token_usage_collection.aggregate(
                 [
@@ -1517,11 +1511,11 @@ class GetFeedbackAnalytics(Resource):
         if filter_option == "last_hour":
             start_date = end_date - datetime.timedelta(hours=1)
             group_format = "%Y-%m-%d %H:%M:00"
-            date_field = {"$dateToString": {"format": group_format, "date": "$date"}}
+            date_field = {"$dateToString": {"format": group_format, "date": "$queries.feedback_timestamp"}}
         elif filter_option == "last_24_hour":
             start_date = end_date - datetime.timedelta(hours=24)
             group_format = "%Y-%m-%d %H:00"
-            date_field = {"$dateToString": {"format": group_format, "date": "$date"}}
+            date_field = {"$dateToString": {"format": group_format, "date": "$queries.feedback_timestamp"}}
         else:
             if filter_option in ["last_7_days", "last_15_days", "last_30_days"]:
                 filter_days = (
@@ -1539,17 +1533,19 @@ class GetFeedbackAnalytics(Resource):
                 hour=23, minute=59, second=59, microsecond=999999
             )
             group_format = "%Y-%m-%d"
-            date_field = {"$dateToString": {"format": group_format, "date": "$date"}}
+            date_field = {"$dateToString": {"format": group_format, "date": "$queries.feedback_timestamp"}}

         try:
             match_stage = {
                 "$match": {
-                    "date": {"$gte": start_date, "$lte": end_date},
-                    "queries": {"$exists": True, "$ne": []},
+                    "queries.feedback_timestamp": {"$gte": start_date, "$lte": end_date},
+                    "queries.feedback": {"$exists": True}
                 }
             }
             if api_key:
                 match_stage["$match"]["api_key"] = api_key
+            else:
+                match_stage["$match"]["api_key"] = {"$exists": False}

             # Unwind the queries array to process each query separately
             pipeline = [
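One MongoDB subtlety motivates the unwind that follows: a top-level match on queries.feedback_timestamp passes a whole conversation if any element of the queries array qualifies, so the per-query filter has to be reapplied after $unwind. The diff truncates here; a sketch of what the following stages presumably look like, modeled on the message-analytics pipeline above (the $group shape in particular is a guess):

    pipeline = [
        match_stage,                 # coarse, per-conversation filter
        {"$unwind": "$queries"},     # one document per query
        {"$match": {                 # re-check each query individually
            "queries.feedback": {"$exists": True},
            "queries.feedback_timestamp": {"$gte": start_date, "$lte": end_date},
        }},
        # hypothetical grouping using the date_field built above
        {"$group": {"_id": {"date": date_field, "feedback": "$queries.feedback"},
                    "count": {"$sum": 1}}},
    ]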
@@ -22,7 +22,7 @@ class GoogleLLM(BaseLLM):
             parts = []
             if role and content is not None:
                 if isinstance(content, str):
-                    parts = [types.Part.from_text(content)]
+                    parts = [types.Part.from_text(text=content)]
                 elif isinstance(content, list):
                     for item in content:
                         if "text" in item:
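This one-keyword change tracks the google-genai bump from 0.5.0 to 1.3.0 in the requirements below: in 1.x, Part.from_text takes text as a keyword-only argument, so the old positional call raises a TypeError. A minimal sketch (assuming google-genai >= 1.0 is installed):

    from google.genai import types

    part = types.Part.from_text(text="hello")  # correct on google-genai 1.x
    # types.Part.from_text("hello")            # TypeError: positional argument no longer accepted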
@@ -1,4 +1,4 @@
-anthropic==0.45.2
+anthropic==0.49.0
 boto3==1.35.97
 beautifulsoup4==4.12.3
 celery==5.4.0
@@ -7,14 +7,14 @@ docx2txt==0.8
 duckduckgo-search==7.4.2
 ebooklib==0.18
 elastic-transport==8.17.0
-elasticsearch==8.17.0
+elasticsearch==8.17.1
 escodegen==1.0.11
 esprima==4.0.1
 esutils==1.0.1
 Flask==3.1.0
 faiss-cpu==1.9.0.post1
 flask-restx==1.3.0
-google-genai==0.5.0
+google-genai==1.3.0
 google-generativeai==0.8.3
 gTTS==2.5.4
 gunicorn==23.0.0
@@ -32,12 +32,12 @@ jsonschema-specifications==2023.7.1
 kombu==5.4.2
 langchain==0.3.14
 langchain-community==0.3.14
-langchain-core==0.3.29
+langchain-core==0.3.40
 langchain-openai==0.3.0
 langchain-text-splitters==0.3.5
 langsmith==0.2.10
 lazy-object-proxy==1.10.0
-lxml==5.3.0
+lxml==5.3.1
 markupsafe==3.0.2
 marshmallow==3.26.1
 mpmath==1.3.0
@@ -46,7 +46,7 @@ mypy-extensions==1.0.0
 networkx==3.4.2
 numpy==2.2.1
 openai==1.59.5
-openapi-schema-validator==0.6.2
+openapi-schema-validator==0.6.3
 openapi-spec-validator==0.6.0
 openapi3-parser==1.1.19
 orjson==3.10.14
@@ -57,12 +57,12 @@ pathable==0.4.4
 pillow==11.1.0
 portalocker==2.10.1
 prance==23.6.21.0
-primp==0.10.0
+primp==0.14.0
 prompt-toolkit==3.0.50
 protobuf==5.29.3
 psycopg2-binary==2.9.10
 py==1.11.0
-pydantic==2.10.4
+pydantic==2.10.6
 pydantic-core==2.27.2
 pydantic-settings==2.7.1
 pymongo==4.10.1
@@ -70,7 +70,7 @@ pypdf==5.2.0
 python-dateutil==2.9.0.post0
 python-dotenv==1.0.1
 python-pptx==1.0.2
-qdrant-client==1.12.2
+qdrant-client==1.13.2
 redis==5.2.1
 referencing==0.30.2
 regex==2024.11.6
@@ -81,7 +81,7 @@ tiktoken==0.8.0
 tokenizers==0.21.0
 torch==2.5.1
 tqdm==4.67.1
-transformers==4.48.0
+transformers==4.49.0
 typing-extensions==4.12.2
 typing-inspect==0.9.0
 tzdata==2024.2