mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
Feat: Agent Token or Request Limiting (#2041)
* Update routes.py: added token and request limits to the create/update agent operations * Added a usage-limit check to the API endpoints; agents cannot be created with a usage limit right now, that will be implemented later * Implemented API limiting as either token-limiting or request-limiting modes * Minor typo & bug fix
This commit is contained in:
@@ -30,6 +30,10 @@ class BaseAgent(ABC):
|
||||
decoded_token: Optional[Dict] = None,
|
||||
attachments: Optional[List[Dict]] = None,
|
||||
json_schema: Optional[Dict] = None,
|
||||
limited_token_mode: Optional[bool] = False,
|
||||
token_limit: Optional[int] = settings.DEFAULT_AGENT_LIMITS["token_limit"],
|
||||
limited_request_mode: Optional[bool] = False,
|
||||
request_limit: Optional[int] = settings.DEFAULT_AGENT_LIMITS["request_limit"],
|
||||
):
|
||||
self.endpoint = endpoint
|
||||
self.llm_name = llm_name
|
||||
@@ -54,6 +58,10 @@ class BaseAgent(ABC):
|
||||
)
|
||||
self.attachments = attachments or []
|
||||
self.json_schema = json_schema
|
||||
self.limited_token_mode = limited_token_mode
|
||||
self.token_limit = token_limit
|
||||
self.limited_request_mode = limited_request_mode
|
||||
self.request_limit = request_limit
|
||||
|
||||
@log_activity()
|
||||
def gen(
|
||||
|
||||
@@ -72,6 +72,9 @@ class AnswerResource(Resource, BaseAnswerResource):
|
||||
agent = processor.create_agent()
|
||||
retriever = processor.create_retriever()
|
||||
|
||||
if error := self.check_usage(processor.agent_config):
|
||||
return error
|
||||
|
||||
stream = self.complete_stream(
|
||||
question=data["question"],
|
||||
agent=agent,
|
||||
|
||||
@@ -3,7 +3,7 @@ import json
|
||||
import logging
|
||||
from typing import Any, Dict, Generator, List, Optional
|
||||
|
||||
from flask import Response
|
||||
from flask import Response, make_response, jsonify
|
||||
from flask_restx import Namespace
|
||||
|
||||
from application.api.answer.services.conversation_service import ConversationService
|
||||
@@ -25,6 +25,7 @@ class BaseAnswerResource:
|
||||
def __init__(self):
|
||||
mongo = MongoDB.get_client()
|
||||
db = mongo[settings.MONGO_DB_NAME]
|
||||
self.db = db
|
||||
self.user_logs_collection = db["user_logs"]
|
||||
self.gpt_model = get_gpt_model()
|
||||
self.conversation_service = ConversationService()
|
||||
@@ -40,6 +41,88 @@ class BaseAnswerResource:
|
||||
return missing_fields
|
||||
return None
|
||||
|
||||
def check_usage(
    self, agent_config: Dict
) -> Optional[Response]:
    """Check whether the agent behind an API key has exhausted its usage limits.

    Usage is measured over the 24 hours preceding the call, using the
    documents in the ``token_usage`` collection recorded for the API key.
    Each enabled mode is enforced independently: the request is rejected
    as soon as *any* enabled limit has been reached.

    Args:
        agent_config: The config dict of the agent instance. Only the
            ``user_api_key`` entry is read; when it is missing or empty no
            limiting is applied.

    Returns:
        ``None`` when the request may proceed. Otherwise a JSON response:
        401 when no agent matches the API key, or 429 when an enabled
        token or request limit has been reached.
    """
    api_key = agent_config.get("user_api_key")
    if not api_key:
        return None

    agent = self.db["agents"].find_one({"key": api_key})
    if not agent:
        return make_response(
            jsonify(
                {
                    "success": False,
                    "message": "Invalid API key."
                }
            ),
            401
        )

    limited_token_mode = agent.get("limited_token_mode", False)
    limited_request_mode = agent.get("limited_request_mode", False)

    # Nothing to enforce: return before converting limits or querying usage.
    if not limited_token_mode and not limited_request_mode:
        return None

    # A limit that is absent or stored as None falls back to the configured
    # default, so int() is never handed None.
    defaults = settings.DEFAULT_AGENT_LIMITS
    token_limit = agent.get("token_limit")
    token_limit = int(defaults["token_limit"] if token_limit is None else token_limit)
    request_limit = agent.get("request_limit")
    request_limit = int(
        defaults["request_limit"] if request_limit is None else request_limit
    )

    token_usage_collection = self.db["token_usage"]

    # NOTE(review): naive local time, as before; this presumably matches how
    # token_usage timestamps are written. Confirm against the writer.
    end_date = datetime.datetime.now()
    start_date = end_date - datetime.timedelta(hours=24)

    match_query = {
        "timestamp": {"$gte": start_date, "$lte": end_date},
        "api_key": api_key
    }

    limit_exceeded = False

    if limited_token_mode:
        token_pipeline = [
            {"$match": match_query},
            {
                "$group": {
                    "_id": None,
                    "total_tokens": {"$sum": {"$add": ["$prompt_tokens", "$generated_tokens"]}}
                }
            }
        ]
        token_result = list(token_usage_collection.aggregate(token_pipeline))
        daily_token_usage = token_result[0]["total_tokens"] if token_result else 0
        limit_exceeded = daily_token_usage >= token_limit

    # Checked independently of the token limit, so having both modes enabled
    # enforces both instead of letting one under-limit mode mask the other.
    # Skipped when the token limit has already been hit (the outcome is the
    # same and it saves a query).
    if limited_request_mode and not limit_exceeded:
        daily_request_usage = token_usage_collection.count_documents(match_query)
        limit_exceeded = daily_request_usage >= request_limit

    if not limit_exceeded:
        return None

    return make_response(
        jsonify(
            {
                "success": False,
                "message": "Exceeding usage limit, please try again later."
            }
        ),
        429,  # too many requests
    )
|
||||
|
||||
def complete_stream(
|
||||
self,
|
||||
question: str,
|
||||
|
||||
@@ -76,6 +76,9 @@ class StreamResource(Resource, BaseAnswerResource):
|
||||
agent = processor.create_agent()
|
||||
retriever = processor.create_retriever()
|
||||
|
||||
if error := self.check_usage(processor.agent_config):
|
||||
return error
|
||||
|
||||
return Response(
|
||||
self.complete_stream(
|
||||
question=data["question"],
|
||||
|
||||
@@ -10,6 +10,7 @@ from flask import current_app, jsonify, make_response, request
|
||||
from flask_restx import fields, Namespace, Resource
|
||||
|
||||
from application.api import api
|
||||
from application.core.settings import settings
|
||||
from application.api.user.base import (
|
||||
agents_collection,
|
||||
db,
|
||||
@@ -74,6 +75,10 @@ class GetAgent(Resource):
|
||||
"agent_type": agent.get("agent_type", ""),
|
||||
"status": agent.get("status", ""),
|
||||
"json_schema": agent.get("json_schema"),
|
||||
"limited_token_mode": agent.get("limited_token_mode", False),
|
||||
"token_limit": agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
|
||||
"limited_request_mode": agent.get("limited_request_mode", False),
|
||||
"request_limit": agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
|
||||
"created_at": agent.get("createdAt", ""),
|
||||
"updated_at": agent.get("updatedAt", ""),
|
||||
"last_used_at": agent.get("lastUsedAt", ""),
|
||||
@@ -143,6 +148,10 @@ class GetAgents(Resource):
|
||||
"agent_type": agent.get("agent_type", ""),
|
||||
"status": agent.get("status", ""),
|
||||
"json_schema": agent.get("json_schema"),
|
||||
"limited_token_mode": agent.get("limited_token_mode", False),
|
||||
"token_limit": agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
|
||||
"limited_request_mode": agent.get("limited_request_mode", False),
|
||||
"request_limit": agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
|
||||
"created_at": agent.get("createdAt", ""),
|
||||
"updated_at": agent.get("updatedAt", ""),
|
||||
"last_used_at": agent.get("lastUsedAt", ""),
|
||||
@@ -199,6 +208,22 @@ class CreateAgent(Resource):
|
||||
required=False,
|
||||
description="JSON schema for enforcing structured output format",
|
||||
),
|
||||
"limited_token_mode": fields.Boolean(
|
||||
required=False,
|
||||
description="Whether the agent is in limited token mode"
|
||||
),
|
||||
"token_limit": fields.Integer(
|
||||
required=False,
|
||||
description="Token limit for the agent in limited mode"
|
||||
),
|
||||
"limited_request_mode": fields.Boolean(
|
||||
required=False,
|
||||
description="Whether the agent is in limited request mode"
|
||||
),
|
||||
"request_limit": fields.Integer(
|
||||
required=False,
|
||||
description="Request limit for the agent in limited mode"
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
@@ -344,6 +369,10 @@ class CreateAgent(Resource):
|
||||
"agent_type": data.get("agent_type", ""),
|
||||
"status": data.get("status"),
|
||||
"json_schema": data.get("json_schema"),
|
||||
"limited_token_mode": data.get("limited_token_mode", False),
|
||||
"token_limit": data.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
|
||||
"limited_request_mode": data.get("limited_request_mode", False),
|
||||
"request_limit": data.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
|
||||
"createdAt": datetime.datetime.now(datetime.timezone.utc),
|
||||
"updatedAt": datetime.datetime.now(datetime.timezone.utc),
|
||||
"lastUsedAt": None,
|
||||
@@ -399,6 +428,22 @@ class UpdateAgent(Resource):
|
||||
required=False,
|
||||
description="JSON schema for enforcing structured output format",
|
||||
),
|
||||
"limited_token_mode": fields.Boolean(
|
||||
required=False,
|
||||
description="Whether the agent is in limited token mode"
|
||||
),
|
||||
"token_limit": fields.Integer(
|
||||
required=False,
|
||||
description="Token limit for the agent in limited mode"
|
||||
),
|
||||
"limited_request_mode": fields.Boolean(
|
||||
require=False,
|
||||
description="Whether the agent is in limited request mode"
|
||||
),
|
||||
"request_limit": fields.Integer(
|
||||
required=False,
|
||||
description="Request limit for the agent in limited mode"
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
@@ -486,6 +531,10 @@ class UpdateAgent(Resource):
|
||||
"agent_type",
|
||||
"status",
|
||||
"json_schema",
|
||||
"limited_token_mode",
|
||||
"token_limit",
|
||||
"limited_request_mode",
|
||||
"request_limit"
|
||||
]
|
||||
|
||||
for field in allowed_fields:
|
||||
@@ -602,6 +651,54 @@ class UpdateAgent(Resource):
|
||||
update_fields[field] = json_schema
|
||||
else:
|
||||
update_fields[field] = None
|
||||
elif field == "limited_token_mode":
|
||||
is_mode_enabled = data.get("limited_token_mode", False)
|
||||
if is_mode_enabled and data.get("token_limit") is None:
|
||||
return make_response(
|
||||
jsonify(
|
||||
{
|
||||
"success": False,
|
||||
"message": "Token limit must be provided when limited token mode is enabled",
|
||||
}
|
||||
),
|
||||
400,
|
||||
)
|
||||
elif field == "limited_request_mode":
|
||||
is_mode_enabled = data.get("limited_request_mode", False)
|
||||
if is_mode_enabled and data.get("request_limit") is None:
|
||||
return make_response(
|
||||
jsonify(
|
||||
{
|
||||
"success": False,
|
||||
"message": "Request limit must be provided when limited request mode is enabled",
|
||||
}
|
||||
),
|
||||
400,
|
||||
)
|
||||
elif field == "token_limit":
|
||||
token_limit = data.get("token_limit")
|
||||
if token_limit is not None and not data.get("limited_token_mode"):
|
||||
return make_response(
|
||||
jsonify(
|
||||
{
|
||||
"success": False,
|
||||
"message": "Token limit cannot be set when limited token mode is disabled",
|
||||
}
|
||||
),
|
||||
400,
|
||||
)
|
||||
elif field == "request_limit":
|
||||
request_limit = data.get("request_limit")
|
||||
if request_limit is not None and not data.get("limited_request_mode"):
|
||||
return make_response(
|
||||
jsonify(
|
||||
{
|
||||
"success": False,
|
||||
"message": "Request limit cannot be set when limited request mode is disabled",
|
||||
}
|
||||
),
|
||||
400,
|
||||
)
|
||||
else:
|
||||
value = data[field]
|
||||
if field in ["name", "description", "prompt_id", "agent_type"]:
|
||||
|
||||
@@ -9,6 +9,7 @@ from flask import current_app, jsonify, make_response, request
|
||||
from flask_restx import fields, Namespace, Resource
|
||||
|
||||
from application.api import api
|
||||
from application.core.settings import settings
|
||||
from application.api.user.base import (
|
||||
agents_collection,
|
||||
db,
|
||||
@@ -75,6 +76,10 @@ class SharedAgent(Resource):
|
||||
"agent_type": shared_agent.get("agent_type", ""),
|
||||
"status": shared_agent.get("status", ""),
|
||||
"json_schema": shared_agent.get("json_schema"),
|
||||
"limited_token_mode": shared_agent.get("limited_token_mode", False),
|
||||
"token_limit": shared_agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
|
||||
"limited_request_mode": shared_agent.get("limited_request_mode", False),
|
||||
"request_limit": shared_agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
|
||||
"created_at": shared_agent.get("createdAt", ""),
|
||||
"updated_at": shared_agent.get("updatedAt", ""),
|
||||
"shared": shared_agent.get("shared_publicly", False),
|
||||
@@ -149,6 +154,10 @@ class SharedAgents(Resource):
|
||||
"agent_type": agent.get("agent_type", ""),
|
||||
"status": agent.get("status", ""),
|
||||
"json_schema": agent.get("json_schema"),
|
||||
"limited_token_mode": agent.get("limited_token_mode", False),
|
||||
"token_limit": agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
|
||||
"limited_request_mode": agent.get("limited_request_mode", False),
|
||||
"request_limit": agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
|
||||
"created_at": agent.get("createdAt", ""),
|
||||
"updated_at": agent.get("updatedAt", ""),
|
||||
"pinned": str(agent["_id"]) in pinned_ids,
|
||||
|
||||
@@ -28,6 +28,10 @@ class Settings(BaseSettings):
|
||||
"claude-2": 1e5,
|
||||
"gemini-2.5-flash": 1e6,
|
||||
}
|
||||
DEFAULT_AGENT_LIMITS: dict = {
|
||||
"token_limit": 50000,
|
||||
"request_limit": 500,
|
||||
}
|
||||
UPLOAD_FOLDER: str = "inputs"
|
||||
PARSE_PDF_AS_IMAGE: bool = False
|
||||
PARSE_IMAGE_REMOTE: bool = False
|
||||
|
||||
Reference in New Issue
Block a user