From ce32dd2907baf2bb209909167cc90f3268bae58a Mon Sep 17 00:00:00 2001 From: Ali Arda Fincan <45637956+ardafincan@users.noreply.github.com> Date: Mon, 13 Oct 2025 21:32:46 +0300 Subject: [PATCH] Feat: Agent Token or Request Limiting (#2041) * Update routes.py, added token and request limits to create/update agent operations * added usage limit check to api endpoints cannot create agents with usage limit right now that will be implemented * implemented api limiting as either token limiting or request limiting modes * minor typo & bug fix --- application/agents/base.py | 8 ++ application/api/answer/routes/answer.py | 3 + application/api/answer/routes/base.py | 85 +++++++++++++- application/api/answer/routes/stream.py | 3 + application/api/user/agents/routes.py | 97 ++++++++++++++++ application/api/user/agents/sharing.py | 9 ++ application/core/settings.py | 4 + frontend/src/agents/NewAgent.tsx | 144 +++++++++++++++++++++++- frontend/src/agents/types/index.ts | 4 + 9 files changed, 352 insertions(+), 5 deletions(-) diff --git a/application/agents/base.py b/application/agents/base.py index 9cdaa13a..f014e392 100644 --- a/application/agents/base.py +++ b/application/agents/base.py @@ -30,6 +30,10 @@ class BaseAgent(ABC): decoded_token: Optional[Dict] = None, attachments: Optional[List[Dict]] = None, json_schema: Optional[Dict] = None, + limited_token_mode: Optional[bool] = False, + token_limit: Optional[int] = settings.DEFAULT_AGENT_LIMITS["token_limit"], + limited_request_mode: Optional[bool] = False, + request_limit: Optional[int] = settings.DEFAULT_AGENT_LIMITS["request_limit"], ): self.endpoint = endpoint self.llm_name = llm_name @@ -54,6 +58,10 @@ class BaseAgent(ABC): ) self.attachments = attachments or [] self.json_schema = json_schema + self.limited_token_mode = limited_token_mode + self.token_limit = token_limit + self.limited_request_mode = limited_request_mode + self.request_limit = request_limit @log_activity() def gen( diff --git 
a/application/api/answer/routes/answer.py b/application/api/answer/routes/answer.py
index 2c2d8f7b..043dc3f9 100644
--- a/application/api/answer/routes/answer.py
+++ b/application/api/answer/routes/answer.py
@@ -72,6 +72,9 @@ class AnswerResource(Resource, BaseAnswerResource):
             agent = processor.create_agent()
             retriever = processor.create_retriever()
 
+            if error := self.check_usage(processor.agent_config):
+                return error
+
             stream = self.complete_stream(
                 question=data["question"],
                 agent=agent,
diff --git a/application/api/answer/routes/base.py b/application/api/answer/routes/base.py
index 5162ab77..f4585c98 100644
--- a/application/api/answer/routes/base.py
+++ b/application/api/answer/routes/base.py
@@ -3,7 +3,7 @@ import json
 import logging
 from typing import Any, Dict, Generator, List, Optional
 
-from flask import Response
+from flask import Response, make_response, jsonify
 from flask_restx import Namespace
 
 from application.api.answer.services.conversation_service import ConversationService
@@ -25,6 +25,7 @@ class BaseAnswerResource:
     def __init__(self):
         mongo = MongoDB.get_client()
         db = mongo[settings.MONGO_DB_NAME]
+        self.db = db
         self.user_logs_collection = db["user_logs"]
         self.gpt_model = get_gpt_model()
         self.conversation_service = ConversationService()
@@ -40,6 +41,88 @@ class BaseAnswerResource:
             return missing_fields
         return None
 
+    def check_usage(self, agent_config: Dict) -> Optional[Response]:
+        """Reject the request when the agent's daily usage limit is reached.
+
+        Usage is summed over the trailing 24 hours of the ``token_usage``
+        collection for the agent's API key. Every enabled limit mode is
+        enforced independently: reaching any one of them blocks the request.
+
+        Args:
+            agent_config: Agent config dict; only ``user_api_key`` is read.
+
+        Returns:
+            None when the request may proceed: no API key, no limit mode
+            enabled, or every enabled limit still has headroom. Otherwise a
+            401 response for an unknown API key, or a 429 response once an
+            enabled limit has been reached.
+        """
+        api_key = agent_config.get("user_api_key")
+        if not api_key:
+            return None
+
+        agent = self.db["agents"].find_one({"key": api_key})
+        if not agent:
+            return make_response(
+                jsonify({"success": False, "message": "Invalid API key."}),
+                401,
+            )
+
+        limited_token_mode = agent.get("limited_token_mode", False)
+        limited_request_mode = agent.get("limited_request_mode", False)
+        if not limited_token_mode and not limited_request_mode:
+            return None
+
+        def stored_limit(name: str) -> int:
+            # A missing or null value falls back to the configured default.
+            value = agent.get(name)
+            if value is None:
+                value = settings.DEFAULT_AGENT_LIMITS[name]
+            return int(value)
+
+        token_usage_collection = self.db["token_usage"]
+        # NOTE(review): naive local time; confirm usage rows are stamped the same way.
+        end_date = datetime.datetime.now()
+        start_date = end_date - datetime.timedelta(hours=24)
+        match_query = {
+            "timestamp": {"$gte": start_date, "$lte": end_date},
+            "api_key": api_key,
+        }
+
+        token_limit_reached = False
+        if limited_token_mode:
+            token_pipeline = [
+                {"$match": match_query},
+                {
+                    "$group": {
+                        "_id": None,
+                        "total_tokens": {
+                            "$sum": {"$add": ["$prompt_tokens", "$generated_tokens"]}
+                        },
+                    }
+                },
+            ]
+            token_result = list(token_usage_collection.aggregate(token_pipeline))
+            daily_token_usage = token_result[0]["total_tokens"] if token_result else 0
+            token_limit_reached = daily_token_usage >= stored_limit("token_limit")
+
+        request_limit_reached = False
+        if limited_request_mode:
+            daily_request_usage = token_usage_collection.count_documents(match_query)
+            request_limit_reached = daily_request_usage >= stored_limit("request_limit")
+
+        # Both modes may be enabled at once; the request is allowed only when
+        # neither enabled limit has been reached.
+        if not token_limit_reached and not request_limit_reached:
+            return None
+
+        return make_response(
+            jsonify(
+                {"success": False, "message": "Exceeding usage limit, please try again later."}
+            ),
+            429,  # too many requests
+        )
+
     def complete_stream(
         self,
         question: str,
diff --git a/application/api/answer/routes/stream.py b/application/api/answer/routes/stream.py
index 3fee3f5b..6d15a964 100644
--- a/application/api/answer/routes/stream.py
+++ b/application/api/answer/routes/stream.py
@@ -76,6 +76,9 @@ class StreamResource(Resource, BaseAnswerResource):
             agent = processor.create_agent()
             retriever = processor.create_retriever()
 
+            if error := self.check_usage(processor.agent_config):
+                return error
+
             return Response(
                 self.complete_stream(
                     question=data["question"],
diff --git a/application/api/user/agents/routes.py b/application/api/user/agents/routes.py
index 6755a647..a6512292 100644
--- a/application/api/user/agents/routes.py
+++ b/application/api/user/agents/routes.py
@@ -10,6 +10,7 @@ from flask import current_app, jsonify, make_response, request
 from flask_restx import fields, Namespace, Resource
 
 from application.api import api
+from application.core.settings import settings
 from application.api.user.base import (
     agents_collection,
     db,
@@ -74,6 +75,10 @@ class GetAgent(Resource):
                 "agent_type": agent.get("agent_type", ""),
                 "status": agent.get("status", ""),
                 "json_schema": agent.get("json_schema"),
+                "limited_token_mode": agent.get("limited_token_mode", False),
+                "token_limit": agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
+                "limited_request_mode": agent.get("limited_request_mode", False),
+                "request_limit": agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
                 "created_at": agent.get("createdAt", ""),
                 "updated_at": agent.get("updatedAt", ""),
                 "last_used_at": agent.get("lastUsedAt", ""),
@@ -143,6 +148,10 @@ class GetAgents(Resource):
                     "agent_type": agent.get("agent_type", ""),
                     "status": agent.get("status", ""),
                     "json_schema": agent.get("json_schema"),
+                    "limited_token_mode": agent.get("limited_token_mode", False),
+                    "token_limit": agent.get("token_limit",
settings.DEFAULT_AGENT_LIMITS["token_limit"]),
+                    "limited_request_mode": agent.get("limited_request_mode", False),
+                    "request_limit": agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
                     "created_at": agent.get("createdAt", ""),
                     "updated_at": agent.get("updatedAt", ""),
                     "last_used_at": agent.get("lastUsedAt", ""),
@@ -199,6 +208,22 @@ class CreateAgent(Resource):
                 required=False,
                 description="JSON schema for enforcing structured output format",
             ),
+            "limited_token_mode": fields.Boolean(
+                required=False,
+                description="Whether the agent is in limited token mode"
+            ),
+            "token_limit": fields.Integer(
+                required=False,
+                description="Token limit for the agent in limited mode"
+            ),
+            "limited_request_mode": fields.Boolean(
+                required=False,
+                description="Whether the agent is in limited request mode"
+            ),
+            "request_limit": fields.Integer(
+                required=False,
+                description="Request limit for the agent in limited mode"
+            )
         },
     )
 
@@ -344,6 +369,10 @@ class CreateAgent(Resource):
                 "agent_type": data.get("agent_type", ""),
                 "status": data.get("status"),
                 "json_schema": data.get("json_schema"),
+                "limited_token_mode": data.get("limited_token_mode", False),
+                "token_limit": data.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
+                "limited_request_mode": data.get("limited_request_mode", False),
+                "request_limit": data.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
                 "createdAt": datetime.datetime.now(datetime.timezone.utc),
                 "updatedAt": datetime.datetime.now(datetime.timezone.utc),
                 "lastUsedAt": None,
@@ -399,6 +428,22 @@ class UpdateAgent(Resource):
                 required=False,
                 description="JSON schema for enforcing structured output format",
             ),
+            "limited_token_mode": fields.Boolean(
+                required=False,
+                description="Whether the agent is in limited token mode"
+            ),
+            "token_limit": fields.Integer(
+                required=False,
+                description="Token limit for the agent in limited mode"
+            ),
+            "limited_request_mode": fields.Boolean(
+                required=False,
+                description="Whether the agent is in limited request mode"
+            ),
+            "request_limit": fields.Integer(
+                required=False,
+                description="Request limit for the agent in limited mode"
+            )
         },
     )
 
@@ -486,6 +531,10 @@ class UpdateAgent(Resource):
             "agent_type",
             "status",
             "json_schema",
+            "limited_token_mode",
+            "token_limit",
+            "limited_request_mode",
+            "request_limit"
         ]
 
         for field in allowed_fields:
@@ -602,6 +651,98 @@ class UpdateAgent(Resource):
                             update_fields[field] = json_schema
                     else:
                         update_fields[field] = None
+                elif field == "limited_token_mode":
+                    # Form posts deliver booleans as strings ("True").
+                    is_mode_enabled = (
+                        str(data.get("limited_token_mode", False)).strip().lower()
+                        == "true"
+                    )
+                    if is_mode_enabled and data.get("token_limit") is None:
+                        return make_response(
+                            jsonify(
+                                {
+                                    "success": False,
+                                    "message": "Token limit must be provided when limited token mode is enabled",
+                                }
+                            ),
+                            400,
+                        )
+                    update_fields[field] = is_mode_enabled
+                elif field == "limited_request_mode":
+                    # Form posts deliver booleans as strings ("True").
+                    is_mode_enabled = (
+                        str(data.get("limited_request_mode", False)).strip().lower()
+                        == "true"
+                    )
+                    if is_mode_enabled and data.get("request_limit") is None:
+                        return make_response(
+                            jsonify(
+                                {
+                                    "success": False,
+                                    "message": "Request limit must be provided when limited request mode is enabled",
+                                }
+                            ),
+                            400,
+                        )
+                    update_fields[field] = is_mode_enabled
+                elif field == "token_limit":
+                    try:
+                        token_limit = int(data.get("token_limit") or 0)
+                    except (TypeError, ValueError):
+                        return make_response(
+                            jsonify(
+                                {
+                                    "success": False,
+                                    "message": "Token limit must be an integer",
+                                }
+                            ),
+                            400,
+                        )
+                    is_mode_enabled = (
+                        str(data.get("limited_token_mode", False)).strip().lower()
+                        == "true"
+                    )
+                    # The frontend sends 0 when the mode is off, so only a
+                    # positive limit conflicts with a disabled mode.
+                    if token_limit > 0 and not is_mode_enabled:
+                        return make_response(
+                            jsonify(
+                                {
+                                    "success": False,
+                                    "message": "Token limit cannot be set when limited token mode is disabled",
+                                }
+                            ),
+                            400,
+                        )
+                    update_fields[field] = token_limit
+                elif field == "request_limit":
+                    try:
+                        request_limit = int(data.get("request_limit") or 0)
+                    except (TypeError, ValueError):
+                        return make_response(
+                            jsonify(
+                                {
+                                    "success": False,
+                                    "message": "Request limit must be an integer",
+                                }
+                            ),
+                            400,
+                        )
+                    is_mode_enabled = (
+                        str(data.get("limited_request_mode", False)).strip().lower()
+                        == "true"
+                    )
+                    # The frontend sends 0 when the mode is off, so only a
+                    # positive limit conflicts with a disabled mode.
+                    if request_limit > 0 and not is_mode_enabled:
+                        return make_response(
+                            jsonify(
+                                {
+                                    "success": False,
+                                    "message": "Request limit cannot be set when limited request mode is disabled",
+                                }
+                            ),
+                            400,
+                        )
+                    update_fields[field] = request_limit
                 else:
                     value = data[field]
                     if field in ["name", "description", "prompt_id", "agent_type"]:
diff --git a/application/api/user/agents/sharing.py b/application/api/user/agents/sharing.py
index 7c823307..ca9044e4 100644
---
a/application/api/user/agents/sharing.py +++ b/application/api/user/agents/sharing.py @@ -9,6 +9,7 @@ from flask import current_app, jsonify, make_response, request from flask_restx import fields, Namespace, Resource from application.api import api +from application.core.settings import settings from application.api.user.base import ( agents_collection, db, @@ -75,6 +76,10 @@ class SharedAgent(Resource): "agent_type": shared_agent.get("agent_type", ""), "status": shared_agent.get("status", ""), "json_schema": shared_agent.get("json_schema"), + "limited_token_mode": shared_agent.get("limited_token_mode", False), + "token_limit": shared_agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]), + "limited_request_mode": shared_agent.get("limited_request_mode", False), + "request_limit": shared_agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]), "created_at": shared_agent.get("createdAt", ""), "updated_at": shared_agent.get("updatedAt", ""), "shared": shared_agent.get("shared_publicly", False), @@ -149,6 +154,10 @@ class SharedAgents(Resource): "agent_type": agent.get("agent_type", ""), "status": agent.get("status", ""), "json_schema": agent.get("json_schema"), + "limited_token_mode": agent.get("limited_token_mode", False), + "token_limit": agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]), + "limited_request_mode": agent.get("limited_request_mode", False), + "request_limit": agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]), "created_at": agent.get("createdAt", ""), "updated_at": agent.get("updatedAt", ""), "pinned": str(agent["_id"]) in pinned_ids, diff --git a/application/core/settings.py b/application/core/settings.py index 2dc159ba..0be38275 100644 --- a/application/core/settings.py +++ b/application/core/settings.py @@ -28,6 +28,10 @@ class Settings(BaseSettings): "claude-2": 1e5, "gemini-2.5-flash": 1e6, } + DEFAULT_AGENT_LIMITS: dict = { + "token_limit": 50000, + 
"request_limit": 500, + } UPLOAD_FOLDER: str = "inputs" PARSE_PDF_AS_IMAGE: bool = False PARSE_IMAGE_REMOTE: bool = False diff --git a/frontend/src/agents/NewAgent.tsx b/frontend/src/agents/NewAgent.tsx index 21a1f3f5..ada4b9ae 100644 --- a/frontend/src/agents/NewAgent.tsx +++ b/frontend/src/agents/NewAgent.tsx @@ -53,6 +53,10 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) { agent_type: 'classic', status: '', json_schema: undefined, + limited_token_mode: false, + token_limit: undefined, + limited_request_mode: false, + request_limit: undefined, }); const [imageFile, setImageFile] = useState(null); const [prompts, setPrompts] = useState< @@ -74,7 +78,8 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) { const [publishLoading, setPublishLoading] = useState(false); const [jsonSchemaText, setJsonSchemaText] = useState(''); const [jsonSchemaValid, setJsonSchemaValid] = useState(true); - const [isJsonSchemaExpanded, setIsJsonSchemaExpanded] = useState(false); + const [isAdvancedSectionExpanded, setIsAdvancedSectionExpanded] = + useState(false); const initialAgentRef = useRef(null); const sourceAnchorButtonRef = useRef(null); @@ -191,6 +196,16 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) { formData.append('agent_type', agent.agent_type); formData.append('status', 'draft'); + if (agent.limited_token_mode && agent.token_limit) { + formData.append('limited_token_mode', 'True'); + formData.append('token_limit', JSON.stringify(agent.token_limit)); + } else formData.append('token_limit', '0'); + + if (agent.limited_request_mode && agent.request_limit) { + formData.append('limited_request_mode', 'True'); + formData.append('request_limit', JSON.stringify(agent.request_limit)); + } else formData.append('request_limit', '0'); + if (imageFile) formData.append('image', imageFile); if (agent.tools && agent.tools.length > 0) @@ -280,6 +295,16 @@ export default function NewAgent({ 
mode }: { mode: 'new' | 'edit' | 'draft' }) { formData.append('json_schema', JSON.stringify(agent.json_schema)); } + if (agent.limited_token_mode && agent.token_limit) { + formData.append('limited_token_mode', 'True'); + formData.append('token_limit', JSON.stringify(agent.token_limit)); + } else formData.append('token_limit', '0'); + + if (agent.limited_request_mode && agent.request_limit) { + formData.append('limited_request_mode', 'True'); + formData.append('request_limit', JSON.stringify(agent.request_limit)); + } else formData.append('request_limit', '0'); + try { setPublishLoading(true); const response = @@ -825,7 +850,9 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
- {isJsonSchemaExpanded && ( + {isAdvancedSectionExpanded && (

JSON response schema

@@ -892,6 +919,115 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) { : 'Invalid JSON - fix to enable saving'}
)} + +
+
+
+

Token limiting

+

+ Limit daily total tokens that can be used by this agent +

+
+ +
+ + setAgent({ + ...agent, + token_limit: e.target.value + ? parseInt(e.target.value) + : undefined, + }) + } + disabled={!agent.limited_token_mode} + placeholder="Enter token limit" + className={`border-silver text-jet dark:bg-raisin-black dark:text-bright-gray dark:placeholder:text-silver mt-2 w-full rounded-3xl border bg-white px-5 py-3 text-sm outline-hidden placeholder:text-gray-400 dark:border-[#7E7E7E] ${ + !agent.limited_token_mode + ? 'cursor-not-allowed opacity-50' + : '' + }`} + /> +
+ +
+
+
+

Request limiting

+

+ Limit daily total requests that can be made to this + agent +

+
+ +
+ + setAgent({ + ...agent, + request_limit: e.target.value + ? parseInt(e.target.value) + : undefined, + }) + } + disabled={!agent.limited_request_mode} + placeholder="Enter request limit" + className={`border-silver text-jet dark:bg-raisin-black dark:text-bright-gray dark:placeholder:text-silver mt-2 w-full rounded-3xl border bg-white px-5 py-3 text-sm outline-hidden placeholder:text-gray-400 dark:border-[#7E7E7E] ${ + !agent.limited_request_mode + ? 'cursor-not-allowed opacity-50' + : '' + }`} + /> +
)}
diff --git a/frontend/src/agents/types/index.ts b/frontend/src/agents/types/index.ts index 442097a1..6287b3d9 100644 --- a/frontend/src/agents/types/index.ts +++ b/frontend/src/agents/types/index.ts @@ -28,4 +28,8 @@ export type Agent = { updated_at?: string; last_used_at?: string; json_schema?: object; + limited_token_mode?: boolean; + token_limit?: number; + limited_request_mode?: boolean; + request_limit?: number; };