From ce32dd2907baf2bb209909167cc90f3268bae58a Mon Sep 17 00:00:00 2001
From: Ali Arda Fincan <45637956+ardafincan@users.noreply.github.com>
Date: Mon, 13 Oct 2025 21:32:46 +0300
Subject: [PATCH] Feat: Agent Token or Request Limiting (#2041)

* Update routes.py, added token and request limits to create/update agent operations

* added usage limit check to api endpoints

Agents cannot yet be created with a usage limit; that will be implemented later.

* implemented api limiting as either token limiting or request limiting modes

* minor typo & bug fix
---
 application/agents/base.py              |   8 ++
 application/api/answer/routes/answer.py |   3 +
 application/api/answer/routes/base.py   |  85 +++++++++++++-
 application/api/answer/routes/stream.py |   3 +
 application/api/user/agents/routes.py   |  97 ++++++++++++++++
 application/api/user/agents/sharing.py  |   9 ++
 application/core/settings.py            |   4 +
 frontend/src/agents/NewAgent.tsx        | 144 +++++++++++++++++++++++-
 frontend/src/agents/types/index.ts      |   4 +
 9 files changed, 352 insertions(+), 5 deletions(-)

diff --git a/application/agents/base.py b/application/agents/base.py
index 9cdaa13a..f014e392 100644
--- a/application/agents/base.py
+++ b/application/agents/base.py
@@ -30,6 +30,10 @@ class BaseAgent(ABC):
         decoded_token: Optional[Dict] = None,
         attachments: Optional[List[Dict]] = None,
         json_schema: Optional[Dict] = None,
+        limited_token_mode: Optional[bool] = False,
+        token_limit: Optional[int] = settings.DEFAULT_AGENT_LIMITS["token_limit"],
+        limited_request_mode: Optional[bool] = False,
+        request_limit: Optional[int] = settings.DEFAULT_AGENT_LIMITS["request_limit"],
     ):
         self.endpoint = endpoint
         self.llm_name = llm_name
@@ -54,6 +58,10 @@ class BaseAgent(ABC):
         )
         self.attachments = attachments or []
         self.json_schema = json_schema
+        self.limited_token_mode = limited_token_mode
+        self.token_limit = token_limit
+        self.limited_request_mode = limited_request_mode
+        self.request_limit = request_limit
 
     @log_activity()
     def gen(
diff --git a/application/api/answer/routes/answer.py b/application/api/answer/routes/answer.py
index 2c2d8f7b..043dc3f9 100644
--- a/application/api/answer/routes/answer.py
+++ b/application/api/answer/routes/answer.py
@@ -72,6 +72,9 @@ class AnswerResource(Resource, BaseAnswerResource):
             agent = processor.create_agent()
             retriever = processor.create_retriever()
 
+            if error := self.check_usage(processor.agent_config):
+                return error
+
             stream = self.complete_stream(
                 question=data["question"],
                 agent=agent,
diff --git a/application/api/answer/routes/base.py b/application/api/answer/routes/base.py
index 5162ab77..f4585c98 100644
--- a/application/api/answer/routes/base.py
+++ b/application/api/answer/routes/base.py
@@ -3,7 +3,7 @@ import json
 import logging
 from typing import Any, Dict, Generator, List, Optional
 
-from flask import Response
+from flask import Response, make_response, jsonify
 from flask_restx import Namespace
 
 from application.api.answer.services.conversation_service import ConversationService
@@ -25,6 +25,7 @@ class BaseAnswerResource:
     def __init__(self):
         mongo = MongoDB.get_client()
         db = mongo[settings.MONGO_DB_NAME]
+        self.db = db
         self.user_logs_collection = db["user_logs"]
         self.gpt_model = get_gpt_model()
         self.conversation_service = ConversationService()
@@ -40,6 +41,96 @@ class BaseAnswerResource:
             return missing_fields
         return None
 
+    def check_usage(
+            self, agent_config: Dict
+    ) -> Optional[Response]:
+        """Enforce the agent's daily token and/or request limits.
+
+        The agent is looked up in the ``agents`` collection by the API key in
+        ``agent_config``; usage is measured over the trailing 24 hours from the
+        ``token_usage`` collection.
+
+        Args:
+            agent_config: The config dict of the agent instance; only its
+                ``user_api_key`` entry is read.
+
+        Returns:
+            None when the request may proceed (no API key, no limit mode
+            enabled, or every enabled limit still has headroom). Otherwise a
+            401 response for an unknown API key, or a 429 response as soon as
+            any enabled limit has been reached.
+        """
+        api_key = agent_config.get("user_api_key")
+        if not api_key:
+            return None
+
+        agent = self.db["agents"].find_one({"key": api_key})
+        if not agent:
+            return make_response(
+                jsonify({"success": False, "message": "Invalid API key."}),
+                401,
+            )
+
+        limited_token_mode = agent.get("limited_token_mode", False)
+        limited_request_mode = agent.get("limited_request_mode", False)
+        if not limited_token_mode and not limited_request_mode:
+            return None
+
+        # A limit stored as None would make int() raise; fall back to defaults.
+        token_limit = agent.get("token_limit")
+        if token_limit is None:
+            token_limit = settings.DEFAULT_AGENT_LIMITS["token_limit"]
+        request_limit = agent.get("request_limit")
+        if request_limit is None:
+            request_limit = settings.DEFAULT_AGENT_LIMITS["request_limit"]
+
+        # NOTE(review): naive local time, as in the original; presumably this
+        # matches how token_usage timestamps are written - confirm.
+        end_date = datetime.datetime.now()
+        start_date = end_date - datetime.timedelta(hours=24)
+        match_query = {
+            "timestamp": {"$gte": start_date, "$lte": end_date},
+            "api_key": api_key,
+        }
+        token_usage_collection = self.db["token_usage"]
+
+        # Every enabled limit must have headroom. Previously an elif chain let
+        # the request through when *either* limit passed, so with both modes
+        # enabled an exhausted limit could be bypassed.
+        limit_reached = False
+        if limited_token_mode:
+            token_pipeline = [
+                {"$match": match_query},
+                {
+                    "$group": {
+                        "_id": None,
+                        "total_tokens": {
+                            "$sum": {"$add": ["$prompt_tokens", "$generated_tokens"]}
+                        },
+                    }
+                },
+            ]
+            token_result = list(token_usage_collection.aggregate(token_pipeline))
+            daily_token_usage = token_result[0]["total_tokens"] if token_result else 0
+            limit_reached = (daily_token_usage or 0) >= int(token_limit)
+
+        if limited_request_mode and not limit_reached:
+            daily_request_usage = token_usage_collection.count_documents(match_query)
+            limit_reached = daily_request_usage >= int(request_limit)
+
+        if not limit_reached:
+            return None
+
+        return make_response(
+            jsonify(
+                {
+                    "success": False,
+                    "message": "Exceeding usage limit, please try again later.",
+                }
+            ),
+            429,  # too many requests
+        )
+
     def complete_stream(
         self,
         question: str,
diff --git a/application/api/answer/routes/stream.py b/application/api/answer/routes/stream.py
index 3fee3f5b..6d15a964 100644
--- a/application/api/answer/routes/stream.py
+++ b/application/api/answer/routes/stream.py
@@ -76,6 +76,9 @@ class StreamResource(Resource, BaseAnswerResource):
             agent = processor.create_agent()
             retriever = processor.create_retriever()
 
+            if error := self.check_usage(processor.agent_config):
+                return error
+
             return Response(
                 self.complete_stream(
                     question=data["question"],
diff --git a/application/api/user/agents/routes.py b/application/api/user/agents/routes.py
index 6755a647..a6512292 100644
--- a/application/api/user/agents/routes.py
+++ b/application/api/user/agents/routes.py
@@ -10,6 +10,7 @@ from flask import current_app, jsonify, make_response, request
 from flask_restx import fields, Namespace, Resource
 
 from application.api import api
+from application.core.settings import settings
 from application.api.user.base import (
     agents_collection,
     db,
@@ -74,6 +75,10 @@ class GetAgent(Resource):
                 "agent_type": agent.get("agent_type", ""),
                 "status": agent.get("status", ""),
                 "json_schema": agent.get("json_schema"),
+                "limited_token_mode": agent.get("limited_token_mode", False),
+                "token_limit": agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
+                "limited_request_mode": agent.get("limited_request_mode", False),
+                "request_limit": agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
                 "created_at": agent.get("createdAt", ""),
                 "updated_at": agent.get("updatedAt", ""),
                 "last_used_at": agent.get("lastUsedAt", ""),
@@ -143,6 +148,10 @@ class GetAgents(Resource):
                     "agent_type": agent.get("agent_type", ""),
                     "status": agent.get("status", ""),
                     "json_schema": agent.get("json_schema"),
+                    "limited_token_mode": agent.get("limited_token_mode", False),
+                    "token_limit": agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
+                    "limited_request_mode": agent.get("limited_request_mode", False),
+                    "request_limit": agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
                     "created_at": agent.get("createdAt", ""),
                     "updated_at": agent.get("updatedAt", ""),
                     "last_used_at": agent.get("lastUsedAt", ""),
@@ -199,6 +208,22 @@ class CreateAgent(Resource):
                 required=False,
                 description="JSON schema for enforcing structured output format",
             ),
+            "limited_token_mode": fields.Boolean(
+                required=False,
+                description="Whether the agent is in limited token mode"
+            ),
+            "token_limit": fields.Integer(
+                required=False,
+                description="Token limit for the agent in limited mode"
+            ),
+            "limited_request_mode": fields.Boolean(
+                required=False,
+                description="Whether the agent is in limited request mode"
+            ),
+            "request_limit": fields.Integer(
+                required=False,
+                description="Request limit for the agent in limited mode"
+            )
         },
     )
 
@@ -344,6 +369,10 @@ class CreateAgent(Resource):
                 "agent_type": data.get("agent_type", ""),
                 "status": data.get("status"),
                 "json_schema": data.get("json_schema"),
+                "limited_token_mode": data.get("limited_token_mode", False),
+                "token_limit": data.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
+                "limited_request_mode": data.get("limited_request_mode", False),
+                "request_limit": data.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
                 "createdAt": datetime.datetime.now(datetime.timezone.utc),
                 "updatedAt": datetime.datetime.now(datetime.timezone.utc),
                 "lastUsedAt": None,
@@ -399,6 +428,22 @@ class UpdateAgent(Resource):
                 required=False,
                 description="JSON schema for enforcing structured output format",
             ),
+            "limited_token_mode": fields.Boolean(
+                required=False,
+                description="Whether the agent is in limited token mode"
+            ),
+            "token_limit": fields.Integer(
+                required=False,
+                description="Token limit for the agent in limited mode"
+            ),
+            "limited_request_mode": fields.Boolean(
+                required=False,
+                description="Whether the agent is in limited request mode"
+            ),
+            "request_limit": fields.Integer(
+                required=False,
+                description="Request limit for the agent in limited mode"
+            )
         },
     )
 
@@ -486,6 +531,10 @@ class UpdateAgent(Resource):
             "agent_type",
             "status",
             "json_schema",
+            "limited_token_mode",
+            "token_limit",
+            "limited_request_mode",
+            "request_limit"
         ]
 
         for field in allowed_fields:
@@ -602,6 +651,65 @@ class UpdateAgent(Resource):
                     update_fields[field] = json_schema
                 else:
                     update_fields[field] = None
+            elif field in ("limited_token_mode", "limited_request_mode"):
+                kind = "token" if field == "limited_token_mode" else "request"
+                # Form posts carry booleans as strings ("True"/"False") while
+                # JSON bodies carry real booleans; normalise before use.
+                raw_mode = data.get(field, False)
+                is_mode_enabled = (
+                    raw_mode
+                    if isinstance(raw_mode, bool)
+                    else str(raw_mode).strip().lower() == "true"
+                )
+                if is_mode_enabled and data.get(f"{kind}_limit") is None:
+                    return make_response(
+                        jsonify(
+                            {
+                                "success": False,
+                                "message": f"{kind.capitalize()} limit must be provided when limited {kind} mode is enabled",
+                            }
+                        ),
+                        400,
+                    )
+                # Store the flag; validating without assigning never saved it.
+                update_fields[field] = is_mode_enabled
+            elif field in ("token_limit", "request_limit"):
+                kind = "token" if field == "token_limit" else "request"
+                raw_limit = data.get(field)
+                try:
+                    # None, "" and 0 all mean "no limit configured".
+                    limit_value = int(raw_limit) if raw_limit else 0
+                except (TypeError, ValueError):
+                    limit_value = -1
+                if limit_value < 0:
+                    return make_response(
+                        jsonify(
+                            {
+                                "success": False,
+                                "message": f"{kind.capitalize()} limit must be a non-negative integer",
+                            }
+                        ),
+                        400,
+                    )
+                raw_mode = data.get(f"limited_{kind}_mode", False)
+                is_mode_enabled = (
+                    raw_mode
+                    if isinstance(raw_mode, bool)
+                    else str(raw_mode).strip().lower() == "true"
+                )
+                # A zero limit is what clients send to mean "no limit", so only
+                # a positive limit requires the matching mode to be enabled.
+                if limit_value > 0 and not is_mode_enabled:
+                    return make_response(
+                        jsonify(
+                            {
+                                "success": False,
+                                "message": f"{kind.capitalize()} limit cannot be set when limited {kind} mode is disabled",
+                            }
+                        ),
+                        400,
+                    )
+                update_fields[field] = limit_value
             else:
                 value = data[field]
                 if field in ["name", "description", "prompt_id", "agent_type"]:
diff --git a/application/api/user/agents/sharing.py b/application/api/user/agents/sharing.py
index 7c823307..ca9044e4 100644
--- a/application/api/user/agents/sharing.py
+++ b/application/api/user/agents/sharing.py
@@ -9,6 +9,7 @@ from flask import current_app, jsonify, make_response, request
 from flask_restx import fields, Namespace, Resource
 
 from application.api import api
+from application.core.settings import settings
 from application.api.user.base import (
     agents_collection,
     db,
@@ -75,6 +76,10 @@ class SharedAgent(Resource):
                 "agent_type": shared_agent.get("agent_type", ""),
                 "status": shared_agent.get("status", ""),
                 "json_schema": shared_agent.get("json_schema"),
+                "limited_token_mode": shared_agent.get("limited_token_mode", False),
+                "token_limit": shared_agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
+                "limited_request_mode": shared_agent.get("limited_request_mode", False),
+                "request_limit": shared_agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
                 "created_at": shared_agent.get("createdAt", ""),
                 "updated_at": shared_agent.get("updatedAt", ""),
                 "shared": shared_agent.get("shared_publicly", False),
@@ -149,6 +154,10 @@ class SharedAgents(Resource):
                     "agent_type": agent.get("agent_type", ""),
                     "status": agent.get("status", ""),
                     "json_schema": agent.get("json_schema"),
+                    "limited_token_mode": agent.get("limited_token_mode", False),
+                    "token_limit": agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
+                    "limited_request_mode": agent.get("limited_request_mode", False),
+                    "request_limit": agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
                     "created_at": agent.get("createdAt", ""),
                     "updated_at": agent.get("updatedAt", ""),
                     "pinned": str(agent["_id"]) in pinned_ids,
diff --git a/application/core/settings.py b/application/core/settings.py
index 2dc159ba..0be38275 100644
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -28,6 +28,10 @@ class Settings(BaseSettings):
         "claude-2": 1e5,
         "gemini-2.5-flash": 1e6,
     }
+    DEFAULT_AGENT_LIMITS: dict = {
+        "token_limit": 50000,
+        "request_limit": 500,
+    }
     UPLOAD_FOLDER: str = "inputs"
     PARSE_PDF_AS_IMAGE: bool = False
     PARSE_IMAGE_REMOTE: bool = False
diff --git a/frontend/src/agents/NewAgent.tsx b/frontend/src/agents/NewAgent.tsx
index 21a1f3f5..ada4b9ae 100644
--- a/frontend/src/agents/NewAgent.tsx
+++ b/frontend/src/agents/NewAgent.tsx
@@ -53,6 +53,10 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
     agent_type: 'classic',
     status: '',
     json_schema: undefined,
+    limited_token_mode: false,
+    token_limit: undefined,
+    limited_request_mode: false,
+    request_limit: undefined,
   });
   const [imageFile, setImageFile] = useState<File | null>(null);
   const [prompts, setPrompts] = useState<
@@ -74,7 +78,8 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
   const [publishLoading, setPublishLoading] = useState(false);
   const [jsonSchemaText, setJsonSchemaText] = useState('');
   const [jsonSchemaValid, setJsonSchemaValid] = useState(true);
-  const [isJsonSchemaExpanded, setIsJsonSchemaExpanded] = useState(false);
+  const [isAdvancedSectionExpanded, setIsAdvancedSectionExpanded] =
+    useState(false);
 
   const initialAgentRef = useRef<Agent | null>(null);
   const sourceAnchorButtonRef = useRef<HTMLButtonElement>(null);
@@ -191,6 +196,16 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
     formData.append('agent_type', agent.agent_type);
     formData.append('status', 'draft');
 
+    if (agent.limited_token_mode && agent.token_limit) {
+      formData.append('limited_token_mode', 'True');
+      formData.append('token_limit', JSON.stringify(agent.token_limit));
+    } else formData.append('token_limit', '0');
+
+    if (agent.limited_request_mode && agent.request_limit) {
+      formData.append('limited_request_mode', 'True');
+      formData.append('request_limit', JSON.stringify(agent.request_limit));
+    } else formData.append('request_limit', '0');
+
     if (imageFile) formData.append('image', imageFile);
 
     if (agent.tools && agent.tools.length > 0)
@@ -280,6 +295,16 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
       formData.append('json_schema', JSON.stringify(agent.json_schema));
     }
 
+    if (agent.limited_token_mode && agent.token_limit) {
+      formData.append('limited_token_mode', 'True');
+      formData.append('token_limit', JSON.stringify(agent.token_limit));
+    } else formData.append('token_limit', '0');
+
+    if (agent.limited_request_mode && agent.request_limit) {
+      formData.append('limited_request_mode', 'True');
+      formData.append('request_limit', JSON.stringify(agent.request_limit));
+    } else formData.append('request_limit', '0');
+
     try {
       setPublishLoading(true);
       const response =
@@ -825,7 +850,9 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
           </div>
           <div className="rounded-[30px] bg-[#F6F6F6] px-6 py-3 dark:bg-[#383838] dark:text-[#E0E0E0]">
             <button
-              onClick={() => setIsJsonSchemaExpanded(!isJsonSchemaExpanded)}
+              onClick={() =>
+                setIsAdvancedSectionExpanded(!isAdvancedSectionExpanded)
+              }
               className="flex w-full items-center justify-between text-left focus:outline-none"
             >
               <div>
@@ -834,7 +861,7 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
               <div className="ml-4 flex items-center">
                 <svg
                   className={`h-5 w-5 transform transition-transform duration-200 ${
-                    isJsonSchemaExpanded ? 'rotate-180' : ''
+                    isAdvancedSectionExpanded ? 'rotate-180' : ''
                   }`}
                   fill="none"
                   stroke="currentColor"
@@ -849,7 +876,7 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
                 </svg>
               </div>
             </button>
-            {isJsonSchemaExpanded && (
+            {isAdvancedSectionExpanded && (
               <div className="mt-3">
                 <div>
                   <h2 className="text-sm font-medium">JSON response schema</h2>
@@ -892,6 +919,115 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
                       : 'Invalid JSON - fix to enable saving'}
                   </div>
                 )}
+
+                <div className="mt-6">
+                  <div className="flex items-center justify-between">
+                    <div>
+                      <h2 className="text-sm font-medium">Token limiting</h2>
+                      <p className="mt-1 text-xs text-gray-600 dark:text-gray-400">
+                        Limit daily total tokens that can be used by this agent
+                      </p>
+                    </div>
+                    <button
+                      onClick={() => {
+                        const newTokenMode = !agent.limited_token_mode;
+                        setAgent({
+                          ...agent,
+                          limited_token_mode: newTokenMode,
+                          limited_request_mode: newTokenMode
+                            ? false
+                            : agent.limited_request_mode,
+                        });
+                      }}
+                      className={`relative h-6 w-11 rounded-full transition-colors ${
+                        agent.limited_token_mode
+                          ? 'bg-purple-30'
+                          : 'bg-gray-300 dark:bg-gray-600'
+                      }`}
+                    >
+                      <span
+                        className={`absolute top-0.5 h-5 w-5 transform rounded-full bg-white transition-transform ${
+                          agent.limited_token_mode ? '' : '-translate-x-5'
+                        }`}
+                      />
+                    </button>
+                  </div>
+                  <input
+                    type="number"
+                    min="0"
+                    value={agent.token_limit || ''}
+                    onChange={(e) =>
+                      setAgent({
+                        ...agent,
+                        token_limit: e.target.value
+                          ? parseInt(e.target.value)
+                          : undefined,
+                      })
+                    }
+                    disabled={!agent.limited_token_mode}
+                    placeholder="Enter token limit"
+                    className={`border-silver text-jet dark:bg-raisin-black dark:text-bright-gray dark:placeholder:text-silver mt-2 w-full rounded-3xl border bg-white px-5 py-3 text-sm outline-hidden placeholder:text-gray-400 dark:border-[#7E7E7E] ${
+                      !agent.limited_token_mode
+                        ? 'cursor-not-allowed opacity-50'
+                        : ''
+                    }`}
+                  />
+                </div>
+
+                <div className="mt-6">
+                  <div className="flex items-center justify-between">
+                    <div>
+                      <h2 className="text-sm font-medium">Request limiting</h2>
+                      <p className="mt-1 text-xs text-gray-600 dark:text-gray-400">
+                        Limit daily total requests that can be made to this
+                        agent
+                      </p>
+                    </div>
+                    <button
+                      onClick={() => {
+                        const newRequestMode = !agent.limited_request_mode;
+                        setAgent({
+                          ...agent,
+                          limited_request_mode: newRequestMode,
+                          limited_token_mode: newRequestMode
+                            ? false
+                            : agent.limited_token_mode,
+                        });
+                      }}
+                      className={`relative h-6 w-11 rounded-full transition-colors ${
+                        agent.limited_request_mode
+                          ? 'bg-purple-30'
+                          : 'bg-gray-300 dark:bg-gray-600'
+                      }`}
+                    >
+                      <span
+                        className={`absolute top-0.5 h-5 w-5 transform rounded-full bg-white transition-transform ${
+                          agent.limited_request_mode ? '' : '-translate-x-5'
+                        }`}
+                      />
+                    </button>
+                  </div>
+                  <input
+                    type="number"
+                    min="0"
+                    value={agent.request_limit || ''}
+                    onChange={(e) =>
+                      setAgent({
+                        ...agent,
+                        request_limit: e.target.value
+                          ? parseInt(e.target.value)
+                          : undefined,
+                      })
+                    }
+                    disabled={!agent.limited_request_mode}
+                    placeholder="Enter request limit"
+                    className={`border-silver text-jet dark:bg-raisin-black dark:text-bright-gray dark:placeholder:text-silver mt-2 w-full rounded-3xl border bg-white px-5 py-3 text-sm outline-hidden placeholder:text-gray-400 dark:border-[#7E7E7E] ${
+                      !agent.limited_request_mode
+                        ? 'cursor-not-allowed opacity-50'
+                        : ''
+                    }`}
+                  />
+                </div>
               </div>
             )}
           </div>
diff --git a/frontend/src/agents/types/index.ts b/frontend/src/agents/types/index.ts
index 442097a1..6287b3d9 100644
--- a/frontend/src/agents/types/index.ts
+++ b/frontend/src/agents/types/index.ts
@@ -28,4 +28,8 @@ export type Agent = {
   updated_at?: string;
   last_used_at?: string;
   json_schema?: object;
+  limited_token_mode?: boolean;
+  token_limit?: number;
+  limited_request_mode?: boolean;
+  request_limit?: number;
 };