mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 16:43:16 +00:00
94 lines
3.3 KiB
Python
94 lines
3.3 KiB
Python
import functools
import sys
import time
from datetime import datetime

import redis

from application.core.settings import settings
from application.utils import get_hash
|
# Initialize Redis client
# Module-level client shared by the caching decorators below.
# Connection parameters come from application settings; no connection is
# opened here — redis-py connects lazily on first command.
redis_client = redis.Redis(
    host=settings.REDIS_HOST,
    port=settings.REDIS_PORT,
    db=settings.REDIS_DB,
)
|
def gen_cache_key(messages):
    """
    Build a deterministic cache key from the most recent user prompt.

    Walks the conversation from newest to oldest and hashes the content of
    the first message whose role is ``user``.

    Args:
        messages (list[dict]): Conversation messages, each expected to carry
            ``'role'`` and ``'content'`` keys.

    Returns:
        The hash of the latest user message's content (via ``get_hash``).

    Raises:
        ValueError: If the conversation contains no user message.
    """
    for message in reversed(messages):
        if message['role'] == 'user':
            return get_hash(message['content'])
    raise ValueError("No user message found in the conversation to generate a cache key.")
|
def gen_cache(func):
    """
    Decorator to cache the full (non-streamed) response of an LLM call in Redis.

    The cache key is derived from the latest user prompt (see ``gen_cache_key``).
    On a cache hit the stored response is returned immediately; on a miss the
    wrapped function is called, its result cached for one hour, then returned.

    Args:
        func (function): Function with signature
            ``(self, model, messages, *args, **kwargs)`` returning the LLM
            response as a string.

    Returns:
        function: The wrapped function that handles caching and LLM response
        generation.
    """
    # functools.wraps preserves the wrapped function's name/docstring so the
    # decorated callable introspects correctly (logging, debugging, stacking).
    @functools.wraps(func)
    def wrapper(self, model, messages, *args, **kwargs):
        cache_key = gen_cache_key(messages=messages)
        cached_response = redis_client.get(cache_key)
        if cached_response:
            print(f"Cache hit for key: {cache_key}")
            # Redis returns bytes; decode back to the str callers expect.
            return cached_response.decode('utf-8')
        result = func(self, model, messages, *args, **kwargs)
        # Expire after one hour (3600 s) so stale answers eventually refresh.
        redis_client.set(cache_key, result, ex=3600)
        print(f"Cache saved for key: {cache_key}")
        return result
    return wrapper
|
def stream_cache(func):
    """
    Decorator to cache the streamed (chunked) response of an LLM call in Redis.

    Chunks are stored in a Redis list (``rpush``/``lrange``) so a cached
    stream can be replayed chunk by chunk. On a hit the cached chunks are
    yielded with a short delay to simulate live streaming; on a miss the live
    stream is yielded and recorded into the cache as it goes.

    Args:
        func (function): Generator function with signature
            ``(self, model, messages, *args, **kwargs)`` yielding response
            chunks.

    Returns:
        function: The wrapped generator that handles caching and streaming
        LLM responses.
    """
    # functools.wraps preserves the wrapped function's metadata.
    @functools.wraps(func)
    def wrapper(self, model, messages, *args, **kwargs):
        cache_key = gen_cache_key(messages=messages)

        # We are using lrange and rpush to simulate streaming: the list holds
        # one entry per chunk, in arrival order.
        cached_response = redis_client.lrange(cache_key, 0, -1)
        if cached_response:
            print(f"Cache hit for stream key: {cache_key}")
            for chunk in cached_response:
                print(f"Streaming cached chunk: {chunk.decode('utf-8')}")
                yield chunk.decode('utf-8')
                # Slow the replay down to simulate streaming: the cached
                # response is instantaneous because Redis is in-memory.
                time.sleep(0.07)
            return

        result = func(self, model, messages, *args, **kwargs)

        try:
            for chunk in result:
                print(f"Streaming live chunk: {chunk}")
                redis_client.rpush(cache_key, chunk)
                yield chunk
        except BaseException:
            # The consumer abandoned the stream (GeneratorExit) or the LLM
            # stream failed mid-way. Without this, the partial list would sit
            # in Redis with NO expiry and later replay as a complete answer —
            # drop the incomplete entry instead.
            redis_client.delete(cache_key)
            raise
        # Stream completed fully: expire the cache after 30 minutes.
        redis_client.expire(cache_key, 1800)
        print(f"Stream cache saved for key: {cache_key}")
    return wrapper