Mirror of https://github.com/arc53/DocsGPT.git (last synced 2026-05-15 17:03:47 +00:00).
Compare commits
28 Commits
agent-miss
...
feat-notif
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
827a0bb382 | ||
|
|
b04cb44ab5 | ||
|
|
42384a0e92 | ||
|
|
0bce35ad29 | ||
|
|
9de8bb4499 | ||
|
|
cdbd3f061d | ||
|
|
2ac46fd858 | ||
|
|
daa4320da2 | ||
|
|
e70a7a5115 | ||
|
|
150d9f4e37 | ||
|
|
746bcbc5f9 | ||
|
|
aa91117fbf | ||
|
|
abbd56cb66 | ||
|
|
85d8375e6c | ||
|
|
7e98d21b61 | ||
|
|
249f9f9fe0 | ||
|
|
6c4346eb84 | ||
|
|
cb3ca8a36b | ||
|
|
4c8230fb6c | ||
|
|
649557798d | ||
|
|
afe8354ca5 | ||
|
|
5483eb0e27 | ||
|
|
bd2985db47 | ||
|
|
b99147ba83 | ||
|
|
c3023f8b71 | ||
|
|
c168a530f5 | ||
|
|
2d539f3199 | ||
|
|
ed9444cf3d |
@@ -114,8 +114,6 @@ class BaseAgent(ABC):
|
||||
self.compressed_summary = compressed_summary
|
||||
self.current_token_count = 0
|
||||
self.context_limit_reached = False
|
||||
self.conversation_id: Optional[str] = None
|
||||
self.initial_user_id: Optional[str] = None
|
||||
|
||||
@log_activity()
|
||||
def gen(
|
||||
|
||||
@@ -4,24 +4,19 @@ Fixed 5-second generation (100 tokens × 50 ms/token). No auth. Emits SSE
|
||||
chunks in OpenAI's chat.completions streaming format, or a single response
|
||||
when stream=false. Run on 127.0.0.1:8090 — point DocsGPT at it via
|
||||
OPENAI_BASE_URL=http://127.0.0.1:8090/v1.
|
||||
|
||||
Flags:
|
||||
--tool-calls First response returns a tool call instead of text.
|
||||
Subsequent responses (after a tool_result) return text.
|
||||
Useful for triggering the tool-execution loop.
|
||||
"""
|
||||
|
||||
import argparse
import asyncio
import json
import logging
import time
import uuid

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, StreamingResponse

# Tunables: 100 tokens at 50 ms each gives the fixed 5-second generation.
TOKEN_COUNT = 100
TOKEN_DELAY_S = 0.05  # 100 * 0.05 = 5.0 s
# Toggled from the --tool-calls CLI flag in __main__.
TOOL_CALL_MODE = False

logger = logging.getLogger("mock_llm")
logging.basicConfig(level=logging.INFO, format="%(asctime)s mock: %(message)s")
|
||||
@@ -44,7 +39,7 @@ FILLER_TOKENS = [
|
||||
".",
|
||||
]
|
||||
|
||||
# ASGI application instance (served by uvicorn in __main__).
app = FastAPI()
|
||||
|
||||
|
||||
def _token_stream_id() -> str:
|
||||
@@ -68,57 +63,11 @@ def _sse_chunk(completion_id: str, model: str, delta: dict, finish_reason=None)
|
||||
return f"data: {json.dumps(payload)}\n\n"
|
||||
|
||||
|
||||
def _gen_tool_call_stream(model: str, req_id: str):
    """Yield SSE chunks that stream two `search` tool calls.

    Emitting two calls lets the handler run the first one (whose result
    may be huge) and then trip _check_context_limit before the second.
    """
    stream_id = _token_stream_id()
    first_id = f"call_{uuid.uuid4().hex[:12]}"
    second_id = f"call_{uuid.uuid4().hex[:12]}"

    # Opening delta announces both calls with empty argument buffers.
    opening = {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "index": idx,
                "id": call_id,
                "type": "function",
                "function": {"name": "search", "arguments": ""},
            }
            for idx, call_id in enumerate((first_id, second_id))
        ],
    }
    yield _sse_chunk(stream_id, model, opening)

    # Drip the JSON arguments one character at a time into both calls.
    payload = json.dumps({"query": "Python programming basics"})
    for char in payload:
        time.sleep(TOKEN_DELAY_S)
        delta = {
            "tool_calls": [
                {"index": 0, "function": {"arguments": char}},
                {"index": 1, "function": {"arguments": char}},
            ]
        }
        yield _sse_chunk(stream_id, model, delta)

    yield _sse_chunk(stream_id, model, {}, finish_reason="tool_calls")
    yield "data: [DONE]\n\n"
    logger.info("[%s] tool_call stream done (ids=%s, %s)", req_id, first_id, second_id)
|
||||
|
||||
|
||||
def _has_tool_result(messages: list) -> bool:
|
||||
return any(m.get("role") == "tool" for m in messages)
|
||||
|
||||
|
||||
async def _stream_response(model: str, req_id: str):
    """Async SSE generator for a plain text completion.

    Yields a role prelude, then TOKEN_COUNT filler tokens (one every
    TOKEN_DELAY_S seconds), a finish_reason="stop" chunk, and the
    terminal "data: [DONE]" sentinel.
    """
    completion_id = _token_stream_id()
    yield _sse_chunk(completion_id, model, {"role": "assistant", "content": ""})
    for tok in FILLER_TOKENS[:TOKEN_COUNT]:
        # Non-blocking delay so the event loop can serve other requests.
        await asyncio.sleep(TOKEN_DELAY_S)
        yield _sse_chunk(completion_id, model, {"content": tok})
    yield _sse_chunk(completion_id, model, {}, finish_reason="stop")
    yield "data: [DONE]\n\n"
|
||||
@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """OpenAI-compatible chat.completions endpoint.

    When the request body has stream=true, returns a StreamingResponse
    of SSE chunks; otherwise sleeps for the full simulated generation
    time and returns one chat.completion JSON object.
    """
    body = await request.json()
    model = body.get("model", "mock")
    stream = bool(body.get("stream", False))
    req_id = uuid.uuid4().hex[:8]  # short id to correlate log lines per request
    logger.info("[%s] /chat/completions stream=%s model=%s max_tokens=%s", req_id, stream, model, body.get("max_tokens"))

    if stream:
        return StreamingResponse(
            _stream_response(model, req_id),
            media_type="text/event-stream",
            headers={
                # Defeat proxy/server buffering so chunks reach the client promptly.
                "Cache-Control": "no-cache, no-transform",
                "X-Accel-Buffering": "no",
            },
        )

    # Non-streaming: simulate the same total generation time, then answer once.
    await asyncio.sleep(TOKEN_COUNT * TOKEN_DELAY_S)
    logger.info("[%s] non-stream done", req_id)
    text = "".join(FILLER_TOKENS[:TOKEN_COUNT])
    completion_id = _token_stream_id()
    return JSONResponse(
        {
            "id": completion_id,
            "object": "chat.completion",
            "created": int(time.time()),
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": text},
                    "finish_reason": "stop",
                }
            ],
            # Fixed mock usage figures; only completion_tokens tracks TOKEN_COUNT.
            "usage": {
                "prompt_tokens": 10,
                "completion_tokens": TOKEN_COUNT,
                "total_tokens": 10 + TOKEN_COUNT,
            },
        }
    )
|
||||
|
||||
|
||||
@app.get("/v1/models")
async def list_models():
    """Return the single mock model in OpenAI's model-list shape."""
    return {
        "object": "list",
        "data": [{"id": "mock", "object": "model", "owned_by": "mock"}],
    }
|
||||
|
||||
|
||||
@app.get("/health")
async def health():
    """Liveness probe: always reports ok."""
    return {"status": "ok"}
|
||||
|
||||
|
||||
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--tool-calls", action="store_true",
        help="First response returns a tool_call; subsequent responses return text.",
    )
    parser.add_argument("--port", type=int, default=8090)
    args = parser.parse_args()
    TOOL_CALL_MODE = args.tool_calls
    if TOOL_CALL_MODE:
        logger.info("Tool-call mode enabled")

    # Imported here so the module stays importable without uvicorn installed.
    import uvicorn

    # Honor --port; the previous code hard-coded 8090 and ignored the flag.
    uvicorn.run(app, host="127.0.0.1", port=args.port, log_level="info")
|
||||
|
||||
Reference in New Issue
Block a user