Files
DocsGPT/tests/integration/test_workflows.py
Alex 81b6ee5daa Pg 4 (#2390)
* feat: postgres tests

* feat: mongo cutoff

* feat: mongo cutoff

* feat: adjust docs and compose files

* fix: mini code mongo removals

* fix: tests and k8s mongo stuff

* feat: test fixes

* fix: ruff

* fix: vale

* Potential fix for pull request finding 'CodeQL / Clear-text logging of sensitive information'

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>

* fix: mini suggestions

* vale lint fix 2

* fix: codeql columns thing

* fix: test mongo

* fix: tests coverage

* feat: better tests 4

* feat: more tests

* feat: decent coverage

* fix: ruff fixes

* fix: remove mongo mock

* feat: enhance workflow engine and API routes; add document retrieval and source handling

* feat: e2e tests

* fix: mcp, mongo and more

* fix: mini codeql warning

* fix: agent chunk view

* fix: mini issues

* fix: more pg fixes

* feat: postgres prep on start

* feat: qa tests

* fix: mini improvements

* fix: tests

---------

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Co-authored-by: Siddhant Rai <siddhant.rai.5686@gmail.com>
2026-04-18 13:13:57 +01:00

578 lines
22 KiB
Python

#!/usr/bin/env python3
"""
Integration tests for DocsGPT workflow management endpoints.
Uses Flask test client with a real Postgres instance (must be running).
Endpoints tested:
- /api/workflows (POST) - Create workflow
- /api/workflows/<id> (GET) - Get workflow
- /api/workflows/<id> (PUT) - Update workflow
- /api/workflows/<id> (DELETE) - Delete workflow
Run:
pytest tests/integration/test_workflows.py -v
"""
import time
import pytest
from jose import jwt
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture(scope="module")
def app():
    """Return the real Flask application with its ``TESTING`` flag enabled.

    The application module is imported lazily, inside the fixture, so the
    import only happens when a test in this module actually runs. Per the
    module docstring, a real Postgres instance must be running.
    """
    from application.app import app as flask_app

    flask_app.config.update(TESTING=True)
    return flask_app
@pytest.fixture(scope="module")
def client(app):
    """Provide a Flask test client, authenticated when the backend needs it.

    For ``simple_jwt`` / ``session_jwt`` auth, an HS256-signed Bearer token
    is stored in the client's base environ so it accompanies every request.
    For any other ``AUTH_TYPE`` the client is returned untouched (the
    backend is expected to answer every request as ``{"sub": "local"}``).

    The requesting tests are skipped when JWT auth is enabled but no
    signing key is configured.
    """
    from application.core.settings import settings

    test_client = app.test_client()
    if settings.AUTH_TYPE not in ("simple_jwt", "session_jwt"):
        return test_client

    signing_key = settings.JWT_SECRET_KEY
    if not signing_key:
        pytest.skip("JWT_SECRET_KEY not configured")

    # A timestamped subject keeps each run's workflows under their own user.
    subject = f"test_workflow_integration_{int(time.time())}"
    bearer = jwt.encode({"sub": subject}, signing_key, algorithm="HS256")
    test_client.environ_base["HTTP_AUTHORIZATION"] = f"Bearer {bearer}"
    return test_client
@pytest.fixture(scope="module")
def created_ids():
    """Module-wide list that tests append workflow IDs to for later cleanup."""
    ids_to_remove = []
    return ids_to_remove
@pytest.fixture(autouse=True, scope="module")
def cleanup(client, created_ids):
    """Delete every workflow recorded in ``created_ids`` once the module ends.

    Cleanup stays best-effort: a failing delete never fails the test run,
    but it is logged as a warning (with traceback) instead of being
    silently discarded, so leaked workflows can be noticed and removed.
    """
    import logging

    yield

    log = logging.getLogger(__name__)
    for wf_id in created_ids:
        if not wf_id:
            # A test may have recorded a missing id (e.g. the create call
            # returned no id); there is nothing to delete for it.
            continue
        try:
            client.delete(f"/api/workflows/{wf_id}")
        except Exception:
            log.warning(
                "Failed to delete test workflow %s", wf_id, exc_info=True
            )
# ---------------------------------------------------------------------------
# Payload helpers
# ---------------------------------------------------------------------------
def simple_workflow(suffix=""):
    """Build the payload for a minimal two-node workflow (start -> end).

    ``suffix`` is appended to the timestamped workflow name.
    """
    workflow_name = f"Simple WF {int(time.time())}{suffix}"

    start_node = {
        "id": "start_1",
        "type": "start",
        "title": "Start",
        "position": {"x": 0, "y": 0},
        "data": {},
    }
    end_node = {
        "id": "end_1",
        "type": "end",
        "title": "End",
        "position": {"x": 400, "y": 0},
        "data": {},
    }
    start_to_end = {"id": "edge_1", "source": "start_1", "target": "end_1"}

    payload = {"name": workflow_name, "description": "integration test"}
    payload["nodes"] = [start_node, end_node]
    payload["edges"] = [start_to_end]
    return payload
def linear_workflow(suffix=""):
    """Build the payload for a three-node chain (start -> agent -> end).

    ``suffix`` is appended to the timestamped workflow name.
    """
    workflow_name = f"Linear WF {int(time.time())}{suffix}"

    agent_config = {
        "agent_type": "classic",
        "system_prompt": "You are helpful.",
        "prompt_template": "",
        "stream_to_user": False,
    }
    nodes = [
        {
            "id": "start_1",
            "type": "start",
            "title": "Start",
            "position": {"x": 0, "y": 0},
            "data": {},
        },
        {
            "id": "agent_1",
            "type": "agent",
            "title": "Agent",
            "position": {"x": 200, "y": 0},
            "data": agent_config,
        },
        {
            "id": "end_1",
            "type": "end",
            "title": "End",
            "position": {"x": 400, "y": 0},
            "data": {},
        },
    ]
    edges = [
        {"id": "edge_1", "source": "start_1", "target": "agent_1"},
        {"id": "edge_2", "source": "agent_1", "target": "end_1"},
    ]

    return {
        "name": workflow_name,
        "description": "integration test",
        "nodes": nodes,
        "edges": edges,
    }
def multi_input_end_workflow(suffix=""):
    r"""Condition branches into two agents, both converging on one end node.

    Graph:
        start -> condition --(case_1)--> agent_a --\
                           --(else)----> agent_b ---+--> end

    ``suffix`` is appended to the timestamped workflow name.

    The docstring is a raw string because the diagram ends a line with a
    backslash; in a normal string that would act as a line continuation
    and join the two diagram rows.
    """
    return {
        "name": f"Multi-Input End {int(time.time())}{suffix}",
        "description": "end node with multiple inputs",
        "nodes": [
            {"id": "start_1", "type": "start", "title": "Start",
             "position": {"x": 0, "y": 100}, "data": {}},
            {"id": "cond_1", "type": "condition", "title": "Branch",
             "position": {"x": 200, "y": 100}, "data": {
                 "mode": "simple",
                 "cases": [
                     {"name": "Case 1", "expression": "true",
                      "sourceHandle": "case_1"},
                 ],
             }},
            {"id": "agent_a", "type": "agent", "title": "Agent A",
             "position": {"x": 400, "y": 0}, "data": {
                 "agent_type": "classic",
                 "system_prompt": "Branch A",
                 "prompt_template": "",
                 "stream_to_user": False,
             }},
            {"id": "agent_b", "type": "agent", "title": "Agent B",
             "position": {"x": 400, "y": 200}, "data": {
                 "agent_type": "classic",
                 "system_prompt": "Branch B",
                 "prompt_template": "",
                 "stream_to_user": False,
             }},
            {"id": "end_1", "type": "end", "title": "End",
             "position": {"x": 600, "y": 100}, "data": {}},
        ],
        "edges": [
            {"id": "e1", "source": "start_1", "target": "cond_1"},
            {"id": "e2", "source": "cond_1", "target": "agent_a",
             "sourceHandle": "case_1"},
            {"id": "e3", "source": "cond_1", "target": "agent_b",
             "sourceHandle": "else"},
            # Both agents feed into the SAME end node
            {"id": "e4", "source": "agent_a", "target": "end_1"},
            {"id": "e5", "source": "agent_b", "target": "end_1"},
        ],
    }
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _extract_id(resp):
    """Return the workflow id from a create/update response, or ``None``.

    The id is read from the ``"data"`` envelope when that key holds a
    truthy value, otherwise from the top level of the JSON body.

    ``None`` is returned when the response carries no JSON body, when the
    body (or its ``"data"`` envelope) is not a JSON object, or when no
    ``"id"`` key is present. Callers can therefore fail on their own
    assertion instead of an ``AttributeError`` raised in here.
    """
    body = resp.get_json()
    if not isinstance(body, dict):
        return None
    data = body.get("data") or body
    if not isinstance(data, dict):
        return None
    return data.get("id")
def _get_graph(client, wf_id):
    """GET a workflow and return its graph as a ``(nodes, edges)`` tuple.

    Asserts that the request succeeded with HTTP 200 (the response text is
    used as the failure message). The graph is read from the ``"data"``
    envelope when it is present and truthy, otherwise from the top level
    of the body; a missing ``nodes`` / ``edges`` key yields an empty list.
    """
    response = client.get(f"/api/workflows/{wf_id}")
    assert response.status_code == 200, response.get_data(as_text=True)

    payload = response.get_json()
    workflow = payload.get("data") or payload
    nodes = workflow.get("nodes", [])
    edges = workflow.get("edges", [])
    return nodes, edges
# ===========================================================================
# CRUD tests
# ===========================================================================
class TestWorkflowCRUD:
    """Create / read / update / delete round-trips for ``/api/workflows``.

    Every test asserts that its setup ``POST`` succeeded before using the
    returned id, so a broken create endpoint is reported at the create step
    rather than as a confusing failure further down.
    """

    def test_create_simple_workflow(self, client, created_ids):
        """A start -> end workflow is accepted and an id is returned."""
        resp = client.post("/api/workflows", json=simple_workflow())
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

    def test_create_linear_workflow(self, client, created_ids):
        """A start -> agent -> end workflow is accepted and an id is returned."""
        resp = client.post("/api/workflows", json=linear_workflow())
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

    def test_get_workflow_returns_nodes_and_edges(self, client, created_ids):
        """Fetching a created workflow returns its two nodes and one edge."""
        resp = client.post("/api/workflows", json=simple_workflow(" get"))
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

        nodes, edges = _get_graph(client, wf_id)
        assert len(nodes) == 2
        assert len(edges) == 1

    def test_update_workflow(self, client, created_ids):
        """PUT replaces the simple graph with the linear one."""
        resp = client.post("/api/workflows", json=simple_workflow(" upd"))
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

        update_resp = client.put(
            f"/api/workflows/{wf_id}", json=linear_workflow(" updated")
        )
        assert update_resp.status_code == 200, update_resp.get_data(as_text=True)

        nodes, edges = _get_graph(client, wf_id)
        assert len(nodes) == 3  # start, agent, end
        assert len(edges) == 2

    def test_delete_workflow(self, client, created_ids):
        """A deleted workflow can no longer be fetched."""
        resp = client.post("/api/workflows", json=simple_workflow(" del"))
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        # Register first so the workflow is still cleaned up at module
        # teardown if the delete below fails.
        created_ids.append(wf_id)

        del_resp = client.delete(f"/api/workflows/{wf_id}")
        assert del_resp.status_code == 200, del_resp.get_data(as_text=True)
        # Deleted successfully: nothing left for the cleanup fixture to do.
        created_ids.remove(wf_id)

        get_resp = client.get(f"/api/workflows/{wf_id}")
        assert get_resp.status_code in (400, 404)

    def test_reject_workflow_without_end_node(self, client):
        """A graph that has no end node is rejected with HTTP 400."""
        payload = {
            "name": "No End",
            "nodes": [
                {"id": "s", "type": "start", "title": "Start",
                 "position": {"x": 0, "y": 0}, "data": {}},
            ],
            "edges": [],
        }
        resp = client.post("/api/workflows", json=payload)
        assert resp.status_code == 400, resp.get_data(as_text=True)
# ===========================================================================
# Multi-input end node tests
# ===========================================================================
class TestMultiInputEndNode:
    """Verify that an end node can receive edges from multiple source nodes."""

    def test_create_multi_input_end_workflow_accepted(self, client, created_ids):
        """Backend must accept a workflow where two edges target the same end node."""
        resp = client.post("/api/workflows", json=multi_input_end_workflow())
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

    def test_multi_input_end_all_edges_persisted(self, client, created_ids):
        """After round-trip, both edges into the end node must still be present."""
        resp = client.post(
            "/api/workflows", json=multi_input_end_workflow(" persist")
        )
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

        nodes, edges = _get_graph(client, wf_id)
        # Locate end node
        end_ids = {n["id"] for n in nodes if n["type"] == "end"}
        assert end_ids, "no end node in response"
        # Count edges targeting any end node
        edges_to_end = [e for e in edges if e["target"] in end_ids]
        assert len(edges_to_end) >= 2, (
            f"Expected >=2 edges to end, got {len(edges_to_end)}: {edges_to_end}"
        )

    def test_multi_input_end_total_edge_count(self, client, created_ids):
        """All 5 edges of the multi-input graph must survive persistence."""
        resp = client.post(
            "/api/workflows", json=multi_input_end_workflow(" count")
        )
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

        _, edges = _get_graph(client, wf_id)
        assert len(edges) == 5, f"Expected 5 edges, got {len(edges)}"

    def test_update_to_multi_input_end_preserves_edges(self, client, created_ids):
        """Updating a simple workflow to multi-input end keeps all edges."""
        # Create simple
        resp = client.post("/api/workflows", json=simple_workflow(" pre"))
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

        # Update to multi-input end
        update_resp = client.put(
            f"/api/workflows/{wf_id}",
            json=multi_input_end_workflow(" post"),
        )
        assert update_resp.status_code == 200, update_resp.get_data(as_text=True)

        nodes, edges = _get_graph(client, wf_id)
        end_ids = {n["id"] for n in nodes if n["type"] == "end"}
        # Without this check a response lacking the end node would surface
        # only as a misleading "0 edges to end" failure below.
        assert end_ids, "no end node in response after update"
        edges_to_end = [e for e in edges if e["target"] in end_ids]
        assert len(edges_to_end) >= 2, (
            f"Expected >=2 edges to end after update, got {len(edges_to_end)}"
        )
# ---------------------------------------------------------------------------
# Source-aware payload helpers
# ---------------------------------------------------------------------------
def workflow_with_sources(sources, suffix=""):
    """Build a start -> agent -> end payload whose agent carries ``sources``.

    ``sources`` is stored as-is (same object) under the agent node's
    ``data["sources"]``; ``suffix`` is appended to the timestamped name.
    """
    workflow_name = f"Source WF {int(time.time())}{suffix}"

    agent_config = {
        "agent_type": "classic",
        "system_prompt": "You are helpful.",
        "prompt_template": "",
        "stream_to_user": False,
        "sources": sources,
        "tools": [],
    }
    nodes = [
        {
            "id": "start_1",
            "type": "start",
            "title": "Start",
            "position": {"x": 0, "y": 0},
            "data": {},
        },
        {
            "id": "agent_1",
            "type": "agent",
            "title": "Agent",
            "position": {"x": 200, "y": 0},
            "data": agent_config,
        },
        {
            "id": "end_1",
            "type": "end",
            "title": "End",
            "position": {"x": 400, "y": 0},
            "data": {},
        },
    ]
    edges = [
        {"id": "edge_1", "source": "start_1", "target": "agent_1"},
        {"id": "edge_2", "source": "agent_1", "target": "end_1"},
    ]

    return {
        "name": workflow_name,
        "description": "integration test with sources",
        "nodes": nodes,
        "edges": edges,
    }
def workflow_multi_agent_sources(suffix=""):
    """Build a start -> agent A -> agent B -> end payload.

    Agent A is ``agentic`` with sources ``src_alpha`` / ``src_beta``;
    agent B is ``classic``, streams to the user and has source
    ``src_gamma``. ``suffix`` is appended to the timestamped name.
    """
    workflow_name = f"Multi-Agent Sources {int(time.time())}{suffix}"

    first_agent = {
        "id": "agent_a",
        "type": "agent",
        "title": "Agent A",
        "position": {"x": 200, "y": 0},
        "data": {
            "agent_type": "agentic",
            "system_prompt": "Agent A prompt",
            "prompt_template": "",
            "stream_to_user": False,
            "sources": ["src_alpha", "src_beta"],
            "tools": [],
        },
    }
    second_agent = {
        "id": "agent_b",
        "type": "agent",
        "title": "Agent B",
        "position": {"x": 400, "y": 0},
        "data": {
            "agent_type": "classic",
            "system_prompt": "Agent B prompt",
            "prompt_template": "",
            "stream_to_user": True,
            "sources": ["src_gamma"],
            "tools": [],
        },
    }
    start_node = {
        "id": "start_1",
        "type": "start",
        "title": "Start",
        "position": {"x": 0, "y": 0},
        "data": {},
    }
    end_node = {
        "id": "end_1",
        "type": "end",
        "title": "End",
        "position": {"x": 600, "y": 0},
        "data": {},
    }

    # Edges simply chain the nodes in order.
    chain = ["start_1", "agent_a", "agent_b", "end_1"]
    edges = [
        {"id": f"e{index}", "source": src, "target": dst}
        for index, (src, dst) in enumerate(zip(chain, chain[1:]), start=1)
    ]

    return {
        "name": workflow_name,
        "description": "two agents with different sources",
        "nodes": [start_node, first_agent, second_agent, end_node],
        "edges": edges,
    }
def _find_agent_node(nodes, node_id):
    """Return the first node whose ``"id"`` equals ``node_id``, else ``None``."""
    for candidate in nodes:
        if candidate["id"] == node_id:
            return candidate
    return None
# ===========================================================================
# Workflow integration tests
# ===========================================================================
class TestWorkflowIntegration:
    """Verify end-to-end workflow create → get → update → get round-trips.

    Every test asserts that its setup ``POST`` succeeded before using the
    returned id, so create failures are reported where they happen.
    """

    def test_linear_workflow_round_trip(self, client, created_ids):
        """Create a linear workflow and verify all nodes/edges survive the round-trip."""
        payload = linear_workflow(" round-trip")
        resp = client.post("/api/workflows", json=payload)
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

        nodes, edges = _get_graph(client, wf_id)
        assert len(nodes) == 3
        assert len(edges) == 2
        # Verify node types
        types = {n["id"]: n["type"] for n in nodes}
        assert types["start_1"] == "start"
        assert types["agent_1"] == "agent"
        assert types["end_1"] == "end"

    def test_agent_config_persisted(self, client, created_ids):
        """Agent node config (type, prompts, stream_to_user) round-trips correctly."""
        payload = linear_workflow(" config")
        resp = client.post("/api/workflows", json=payload)
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

        nodes, _ = _get_graph(client, wf_id)
        agent = _find_agent_node(nodes, "agent_1")
        assert agent is not None
        assert agent["data"]["agent_type"] == "classic"
        assert agent["data"]["system_prompt"] == "You are helpful."
        assert agent["data"]["stream_to_user"] is False

    def test_update_workflow_replaces_graph(self, client, created_ids):
        """Updating a workflow fully replaces nodes and edges."""
        resp = client.post("/api/workflows", json=simple_workflow(" replace"))
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

        # Baseline: the simple graph (2 nodes, 1 edge) is what was stored.
        nodes, edges = _get_graph(client, wf_id)
        assert len(nodes) == 2
        assert len(edges) == 1

        # Update to linear
        update_resp = client.put(
            f"/api/workflows/{wf_id}", json=linear_workflow(" replaced")
        )
        assert update_resp.status_code == 200, update_resp.get_data(as_text=True)

        nodes, edges = _get_graph(client, wf_id)
        assert len(nodes) == 3
        assert len(edges) == 2
# ===========================================================================
# Source-specific integration tests
# ===========================================================================
class TestWorkflowSources:
    """Verify that agent node sources are persisted and retrieved correctly.

    Every test asserts that its setup ``POST`` succeeded and that the agent
    node was found before inspecting its data, so failures point at the
    step that actually broke instead of a ``TypeError`` on ``None``.
    """

    def test_create_workflow_with_single_source(self, client, created_ids):
        """A workflow with one source on an agent node persists it."""
        payload = workflow_with_sources(["default"])
        resp = client.post("/api/workflows", json=payload)
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

        nodes, _ = _get_graph(client, wf_id)
        agent = _find_agent_node(nodes, "agent_1")
        assert agent is not None, "Agent node not found"
        assert agent["data"].get("sources") == ["default"], (
            f"Expected sources=['default'], got {agent['data'].get('sources')}"
        )

    def test_create_workflow_with_multiple_sources(self, client, created_ids):
        """Multiple sources on an agent node are all persisted."""
        sources = ["src_1", "src_2", "src_3"]
        payload = workflow_with_sources(sources)
        resp = client.post("/api/workflows", json=payload)
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

        nodes, _ = _get_graph(client, wf_id)
        agent = _find_agent_node(nodes, "agent_1")
        assert agent is not None
        assert agent["data"].get("sources") == sources

    def test_create_workflow_with_empty_sources(self, client, created_ids):
        """An agent node with empty sources list is accepted and persisted."""
        payload = workflow_with_sources([])
        resp = client.post("/api/workflows", json=payload)
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

        nodes, _ = _get_graph(client, wf_id)
        agent = _find_agent_node(nodes, "agent_1")
        assert agent is not None
        assert agent["data"].get("sources") == []

    def test_update_workflow_sources(self, client, created_ids):
        """Updating a workflow replaces agent sources."""
        # Create with original sources
        payload = workflow_with_sources(["old_src"])
        resp = client.post("/api/workflows", json=payload)
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

        # Update with new sources
        updated_payload = workflow_with_sources(["new_src_1", "new_src_2"], " upd")
        update_resp = client.put(f"/api/workflows/{wf_id}", json=updated_payload)
        assert update_resp.status_code == 200, update_resp.get_data(as_text=True)

        nodes, _ = _get_graph(client, wf_id)
        agent = _find_agent_node(nodes, "agent_1")
        assert agent is not None
        assert agent["data"].get("sources") == ["new_src_1", "new_src_2"]

    def test_multi_agent_independent_sources(self, client, created_ids):
        """Each agent node keeps its own distinct sources list."""
        payload = workflow_multi_agent_sources()
        resp = client.post("/api/workflows", json=payload)
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

        nodes, _ = _get_graph(client, wf_id)
        agent_a = _find_agent_node(nodes, "agent_a")
        agent_b = _find_agent_node(nodes, "agent_b")
        assert agent_a is not None, "Agent A not found"
        assert agent_b is not None, "Agent B not found"
        assert agent_a["data"].get("sources") == ["src_alpha", "src_beta"]
        assert agent_b["data"].get("sources") == ["src_gamma"]

    def test_sources_survive_workflow_update(self, client, created_ids):
        """Sources survive when a workflow is updated without changing sources."""
        payload = workflow_with_sources(["persistent_src"])
        resp = client.post("/api/workflows", json=payload)
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

        # Update keeping same sources
        update_resp = client.put(f"/api/workflows/{wf_id}", json=payload)
        assert update_resp.status_code == 200, update_resp.get_data(as_text=True)

        nodes, _ = _get_graph(client, wf_id)
        agent = _find_agent_node(nodes, "agent_1")
        assert agent is not None, "Agent node not found"
        assert agent["data"].get("sources") == ["persistent_src"]

    def test_remove_sources_on_update(self, client, created_ids):
        """Clearing sources on update results in empty list."""
        payload = workflow_with_sources(["will_be_removed"])
        resp = client.post("/api/workflows", json=payload)
        assert resp.status_code in (200, 201), resp.get_data(as_text=True)
        wf_id = _extract_id(resp)
        assert wf_id
        created_ids.append(wf_id)

        # Update with no sources
        cleared_payload = workflow_with_sources([], " cleared")
        update_resp = client.put(f"/api/workflows/{wf_id}", json=cleared_payload)
        assert update_resp.status_code == 200, update_resp.get_data(as_text=True)

        nodes, _ = _get_graph(client, wf_id)
        agent = _find_agent_node(nodes, "agent_1")
        assert agent is not None, "Agent node not found"
        assert agent["data"].get("sources") == []