ducklm/tests/smoke/test_api_stream_chat.py

686 lines
25 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from fastapi.testclient import TestClient
import json
import re
import time
from duck_core.model_client import ModelResponse
from duck_core.api import create_app
def test_stream_chat_endpoint_emits_sse_reasoning_and_content(tmp_path, monkeypatch):
    """POST to /v1/chat/stream and check the streamed body for expected markers.

    ``ModelClient.chat`` is replaced by a fake that returns an action
    directive with no actions; ``ModelClient.stream_chat`` is replaced by a
    fake that yields one reasoning delta followed by one content delta.
    """
    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))

    # Serialized once; the fake returns the same directive on every call.
    empty_directive = json.dumps(
        {
            "kind": "action_directive",
            "intent": "answer directly",
            "risk_level": "none",
            "actions": [],
        }
    )

    async def fake_chat(self, role, messages):
        return ModelResponse(
            role=role,
            model="local-main",
            content=empty_directive,
            reasoning_content=None,
            raw={},
            latency_ms=1.0,
        )

    async def fake_stream_chat(self, role, messages):
        scripted_deltas = (
            ("reasoning_delta", "thinking"),
            ("content_delta", "answer"),
        )
        for event_type, text in scripted_deltas:
            yield {"type": event_type, "delta": text}

    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
    monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat)

    request_payload = {"message": "hello", "workspace": "./workspace", "debug": True}
    client = TestClient(create_app())
    with client.stream("POST", "/v1/chat/stream", json=request_payload) as response:
        body = "".join(response.iter_text())

    assert response.status_code == 200
    # Checked in the same order as the individual assertions they replace.
    expected_fragments = (
        "event: reasoning_delta",
        "event: content_delta",
        "event: done",
        '"generation_stats":',
        '"min_tokens_per_second":',
        '"avg_tokens_per_second":',
        '"max_tokens_per_second":',
        "thinking",
        "answer",
    )
    for fragment in expected_fragments:
        assert fragment in body
def test_stream_chat_forwards_reasoning_toggle_to_thinker(tmp_path, monkeypatch):
    """Check that the request's ``reasoning`` value reaches ``stream_chat``.

    The fake ``stream_chat`` records the ``role`` and ``reasoning`` it was
    called with; the test then expects exactly role "thinker" and
    reasoning "off".
    """
    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
    captured = {}

    async def fake_chat(
        self,
        role,
        messages,
        temperature=None,
        max_output_tokens=None,
        response_format=None,
    ):
        directive = {
            "kind": "action_directive",
            "intent": "answer directly",
            "risk_level": "none",
            "actions": [],
        }
        return ModelResponse(
            role=role,
            model="local-main",
            content=json.dumps(directive),
            reasoning_content=None,
            raw={},
            latency_ms=1.0,
        )

    async def fake_stream_chat(
        self,
        role,
        messages,
        temperature=None,
        max_output_tokens=None,
        response_format=None,
        reasoning=None,
    ):
        # Record what the endpoint passed so the test can inspect it afterwards.
        captured.update(role=role, reasoning=reasoning)
        yield {"type": "content_delta", "delta": "answer"}

    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
    monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat)

    request_payload = {
        "message": "hello",
        "workspace": "./workspace",
        "debug": True,
        "reasoning": "off",
    }
    client = TestClient(create_app())
    with client.stream("POST", "/v1/chat/stream", json=request_payload) as response:
        body = "".join(response.iter_text())

    assert response.status_code == 200
    assert "event: done" in body
    assert captured == {"role": "thinker", "reasoning": "off"}
def test_stream_chat_skips_action_role_for_direct_memory_question(tmp_path, monkeypatch):
    """Check that a direct memory question is answered without the action role.

    A memory entry is stored through ``/v1/memory`` and a question is then
    streamed through ``/v1/chat/stream``. The fake ``chat`` records every
    role it is called with and raises if the "action" role is requested.
    The test expects the streamed answer in the body, no "action" call, and
    an ``action_loop_skipped`` event in the task's event list.
    """
    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
    calls = []

    async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None):
        calls.append(role)
        if role == "action":
            raise AssertionError("direct memory question should skip action role")
        if role == "recall":
            content = {
                "relevant_ids": ["0"],
                "sufficient_to_answer": True,
                "reasoning": "name memory is relevant and sufficient",
            }
        elif role == "memory_policy":
            content = {
                "should_store": False,
                "memory_type": "note",
                "summary": "Recall answer, no new memory.",
                "importance": 0.1,
                "scope": "conversation",
                "metadata": {},
            }
        else:
            content = {
                "kind": "action_directive",
                "intent": "unused",
                "risk_level": "none",
                "actions": [],
            }
        return ModelResponse(
            role=role,
            model="local-main",
            content=json.dumps(content),
            reasoning_content=None,
            raw={},
            latency_ms=1.0,
        )

    async def fake_stream_chat(self, role, messages, **kwargs):
        assert role == "thinker"
        yield {"type": "content_delta", "delta": "Вас зовут Владимир."}

    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
    monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat)

    with TestClient(create_app()) as client:
        client.post(
            "/v1/memory",
            json={
                "text": "Пользователя зовут Владимир.",
                "workspace": "./workspace",
                "scope": "global",
                "memory_type": "fact",
                "importance": 0.8,
            },
        )
        with client.stream(
            "POST",
            "/v1/chat/stream",
            json={"message": "Как меня зовут? Ответь коротко.", "workspace": "./workspace"},
        ) as response:
            body = "".join(response.iter_text())

        # Check the status before parsing so an HTTP failure is reported as such.
        assert response.status_code == 200

        # re.search returns None when nothing matches; fail with a readable
        # message instead of an AttributeError on ``.group``.
        task_match = re.search(r'"task_id"\s*:\s*"([^"]+)"', body)
        assert task_match is not None, "stream body did not contain a task_id"
        task_id = task_match.group(1)
        events = client.get(f"/v1/tasks/{task_id}/events").json()

    assert "Вас зовут Владимир." in body
    assert "action" not in calls
    assert any(event["event_type"] == "action_loop_skipped" for event in events)
def test_stream_chat_runs_memory_policy_and_reflection_after_completion(tmp_path, monkeypatch):
    """Check that memory-policy and reflection events appear after a streamed chat.

    ``DUCK_ENABLE_REFLECTION`` is set to "1". The fake ``chat`` answers the
    "action", "memory_policy" and "critic" roles and raises for any other
    role. After the stream finishes, the task's events are polled (up to 20
    times, 0.05 s apart) until ``reflection_completed`` shows up; the test
    then inspects the events, ``/v1/memory`` and ``/v1/experience``.
    """
    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
    monkeypatch.setenv("DUCK_ENABLE_REFLECTION", "1")

    async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None):
        if role == "action":
            content = {
                "kind": "action_directive",
                "intent": "answer directly",
                "risk_level": "none",
                "actions": [],
            }
        elif role == "memory_policy":
            content = {
                "should_store": True,
                "memory_type": "preference",
                "summary": "User wants streamed chats to retain memory.",
                "importance": 0.8,
                "scope": "workspace",
                "metadata": {},
            }
        elif role == "critic":
            # The critic reply is plain text, not a JSON document.
            return ModelResponse(
                role=role,
                model="local-main",
                content="Task completed. Reusable lesson: streamed tasks need post-processing.",
                reasoning_content=None,
                raw={},
                latency_ms=1.0,
            )
        else:
            raise AssertionError(f"unexpected role: {role}")
        return ModelResponse(
            role=role,
            model="local-main",
            content=json.dumps(content),
            reasoning_content=None,
            raw={},
            latency_ms=1.0,
        )

    async def fake_stream_chat(self, role, messages):
        assert role == "thinker"
        yield {"type": "content_delta", "delta": "streamed answer"}

    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
    monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat)

    with TestClient(create_app()) as client:
        with client.stream(
            "POST",
            "/v1/chat/stream",
            json={"message": "remember this", "workspace": str(tmp_path), "debug": True},
        ) as response:
            body = "".join(response.iter_text())

        # Check the status before parsing so an HTTP failure is reported as such.
        assert response.status_code == 200

        # re.search returns None when nothing matches; fail with a readable
        # message instead of an AttributeError on ``.group``.
        task_match = re.search(r'"task_id"\s*:\s*"([^"]+)"', body)
        assert task_match is not None, "stream body did not contain a task_id"
        task_id = task_match.group(1)

        # Poll the task events until reflection_completed appears or the
        # attempts run out; the assertions below report a miss.
        events = []
        for _ in range(20):
            events = client.get(f"/v1/tasks/{task_id}/events").json()
            if any(event["event_type"] == "reflection_completed" for event in events):
                break
            time.sleep(0.05)

        event_types = [event["event_type"] for event in events]
        memory = client.get("/v1/memory", params={"workspace": str(tmp_path)}).json()
        experience = client.get("/v1/experience").json()

    assert "event: done" in body
    assert "memory_policy_decision" in event_types
    assert "memory_stored" in event_types
    assert "reflection_completed" in event_types
    assert memory["results"][0]["text"] == "User wants streamed chats to retain memory."
    assert len(experience) == 1
def test_stream_chat_endpoint_executes_tool_before_streaming_answer(tmp_path, monkeypatch):
    """Check that a requested tool call appears in the stream before the answer.

    The fake ``chat`` requests a ``file_read`` of ``note.txt`` until a
    message containing "tool_observations" is present, after which it
    returns no actions. The fake ``stream_chat`` asserts that it receives
    such a message. The test then checks the streamed body for the planning
    stage, the tool events, the file contents and the final answer.
    """
    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
    (tmp_path / "note.txt").write_text("stream tool content")

    async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None):
        assert role == "action"
        if any("tool_observations" in message["content"] for message in messages):
            actions = []
        else:
            actions = [
                {
                    "tool": "file_read",
                    "args": {"path": "note.txt"},
                    "reason": "User asked for file contents",
                }
            ]
        return ModelResponse(
            role=role,
            model="local-main",
            content=json.dumps(
                {
                    "kind": "action_directive",
                    "intent": "read requested file",
                    "risk_level": "low",
                    "actions": actions,
                }
            ),
            reasoning_content=None,
            raw={},
            latency_ms=1.0,
        )

    async def fake_stream_chat(self, role, messages):
        assert role == "thinker"
        assert any("tool_observations" in message["content"] for message in messages)
        yield {"type": "content_delta", "delta": "answer from tool"}

    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
    monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat)

    client = TestClient(create_app())
    with client.stream(
        "POST",
        "/v1/chat/stream",
        json={"message": "read note.txt", "workspace": str(tmp_path), "debug": True},
    ) as response:
        body = "".join(response.iter_text())

    assert response.status_code == 200
    assert 'event: runtime_status\ndata: {"task_id":' in body
    assert '"stage": "planning"' in body
    # Assert presence before str.index, which raises ValueError (not an
    # assertion failure) when the substring is absent.
    assert "event: tool_call_started" in body
    assert body.index('"stage": "planning"') < body.index("event: tool_call_started")
    assert "event: tool_call_finished" in body
    assert "stream tool content" in body
    assert "event: content_delta" in body
    assert "answer from tool" in body
    assert "event: done" in body
def test_stream_chat_injects_candidate_skill_summary(tmp_path, monkeypatch):
    """Check that the messages sent to the action role mention a skill.

    The fake ``chat`` collects every message it receives. After the stream
    completes, the test expects at least one collected message to contain
    "Active skill:" and at least one to contain "analyze_project".
    """
    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
    captured_messages = []

    async def fake_chat(
        self,
        role,
        messages,
        temperature=None,
        max_output_tokens=None,
        response_format=None,
    ):
        assert role == "action"
        captured_messages.extend(messages)
        directive = {
            "kind": "action_directive",
            "intent": "answer directly",
            "risk_level": "none",
            "actions": [],
        }
        return ModelResponse(
            role=role,
            model="local-main",
            content=json.dumps(directive),
            reasoning_content=None,
            raw={},
            latency_ms=1.0,
        )

    async def fake_stream_chat(self, role, messages):
        yield {"type": "content_delta", "delta": "ok"}

    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
    monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat)

    def mentioned(fragment):
        # True when any captured message content contains ``fragment``.
        return any(fragment in message["content"] for message in captured_messages)

    request_payload = {
        "message": "analyze repository structure",
        "workspace": str(tmp_path),
        "debug": True,
    }
    client = TestClient(create_app())
    with client.stream("POST", "/v1/chat/stream", json=request_payload) as response:
        body = "".join(response.iter_text())

    assert response.status_code == 200
    assert "event: done" in body
    assert mentioned("Active skill:")
    assert mentioned("analyze_project")
def test_stream_chat_requests_approval_for_directory_outside_workspace(tmp_path, monkeypatch):
    """Check that listing a directory outside the workspace asks for approval.

    Two sibling directories are created under ``tmp_path``; the request uses
    one as the workspace while the fake ``chat`` asks ``list_dir`` to list
    the other. The streamed body is expected to carry a
    ``tool_approval_requested`` event naming the tool and the outside path,
    with status ``waiting_for_approval``.
    """
    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))

    workspace = tmp_path / "workspace"
    outside = tmp_path / "outside"
    for directory in (workspace, outside):
        directory.mkdir()
    (outside / "note.txt").write_text("external")

    async def fake_chat(
        self,
        role,
        messages,
        temperature=None,
        max_output_tokens=None,
        response_format=None,
    ):
        assert role == "action"
        list_outside = {
            "tool": "list_dir",
            "args": {"path": str(outside)},
            "reason": "User asked for that directory",
        }
        directive = {
            "kind": "action_directive",
            "intent": "list external directory",
            "risk_level": "low",
            "actions": [list_outside],
        }
        return ModelResponse(
            role=role,
            model="local-main",
            content=json.dumps(directive),
            reasoning_content=None,
            raw={},
            latency_ms=1.0,
        )

    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)

    request_payload = {"message": "show outside", "workspace": str(workspace), "debug": True}
    client = TestClient(create_app())
    with client.stream("POST", "/v1/chat/stream", json=request_payload) as response:
        body = "".join(response.iter_text())

    assert response.status_code == 200
    for fragment in (
        "event: tool_approval_requested",
        '"tool": "list_dir"',
        str(outside),
        '"status": "waiting_for_approval"',
    ):
        assert fragment in body
def test_continue_stream_executes_approved_tool_and_streams_answer(tmp_path, monkeypatch):
    """Check the approve-then-continue flow for a shell tool call.

    The fake ``chat`` requests ``shell_exec_safe`` with ``uname -a`` until a
    message containing "tool_observations" is present, and counts its
    calls. The test streams the initial chat, approves the pending approval
    once, streams the continuation, and then checks the continuation body,
    that the action role was called exactly once, and that the last
    conversation message holds the streamed answer with status "completed".
    """
    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
    action_calls = 0

    async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None):
        nonlocal action_calls
        assert role == "action"
        action_calls += 1
        if any("tool_observations" in message["content"] for message in messages):
            actions = []
        else:
            actions = [
                {
                    "tool": "shell_exec_safe",
                    "args": {"command": "uname -a"},
                    "reason": "User asked for system information",
                }
            ]
        return ModelResponse(
            role=role,
            model="local-main",
            content=json.dumps(
                {
                    "kind": "action_directive",
                    "intent": "run command",
                    "risk_level": "medium",
                    "actions": actions,
                }
            ),
            reasoning_content=None,
            raw={},
            latency_ms=1.0,
        )

    async def fake_stream_chat(self, role, messages):
        assert role == "thinker"
        observation_message = next(message for message in messages if "tool_observations" in message["content"])
        assert "uname" in observation_message["content"]
        yield {"type": "content_delta", "delta": "continued after approval"}

    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
    monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat)

    client = TestClient(create_app())
    with client.stream(
        "POST",
        "/v1/chat/stream",
        json={"message": "run uname", "workspace": str(tmp_path), "debug": True},
    ) as response:
        initial_body = "".join(response.iter_text())

    # re.search returns None when nothing matches; fail with a readable
    # message instead of an AttributeError on ``.group``.
    task_match = re.search(r'"task_id"\s*:\s*"([^"]+)"', initial_body)
    assert task_match is not None, "initial stream body did not contain a task_id"
    task_id = task_match.group(1)

    pending = client.get("/v1/approvals/pending").json()
    approval = next(item for item in pending if item["task_id"] == task_id)
    client.post(f"/v1/approvals/{approval['approval_id']}/allow_once")

    with client.stream(
        "POST",
        f"/v1/tasks/{approval['task_id']}/continue/stream",
        json={"approval_id": approval["approval_id"]},
    ) as response:
        body = "".join(response.iter_text())

    assert "event: tool_approval_requested" in initial_body
    assert response.status_code == 200
    assert '"stage": "running_tool"' in body
    # Assert presence before str.index, which raises ValueError (not an
    # assertion failure) when the substring is absent.
    assert "event: tool_call_finished" in body
    assert body.index('"stage": "running_tool"') < body.index("event: tool_call_finished")
    assert "event: content_delta" in body
    assert "continued after approval" in body
    assert "event: done" in body
    assert action_calls == 1

    conversation_match = re.search(r'"conversation_id"\s*:\s*"([^"]+)"', initial_body)
    assert conversation_match is not None, "initial stream body did not contain a conversation_id"
    conversation_id = conversation_match.group(1)
    conversation = client.get(f"/v1/conversations/{conversation_id}").json()
    assert conversation["messages"][-1]["content"] == "continued after approval"
    assert conversation["messages"][-1]["status"] == "completed"
def test_continue_stream_requests_password_for_approved_sudo(tmp_path, monkeypatch):
    """Check that continuing an approved sudo command asks for a password.

    The fake ``chat`` requests ``shell_exec_safe`` with ``sudo apt update``
    unless a message containing "tool_observations" is present. After the
    pending approval is allowed once, the continuation stream is expected
    to contain a ``tool_password_requested`` event, the text
    "waiting_for_password", and the word "password" (case-insensitive).
    """
    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))

    async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None):
        assert role == "action"
        actions = []
        if not any("tool_observations" in message["content"] for message in messages):
            actions = [
                {
                    "tool": "shell_exec_safe",
                    "args": {"command": "sudo apt update"},
                    "reason": "Check updates with root privileges",
                }
            ]
        return ModelResponse(
            role=role,
            model="local-main",
            content=json.dumps(
                {
                    "kind": "action_directive",
                    "intent": "run sudo command",
                    "risk_level": "medium",
                    "actions": actions,
                }
            ),
            reasoning_content=None,
            raw={},
            latency_ms=1.0,
        )

    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)

    client = TestClient(create_app())
    with client.stream(
        "POST",
        "/v1/chat/stream",
        json={"message": "run sudo update", "workspace": str(tmp_path), "debug": True},
    ) as response:
        initial_body = "".join(response.iter_text())

    # re.search returns None when nothing matches; fail with a readable
    # message instead of an AttributeError on ``.group``.
    task_match = re.search(r'"task_id"\s*:\s*"([^"]+)"', initial_body)
    assert task_match is not None, "initial stream body did not contain a task_id"
    task_id = task_match.group(1)

    approval = next(
        item for item in client.get("/v1/approvals/pending").json()
        if item["task_id"] == task_id
    )
    client.post(f"/v1/approvals/{approval['approval_id']}/allow_once")

    with client.stream(
        "POST",
        f"/v1/tasks/{task_id}/continue/stream",
        json={"approval_id": approval["approval_id"]},
    ) as response:
        body = "".join(response.iter_text())

    assert response.status_code == 200
    assert "event: tool_password_requested" in body
    assert "waiting_for_password" in body
    assert "password" in body.lower()
def test_password_stream_runs_sudo_with_password_and_streams_answer(tmp_path, monkeypatch):
    """Check the password step of an approved sudo command.

    ``subprocess.run`` as referenced by ``duck_core.tools.shell_exec_safe``
    is replaced by a fake that records its arguments and returns a
    successful result. The test walks through the initial stream, the
    approval, the continuation stream and finally the password stream. It
    expects the first recorded ``subprocess.run`` call to have received
    ``"secret\\n"`` as ``input``, and the password to appear neither in the
    tool observation passed to the thinker nor in the final streamed body.
    """
    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
    calls = []

    class Completed:
        # Stand-in for the result object returned by subprocess.run.
        returncode = 0
        stdout = "sudo updated\n"
        stderr = ""

    def fake_run(command, **kwargs):
        calls.append((command, kwargs))
        return Completed()

    async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None):
        if role != "action":
            raise AssertionError("non-stream thinker should not be used")
        actions = []
        if not any("tool_observations" in message["content"] for message in messages):
            actions = [
                {
                    "tool": "shell_exec_safe",
                    "args": {"command": "sudo apt update"},
                    "reason": "Check updates with root privileges",
                }
            ]
        return ModelResponse(
            role=role,
            model="local-main",
            content=json.dumps(
                {
                    "kind": "action_directive",
                    "intent": "run sudo command",
                    "risk_level": "medium",
                    "actions": actions,
                }
            ),
            reasoning_content=None,
            raw={},
            latency_ms=1.0,
        )

    async def fake_stream_chat(self, role, messages):
        assert role == "thinker"
        observation = next(message for message in messages if "tool_observations" in message["content"])
        assert "sudo updated" in observation["content"]
        assert "secret" not in observation["content"]
        yield {"type": "content_delta", "delta": "sudo command completed"}

    monkeypatch.setattr("duck_core.tools.shell_exec_safe.subprocess.run", fake_run)
    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
    monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat)

    client = TestClient(create_app())
    with client.stream(
        "POST",
        "/v1/chat/stream",
        json={"message": "run sudo update", "workspace": str(tmp_path), "debug": True},
    ) as response:
        initial_body = "".join(response.iter_text())

    # re.search returns None when nothing matches; fail with a readable
    # message instead of an AttributeError on ``.group``.
    task_match = re.search(r'"task_id"\s*:\s*"([^"]+)"', initial_body)
    assert task_match is not None, "initial stream body did not contain a task_id"
    task_id = task_match.group(1)

    approval = next(
        item for item in client.get("/v1/approvals/pending").json()
        if item["task_id"] == task_id
    )
    client.post(f"/v1/approvals/{approval['approval_id']}/allow_once")

    with client.stream(
        "POST",
        f"/v1/tasks/{task_id}/continue/stream",
        json={"approval_id": approval["approval_id"]},
    ) as response:
        # Drain the continuation stream; its content is not inspected here.
        _ = "".join(response.iter_text())

    with client.stream(
        "POST",
        f"/v1/tasks/{task_id}/password/stream",
        json={"approval_id": approval["approval_id"], "password": "secret"},
    ) as response:
        body = "".join(response.iter_text())

    assert response.status_code == 200
    # Guard the index below so a missing call fails as an assertion, not IndexError.
    assert calls, "patched subprocess.run was never called"
    assert calls[0][1]["input"] == "secret\n"
    assert "event: tool_call_finished" in body
    assert "event: content_delta" in body
    assert "sudo command completed" in body
    assert "secret" not in body
def test_command_audit_endpoint_exposes_redacted_shell_events(tmp_path, monkeypatch):
    """Check the first entry returned by /v1/audit/commands after a shell call.

    The fake ``chat`` requests ``shell_exec_safe`` with
    ``apt list --upgradable`` unless a message containing
    "tool_observations" is present. After the chat stream is drained, the
    first audit entry is expected to be a ``command_audit`` event carrying
    that command, and the serialized audit list must not contain the word
    "password" (case-insensitive).
    """
    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))

    async def fake_chat(
        self,
        role,
        messages,
        temperature=None,
        max_output_tokens=None,
        response_format=None,
    ):
        assert role == "action"
        already_observed = any("tool_observations" in message["content"] for message in messages)
        if already_observed:
            actions = []
        else:
            actions = [
                {
                    "tool": "shell_exec_safe",
                    "args": {"command": "apt list --upgradable"},
                    "reason": "Check available updates",
                }
            ]
        directive = {
            "kind": "action_directive",
            "intent": "check updates",
            "risk_level": "low",
            "actions": actions,
        }
        return ModelResponse(
            role=role,
            model="local-main",
            content=json.dumps(directive),
            reasoning_content=None,
            raw={},
            latency_ms=1.0,
        )

    async def fake_stream_chat(self, role, messages):
        yield {"type": "content_delta", "delta": "updates checked"}

    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
    monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat)

    request_payload = {"message": "check updates", "workspace": str(tmp_path), "debug": True}
    client = TestClient(create_app())
    with client.stream("POST", "/v1/chat/stream", json=request_payload) as response:
        # Drain the stream; only the audit trail is inspected below.
        _ = "".join(response.iter_text())

    audit = client.get("/v1/audit/commands").json()

    assert response.status_code == 200
    first_entry = audit[0]
    assert first_entry["event_type"] == "command_audit"
    payload = first_entry["payload"]
    assert payload["command"] == "apt list --upgradable"
    assert payload["risk_level"] == "low"
    assert payload["action_type"] == "package_check"
    assert payload["approved"] is False
    assert "password" not in json.dumps(audit).lower()