# Code-viewer metadata (not part of the module): 228 lines, 7.1 KiB, Python
import json
|
|
from unittest.mock import AsyncMock
|
|
|
|
import pytest
|
|
|
|
from duck_core.events.store import EventStore
|
|
from duck_core.experience.recorder import ExperienceRecorder
|
|
from duck_core.memory.policy import MemoryPolicy
|
|
from duck_core.memory.store import MemoryStore
|
|
from duck_core.model_client import ModelClient, ModelResponse
|
|
from duck_core.runtime_loop import RuntimeLoop
|
|
from duck_core.tasks.store import TaskStore
|
|
|
|
|
|
@pytest.fixture
def task_store(tmp_path):
    """Provide a TaskStore pointed at ``duck.sqlite3`` inside pytest's ``tmp_path``."""
    db_file = tmp_path / "duck.sqlite3"
    return TaskStore(str(db_file))
|
|
|
|
|
|
@pytest.fixture
def event_store(tmp_path):
    """Provide an EventStore pointed at ``duck.sqlite3`` inside pytest's ``tmp_path``."""
    db_file = tmp_path / "duck.sqlite3"
    return EventStore(str(db_file))
|
|
|
|
|
|
@pytest.fixture
def memory_store(tmp_path):
    """Provide a MemoryStore pointed at ``duck.sqlite3`` inside pytest's ``tmp_path``."""
    db_file = tmp_path / "duck.sqlite3"
    return MemoryStore(str(db_file))
|
|
|
|
|
|
@pytest.fixture
def experience_recorder(tmp_path):
    """Provide an ExperienceRecorder pointed at ``duck.sqlite3`` inside pytest's ``tmp_path``."""
    db_file = tmp_path / "duck.sqlite3"
    return ExperienceRecorder(str(db_file))
|
|
|
|
|
|
@pytest.fixture
def mock_model_client():
    """Provide a ModelClient mock whose ``chat`` hands out four scripted replies.

    Each awaited ``chat`` call consumes the next reply, in this order: an
    action directive with no actions, the thinker's final answer, a
    memory-policy decision with ``should_store`` set to False, and a
    free-text reflection.
    """
    # First call: action role — an action directive with an empty action list.
    action_payload = {
        "kind": "action_directive",
        "intent": "answer directly",
        "risk_level": "none",
        "actions": [],
    }
    action_reply = ModelResponse(
        role="action",
        model="local-main",
        content=json.dumps(action_payload),
        reasoning_content=None,
        raw={},
        latency_ms=1.0,
    )

    # Second call: thinker role — the final answer text.
    thinker_reply = ModelResponse(
        role="thinker",
        model="local-main",
        content="DuckLM is a local cognitive runtime.",
        reasoning_content=None,
        raw={},
        latency_ms=1.0,
    )

    # Third call: memory-policy decision (the reply itself is tagged role="critic").
    memory_payload = {
        "should_store": False,
        "memory_type": "event",
        "summary": "Routine answer, nothing to remember.",
        "importance": 0.1,
        "scope": "workspace",
        "metadata": {},
    }
    memory_policy_reply = ModelResponse(
        role="critic",
        model="local-main",
        content=json.dumps(memory_payload),
        reasoning_content=None,
        raw={},
        latency_ms=1.0,
    )

    # Fourth call: critic role — the reflection text.
    reflection_reply = ModelResponse(
        role="critic",
        model="local-main",
        content="Task completed successfully. No issues found. Reusable lesson: direct answers work well for simple queries.",
        reasoning_content=None,
        raw={},
        latency_ms=1.0,
    )

    mocked = AsyncMock(spec=ModelClient)
    mocked.chat = AsyncMock(
        side_effect=[
            action_reply,
            thinker_reply,
            memory_policy_reply,
            reflection_reply,
        ]
    )
    return mocked
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_reflection_is_called_after_task_completion(
    task_store, event_store, memory_store, experience_recorder, mock_model_client
):
    """A completed chat run should leave one experience record and a
    ``reflection_completed`` event for the task."""
    policy = MemoryPolicy(model_client=mock_model_client)
    runtime = RuntimeLoop(
        task_store=task_store,
        event_store=event_store,
        model_client=mock_model_client,
        memory_policy=policy,
        memory_store=memory_store,
        experience_recorder=experience_recorder,
    )

    result = await runtime.run_chat("What is DuckLM?", workspace="/tmp/test")

    assert result.status == "completed"
    assert "DuckLM" in result.final_response

    # Check that reflection was called — experience record created
    records = await experience_recorder.list_records()
    assert len(records) == 1
    assert records[0].task_id == result.task_id
    assert "completed successfully" in records[0].reusable_lesson

    # Check that reflection_completed event was recorded
    events = await event_store.list_events(result.task_id)
    event_types = [e.event_type for e in events]
    assert "reflection_completed" in event_types
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_reflection_failure_does_not_break_task(
    task_store, event_store, memory_store, experience_recorder
):
    """A failing reflection call must not stop the task from completing.

    The fourth scripted ``chat`` outcome is a ``ConnectionError``; the run is
    still expected to report ``completed`` and to log a ``reflection_failed``
    event.
    """
    # Action: empty
    action_reply = ModelResponse(
        role="action",
        model="local-main",
        content=json.dumps({
            "kind": "action_directive",
            "intent": "answer",
            "risk_level": "none",
            "actions": [],
        }),
        reasoning_content=None,
        raw={},
        latency_ms=1.0,
    )
    # Thinker: answer
    thinker_reply = ModelResponse(
        role="thinker",
        model="local-main",
        content="Answer.",
        reasoning_content=None,
        raw={},
        latency_ms=1.0,
    )
    # Memory policy
    memory_policy_reply = ModelResponse(
        role="critic",
        model="local-main",
        content=json.dumps({
            "should_store": False,
            "memory_type": "event",
            "summary": "Routine.",
            "importance": 0.1,
            "scope": "workspace",
            "metadata": {},
        }),
        reasoning_content=None,
        raw={},
        latency_ms=1.0,
    )
    # Critic (reflection) — an exception instance in side_effect is raised
    # by the mock instead of being returned.
    reflection_error = ConnectionError("LLM unavailable")

    client = AsyncMock(spec=ModelClient)
    client.chat = AsyncMock(
        side_effect=[
            action_reply,
            thinker_reply,
            memory_policy_reply,
            reflection_error,
        ]
    )

    runtime = RuntimeLoop(
        task_store=task_store,
        event_store=event_store,
        model_client=client,
        memory_policy=MemoryPolicy(model_client=client),
        memory_store=memory_store,
        experience_recorder=experience_recorder,
    )

    outcome = await runtime.run_chat("test", workspace="/tmp/test")

    # Task should still complete
    assert outcome.status == "completed"

    # Reflection failure event should be recorded
    recorded = await event_store.list_events(outcome.task_id)
    recorded_types = [event.event_type for event in recorded]
    assert "reflection_failed" in recorded_types
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_reflection_not_called_when_disabled(
    task_store, event_store, memory_store, mock_model_client
):
    """With ``reflect=False`` the run must finish without a reflection call.

    The RuntimeLoop here is built without an ``experience_recorder``; the
    check is on how many times the mocked ``chat`` was invoked.
    """
    runtime = RuntimeLoop(
        task_store=task_store,
        event_store=event_store,
        model_client=mock_model_client,
        memory_policy=MemoryPolicy(model_client=mock_model_client),
        memory_store=memory_store,
    )

    outcome = await runtime.run_chat(
        "What is DuckLM?", workspace="/tmp/test", reflect=False
    )

    assert outcome.status == "completed"
    # Expect exactly three chat calls (action, thinker, memory_policy);
    # a fourth would be the critic/reflection call that reflect=False disables.
    assert mock_model_client.chat.call_count == 3
|