"""Tests for the reflection step that follows ``RuntimeLoop.run_chat``."""

import json
from unittest.mock import AsyncMock

import pytest

from duck_core.events.store import EventStore
from duck_core.experience.recorder import ExperienceRecorder
from duck_core.memory.policy import MemoryPolicy
from duck_core.memory.store import MemoryStore
from duck_core.model_client import ModelClient, ModelResponse
from duck_core.runtime_loop import RuntimeLoop
from duck_core.tasks.store import TaskStore


def _model_response(role, content):
    """Build a canned ``ModelResponse`` carrying the fields every test shares.

    Args:
        role: Value for the response's ``role`` field.
        content: Value for the response's ``content`` field.

    Returns:
        A ``ModelResponse`` with model ``"local-main"``, no reasoning
        content, an empty ``raw`` payload and a 1 ms latency.
    """
    return ModelResponse(
        role=role,
        model="local-main",
        content=content,
        reasoning_content=None,
        raw={},
        latency_ms=1.0,
    )


# NOTE: every store fixture below opens the same "duck.sqlite3" file inside
# the per-test ``tmp_path`` directory.


@pytest.fixture
def task_store(tmp_path):
    """Return a ``TaskStore`` backed by ``duck.sqlite3`` under ``tmp_path``."""
    return TaskStore(str(tmp_path / "duck.sqlite3"))


@pytest.fixture
def event_store(tmp_path):
    """Return an ``EventStore`` backed by ``duck.sqlite3`` under ``tmp_path``."""
    return EventStore(str(tmp_path / "duck.sqlite3"))


@pytest.fixture
def memory_store(tmp_path):
    """Return a ``MemoryStore`` backed by ``duck.sqlite3`` under ``tmp_path``."""
    return MemoryStore(str(tmp_path / "duck.sqlite3"))


@pytest.fixture
def experience_recorder(tmp_path):
    """Return an ``ExperienceRecorder`` backed by ``duck.sqlite3`` under ``tmp_path``."""
    return ExperienceRecorder(str(tmp_path / "duck.sqlite3"))


@pytest.fixture
def mock_model_client():
    """Return a mocked ``ModelClient`` whose ``chat`` yields four scripted replies.

    The replies are consumed in order, one per ``chat`` call.
    """
    client = AsyncMock(spec=ModelClient)
    client.chat = AsyncMock(
        side_effect=[
            # First call: action role — return empty actions
            _model_response(
                "action",
                json.dumps({
                    "kind": "action_directive",
                    "intent": "answer directly",
                    "risk_level": "none",
                    "actions": [],
                }),
            ),
            # Second call: thinker role — final answer
            _model_response("thinker", "DuckLM is a local cognitive runtime."),
            # Third call: memory_policy role
            _model_response(
                "critic",
                json.dumps({
                    "should_store": False,
                    "memory_type": "event",
                    "summary": "Routine answer, nothing to remember.",
                    "importance": 0.1,
                    "scope": "workspace",
                    "metadata": {},
                }),
            ),
            # Fourth call: critic role (reflection)
            _model_response(
                "critic",
                "Task completed successfully. No issues found. "
                "Reusable lesson: direct answers work well for simple queries.",
            ),
        ]
    )
    return client


@pytest.mark.asyncio
async def test_reflection_is_called_after_task_completion(
    task_store, event_store, memory_store, experience_recorder, mock_model_client
):
    """A completed chat produces one experience record and a reflection event."""
    policy = MemoryPolicy(model_client=mock_model_client)
    runtime = RuntimeLoop(
        task_store=task_store,
        event_store=event_store,
        model_client=mock_model_client,
        memory_policy=policy,
        memory_store=memory_store,
        experience_recorder=experience_recorder,
    )

    result = await runtime.run_chat("What is DuckLM?", workspace="/tmp/test")

    assert result.status == "completed"
    assert "DuckLM" in result.final_response

    # Check that reflection was called — experience record created
    records = await experience_recorder.list_records()
    assert len(records) == 1
    assert records[0].task_id == result.task_id
    assert "completed successfully" in records[0].reusable_lesson

    # Check that reflection_completed event was recorded
    events = await event_store.list_events(result.task_id)
    event_types = [e.event_type for e in events]
    assert "reflection_completed" in event_types


@pytest.mark.asyncio
async def test_reflection_failure_does_not_break_task(
    task_store, event_store, memory_store, experience_recorder
):
    """If reflection fails, the task should still complete successfully."""
    client = AsyncMock(spec=ModelClient)
    client.chat = AsyncMock(
        side_effect=[
            # Action: empty
            _model_response(
                "action",
                json.dumps({
                    "kind": "action_directive",
                    "intent": "answer",
                    "risk_level": "none",
                    "actions": [],
                }),
            ),
            # Thinker: answer
            _model_response("thinker", "Answer."),
            # Memory policy
            _model_response(
                "critic",
                json.dumps({
                    "should_store": False,
                    "memory_type": "event",
                    "summary": "Routine.",
                    "importance": 0.1,
                    "scope": "workspace",
                    "metadata": {},
                }),
            ),
            # Critic (reflection) — raises exception
            ConnectionError("LLM unavailable"),
        ]
    )

    policy = MemoryPolicy(model_client=client)
    runtime = RuntimeLoop(
        task_store=task_store,
        event_store=event_store,
        model_client=client,
        memory_policy=policy,
        memory_store=memory_store,
        experience_recorder=experience_recorder,
    )

    result = await runtime.run_chat("test", workspace="/tmp/test")

    # Task should still complete
    assert result.status == "completed"

    # Reflection failure event should be recorded
    events = await event_store.list_events(result.task_id)
    event_types = [e.event_type for e in events]
    assert "reflection_failed" in event_types


@pytest.mark.asyncio
async def test_reflection_not_called_when_disabled(
    task_store, event_store, memory_store, mock_model_client
):
    """When reflect=False, no reflection should be called."""
    policy = MemoryPolicy(model_client=mock_model_client)
    runtime = RuntimeLoop(
        task_store=task_store,
        event_store=event_store,
        model_client=mock_model_client,
        memory_policy=policy,
        memory_store=memory_store,
    )

    result = await runtime.run_chat(
        "What is DuckLM?", workspace="/tmp/test", reflect=False
    )

    assert result.status == "completed"
    # mock_model_client.chat should have been called 3 times
    # (action, thinker, memory_policy), NOT 4 times (no critic/reflection call)
    assert mock_model_client.chat.call_count == 3