# ducklm/tests/smoke/test_memory_policy.py
#
# 230 lines
# 7.9 KiB
# Python

import json
from unittest.mock import AsyncMock
import pytest
from duck_core.memory.policy import MemoryPolicy
from duck_core.memory.store import MemoryStore
from duck_core.model_client import ModelClient, ModelResponse
from duck_core.events.store import EventStore
from duck_core.runtime_loop import RuntimeLoop
from duck_core.tasks.store import TaskStore
@pytest.fixture
def mock_model_client():
    """Provide a ``ModelClient`` double whose ``chat`` returns a storable decision.

    The canned reply carries a JSON document describing a "preference"
    memory with ``should_store`` set to true. Tests that need a different
    reply overwrite ``client.chat.return_value``.
    """
    storable_decision = {
        "should_store": True,
        "memory_type": "preference",
        "summary": "User prefers concise Russian answers.",
        "importance": 0.9,
        "scope": "global",
        "metadata": {"source": "conversation"},
    }
    canned_reply = ModelResponse(
        role="critic",
        model="local-main",
        content=json.dumps(storable_decision),
        reasoning_content=None,
        raw={},
        latency_ms=42.0,
    )

    client = AsyncMock(spec=ModelClient)
    client.chat = AsyncMock(return_value=canned_reply)
    return client
@pytest.mark.asyncio
async def test_memory_policy_stub_returns_should_store_false():
    """A policy built without a model client must answer with the stub decision."""
    stub_policy = MemoryPolicy()

    verdict = await stub_policy.classify("some summary", "task_123")

    assert verdict.should_store is False
    assert verdict.memory_type == "event"
    assert verdict.importance == 0.0
    assert verdict.metadata["source"] == "stub_policy"
@pytest.mark.asyncio
async def test_llm_memory_policy_classifies_and_stores(mock_model_client):
    """The model's decision is returned and the model is queried exactly once.

    Also checks the shape of the request: the configured role is passed
    first, followed by a single user message that embeds the summary text.
    """
    user_summary = "User said they prefer short answers in Russian."
    policy = MemoryPolicy(model_client=mock_model_client, role="memory_policy")

    decision = await policy.classify(user_summary, "task_456")

    assert decision.should_store is True
    assert decision.memory_type == "preference"
    assert decision.importance == 0.9
    assert decision.summary == "User prefers concise Russian answers."

    chat_mock = mock_model_client.chat
    chat_mock.assert_called_once()
    # ModelClient.chat(role, messages, ...) — positional args
    positional = chat_mock.call_args.args
    assert positional[0] == "memory_policy"
    sent_messages = positional[1]
    assert len(sent_messages) == 1
    only_message = sent_messages[0]
    assert only_message["role"] == "user"
    assert "User said they prefer short answers" in only_message["content"]
@pytest.mark.asyncio
async def test_llm_memory_policy_handles_non_storable(mock_model_client):
    """A model reply with ``should_store`` false is passed through unchanged."""
    skip_decision = {
        "should_store": False,
        "memory_type": "event",
        "summary": "Routine tool call, nothing to remember.",
        "importance": 0.1,
        "scope": "workspace",
        "metadata": {},
    }
    # Replace the fixture's default "store it" reply with a "skip it" one.
    mock_model_client.chat.return_value = ModelResponse(
        role="critic",
        model="local-main",
        content=json.dumps(skip_decision),
        reasoning_content=None,
        raw={},
        latency_ms=30.0,
    )
    policy = MemoryPolicy(model_client=mock_model_client)

    decision = await policy.classify("Ran ls -la in workspace.", "task_789")

    assert decision.should_store is False
    assert decision.importance == 0.1
@pytest.mark.asyncio
async def test_llm_memory_policy_uses_response_format(mock_model_client):
    """``classify`` must pass a ``json_schema`` response format to the model client."""
    policy = MemoryPolicy(model_client=mock_model_client)

    await policy.classify("test summary", "task_1")

    keyword_args = mock_model_client.chat.call_args.kwargs
    response_format = keyword_args["response_format"]
    assert response_format["type"] == "json_schema"
@pytest.mark.asyncio
async def test_llm_memory_policy_invalid_json_falls_back(mock_model_client):
    """Unparseable model output must yield the non-storing fallback decision."""
    broken_reply = ModelResponse(
        role="critic",
        model="local-main",
        content="not valid json {{{",
        reasoning_content=None,
        raw={},
        latency_ms=10.0,
    )
    mock_model_client.chat.return_value = broken_reply
    policy = MemoryPolicy(model_client=mock_model_client)

    decision = await policy.classify("some summary", "task_x")

    assert decision.should_store is False
    assert decision.metadata["source"] == "llm_policy_fallback"
@pytest.mark.asyncio
async def test_llm_memory_policy_missing_fields_falls_back(mock_model_client):
    """A reply containing only ``should_store`` must yield the fallback decision."""
    # Valid JSON, but every field other than "should_store" is absent.
    partial_payload = json.dumps({"should_store": True})
    mock_model_client.chat.return_value = ModelResponse(
        role="critic",
        model="local-main",
        content=partial_payload,
        reasoning_content=None,
        raw={},
        latency_ms=10.0,
    )
    policy = MemoryPolicy(model_client=mock_model_client)

    decision = await policy.classify("some summary", "task_y")

    assert decision.should_store is False
    assert decision.metadata["source"] == "llm_policy_fallback"
@pytest.mark.asyncio
async def test_llm_memory_policy_schema_violation_falls_back(mock_model_client):
    """A complete reply that breaks the schema must fall back and report the error.

    The payload has every field present but uses values the test expects the
    policy to reject; the fallback's metadata must mention "schema violation".
    """
    offending_payload = {
        "should_store": True,
        "memory_type": "secret",
        "summary": "Store this invalid memory type.",
        "importance": 1.5,
        "scope": "everywhere",
        "metadata": {},
    }
    mock_model_client.chat.return_value = ModelResponse(
        role="critic",
        model="local-main",
        content=json.dumps(offending_payload),
        reasoning_content=None,
        raw={},
        latency_ms=10.0,
    )
    policy = MemoryPolicy(model_client=mock_model_client)

    decision = await policy.classify("some summary", "task_z")

    assert decision.should_store is False
    fallback_meta = decision.metadata
    assert fallback_meta["source"] == "llm_policy_fallback"
    assert "schema violation" in fallback_meta["error"]
class FakeRuntimeMemoryModelClient:
    """Scripted model client for the runtime-loop memory test.

    Replies are chosen by ``role``:

    * ``"action"`` returns a JSON action directive with an empty action list.
    * ``"thinker"`` returns a fixed Russian greeting.
    * ``"memory_policy"`` records the prompt it received in
      ``memory_policy_prompt`` and returns a decision whose ``should_store``
      is true only when that prompt contains "Меня зовут Владимир".

    Any other role raises ``AssertionError``.
    """

    def __init__(self):
        # Content of the first message of the latest "memory_policy" request.
        self.memory_policy_prompt = ""

    @staticmethod
    def _reply(role, content):
        """Wrap ``content`` in a ``ModelResponse`` with the fixed fake fields."""
        return ModelResponse(
            role=role,
            model="local-main",
            content=content,
            reasoning_content=None,
            raw={},
            latency_ms=1.0,
        )

    async def chat(self, role, messages, **kwargs):
        """Return the scripted response for ``role``; extra keyword args are ignored."""
        if role == "action":
            directive = {
                "kind": "action_directive",
                "intent": "direct answer",
                "risk_level": "none",
                "actions": [],
            }
            return self._reply(role, json.dumps(directive))

        if role == "thinker":
            return self._reply(role, "Приятно познакомиться. Чем помочь?")

        if role != "memory_policy":
            raise AssertionError(f"unexpected role: {role}")

        # Keep the prompt so the test can inspect what the policy was shown.
        self.memory_policy_prompt = messages[0]["content"]
        mentions_name = "Меня зовут Владимир" in self.memory_policy_prompt
        decision = {
            "should_store": mentions_name,
            "memory_type": "fact",
            "summary": "User's name is Vladimir.",
            "importance": 0.7,
            "scope": "global",
            "metadata": {},
        }
        return self._reply(role, json.dumps(decision))
@pytest.mark.asyncio
async def test_runtime_memory_policy_sees_user_message_not_only_final_answer(tmp_path):
    """The memory-policy prompt must include the user's message, not just the reply.

    Runs one chat turn through ``RuntimeLoop`` with a scripted model client and
    stores backed by a SQLite file under ``tmp_path``. It then checks that the
    prompt captured for the ``memory_policy`` role contains both the
    "User message:" and "Assistant final response:" sections, and that the
    first stored memory carries the summary returned by the fake client.
    """
    db_path = str(tmp_path / "duck.sqlite3")
    tmp_dir = str(tmp_path)

    task_store = TaskStore(db_path)
    event_store = EventStore(db_path)
    memory_store = MemoryStore(db_path)
    model_client = FakeRuntimeMemoryModelClient()
    loop = RuntimeLoop(
        task_store,
        event_store,
        model_client,
        memory_store=memory_store,
    )

    result = await loop.run_chat(
        "Меня зовут Владимир. Я работаю системным администратором.",
        tmp_dir,
        debug=True,
        reflect=False,
    )
    memories = await memory_store.list(workspace=tmp_dir)

    assert result.status == "completed"

    captured_prompt = model_client.memory_policy_prompt
    assert "User message:" in captured_prompt
    assert "Меня зовут Владимир" in captured_prompt
    assert "Assistant final response:" in captured_prompt

    # Fail with a readable message instead of an IndexError when nothing
    # was persisted.
    assert memories, "expected the runtime loop to store at least one memory"
    assert memories[0].text == "User's name is Vladimir."