# Listing metadata: 230 lines, 7.9 KiB, Python
import json
|
|
from unittest.mock import AsyncMock
|
|
|
|
import pytest
|
|
|
|
from duck_core.memory.policy import MemoryPolicy
|
|
from duck_core.memory.store import MemoryStore
|
|
from duck_core.model_client import ModelClient, ModelResponse
|
|
from duck_core.events.store import EventStore
|
|
from duck_core.runtime_loop import RuntimeLoop
|
|
from duck_core.tasks.store import TaskStore
|
|
|
|
|
|
@pytest.fixture
def mock_model_client():
    """Provide a mocked model client whose ``chat`` returns a storable verdict.

    The mock is created with ``spec=ModelClient``. Its ``chat`` attribute is an
    ``AsyncMock`` resolving to a ``ModelResponse`` whose ``content`` is a JSON
    document describing a global "preference" memory with ``should_store``
    set to true. Individual tests may overwrite ``chat.return_value``.
    """
    verdict_payload = {
        "should_store": True,
        "memory_type": "preference",
        "summary": "User prefers concise Russian answers.",
        "importance": 0.9,
        "scope": "global",
        "metadata": {"source": "conversation"},
    }
    canned_response = ModelResponse(
        role="critic",
        model="local-main",
        content=json.dumps(verdict_payload),
        reasoning_content=None,
        raw={},
        latency_ms=42.0,
    )

    mocked_client = AsyncMock(spec=ModelClient)
    mocked_client.chat = AsyncMock(return_value=canned_response)
    return mocked_client
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_memory_policy_stub_returns_should_store_false():
    """A ``MemoryPolicy`` built without arguments must decline to store.

    The decision is expected to be a zero-importance "event" whose metadata
    names ``stub_policy`` as its source.
    """
    verdict = await MemoryPolicy().classify("some summary", "task_123")

    assert verdict.should_store is False
    assert verdict.memory_type == "event"
    assert verdict.importance == 0.0
    assert verdict.metadata["source"] == "stub_policy"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_llm_memory_policy_classifies_and_stores(mock_model_client):
    """The model-backed policy returns the model's verdict and prompts it once.

    Checks two things: the decision mirrors the JSON served by the fixture,
    and ``chat`` was called exactly once with the configured role plus a
    single user message that embeds the summary being classified.
    """
    llm_policy = MemoryPolicy(model_client=mock_model_client, role="memory_policy")

    verdict = await llm_policy.classify(
        "User said they prefer short answers in Russian.",
        "task_456",
    )

    # The decision must reflect the fixture's canned JSON payload.
    assert verdict.should_store is True
    assert verdict.memory_type == "preference"
    assert verdict.importance == 0.9
    assert verdict.summary == "User prefers concise Russian answers."

    chat_mock = mock_model_client.chat
    chat_mock.assert_called_once()

    # ModelClient.chat(role, messages, ...) is expected to be called with
    # positional arguments, so inspect ``args`` rather than ``kwargs``.
    positional = chat_mock.call_args.args
    assert positional[0] == "memory_policy"

    sent_messages = positional[1]
    assert len(sent_messages) == 1

    only_message = sent_messages[0]
    assert only_message["role"] == "user"
    assert "User said they prefer short answers" in only_message["content"]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_llm_memory_policy_handles_non_storable(mock_model_client):
    """A model verdict with ``should_store`` false is passed through as-is.

    The fixture's default response is replaced with a low-importance,
    workspace-scoped "event" verdict before the policy is exercised.
    """
    negative_verdict = {
        "should_store": False,
        "memory_type": "event",
        "summary": "Routine tool call, nothing to remember.",
        "importance": 0.1,
        "scope": "workspace",
        "metadata": {},
    }
    mock_model_client.chat.return_value = ModelResponse(
        role="critic",
        model="local-main",
        content=json.dumps(negative_verdict),
        reasoning_content=None,
        raw={},
        latency_ms=30.0,
    )

    llm_policy = MemoryPolicy(model_client=mock_model_client)
    verdict = await llm_policy.classify("Ran ls -la in workspace.", "task_789")

    assert verdict.should_store is False
    assert verdict.importance == 0.1
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_llm_memory_policy_uses_response_format(mock_model_client):
    """The policy must pass a ``json_schema`` ``response_format`` keyword to ``chat``."""
    llm_policy = MemoryPolicy(model_client=mock_model_client)

    await llm_policy.classify("test summary", "task_1")

    # ``response_format`` is looked up among the keyword arguments of the call.
    requested_format = mock_model_client.chat.call_args.kwargs["response_format"]
    assert requested_format["type"] == "json_schema"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_llm_memory_policy_invalid_json_falls_back(mock_model_client):
    """Unparseable model output must yield the non-storing fallback decision."""
    malformed_reply = ModelResponse(
        role="critic",
        model="local-main",
        content="not valid json {{{",
        reasoning_content=None,
        raw={},
        latency_ms=10.0,
    )
    mock_model_client.chat.return_value = malformed_reply

    llm_policy = MemoryPolicy(model_client=mock_model_client)
    verdict = await llm_policy.classify("some summary", "task_x")

    assert verdict.should_store is False
    assert verdict.metadata["source"] == "llm_policy_fallback"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_llm_memory_policy_missing_fields_falls_back(mock_model_client):
    """A JSON reply carrying only ``should_store`` must trigger the fallback."""
    incomplete_payload = {"should_store": True}
    mock_model_client.chat.return_value = ModelResponse(
        role="critic",
        model="local-main",
        content=json.dumps(incomplete_payload),
        reasoning_content=None,
        raw={},
        latency_ms=10.0,
    )

    llm_policy = MemoryPolicy(model_client=mock_model_client)
    verdict = await llm_policy.classify("some summary", "task_y")

    # Even though the model said "store", the incomplete reply is not trusted.
    assert verdict.should_store is False
    assert verdict.metadata["source"] == "llm_policy_fallback"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_llm_memory_policy_schema_violation_falls_back(mock_model_client):
    """A complete reply with out-of-schema values must trigger the fallback.

    The payload has every field present, but with a memory type, importance
    and scope that the policy is expected to reject; the fallback decision
    must report a "schema violation" in its metadata.
    """
    out_of_schema_payload = {
        "should_store": True,
        "memory_type": "secret",
        "summary": "Store this invalid memory type.",
        "importance": 1.5,
        "scope": "everywhere",
        "metadata": {},
    }
    mock_model_client.chat.return_value = ModelResponse(
        role="critic",
        model="local-main",
        content=json.dumps(out_of_schema_payload),
        reasoning_content=None,
        raw={},
        latency_ms=10.0,
    )

    llm_policy = MemoryPolicy(model_client=mock_model_client)
    verdict = await llm_policy.classify("some summary", "task_z")

    assert verdict.should_store is False

    fallback_metadata = verdict.metadata
    assert fallback_metadata["source"] == "llm_policy_fallback"
    assert "schema violation" in fallback_metadata["error"]
|
|
|
|
|
|
class FakeRuntimeMemoryModelClient:
    """Hand-rolled model client double that answers according to the role.

    It handles the roles "action", "thinker" and "memory_policy", and keeps
    the prompt received for "memory_policy" so a test can inspect what the
    memory policy was shown.
    """

    def __init__(self):
        # Content of the first message of the latest "memory_policy" call;
        # stays empty until such a call happens.
        self.memory_policy_prompt = ""

    async def chat(self, role, messages, **kwargs):
        """Return a canned ``ModelResponse`` for ``role``.

        Args:
            role: Requested role; must be "action", "thinker" or
                "memory_policy".
            messages: Chat messages. Only ``messages[0]["content"]`` is read,
                and only for the "memory_policy" role.
            **kwargs: Accepted and ignored.

        Returns:
            A ``ModelResponse`` echoing ``role`` with role-specific content.

        Raises:
            AssertionError: If ``role`` is not one of the handled roles.
        """
        if role == "action":
            # An empty action list, labelled as a direct answer.
            content = json.dumps({
                "kind": "action_directive",
                "intent": "direct answer",
                "risk_level": "none",
                "actions": [],
            })
        elif role == "thinker":
            content = "Приятно познакомиться. Чем помочь?"
        elif role == "memory_policy":
            prompt = messages[0]["content"]
            self.memory_policy_prompt = prompt
            # Storing is approved only when the prompt contains the user's
            # self-introduction.
            content = json.dumps({
                "should_store": "Меня зовут Владимир" in prompt,
                "memory_type": "fact",
                "summary": "User's name is Vladimir.",
                "importance": 0.7,
                "scope": "global",
                "metadata": {},
            })
        else:
            raise AssertionError(f"unexpected role: {role}")

        return ModelResponse(
            role=role,
            model="local-main",
            content=content,
            reasoning_content=None,
            raw={},
            latency_ms=1.0,
        )
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runtime_memory_policy_sees_user_message_not_only_final_answer(tmp_path):
    """The memory policy prompt must include the user's message.

    A ``RuntimeLoop`` is wired to task, event and memory stores that share one
    database file under ``tmp_path``, and to ``FakeRuntimeMemoryModelClient``,
    which records the prompt it receives for the "memory_policy" role. After
    one ``run_chat`` call the test checks that the recorded prompt contains
    both the user message and the assistant's final response, and that the
    summary chosen by the policy was written to the memory store.
    """
    db_path = str(tmp_path / "duck.sqlite3")
    task_store = TaskStore(db_path)
    event_store = EventStore(db_path)
    memory_store = MemoryStore(db_path)
    model_client = FakeRuntimeMemoryModelClient()
    loop = RuntimeLoop(
        task_store,
        event_store,
        model_client,
        memory_store=memory_store,
    )

    result = await loop.run_chat(
        "Меня зовут Владимир. Я работаю системным администратором.",
        str(tmp_path),
        debug=True,
        reflect=False,
    )
    memories = await memory_store.list(workspace=str(tmp_path))

    assert result.status == "completed"

    recorded_prompt = model_client.memory_policy_prompt
    assert "User message:" in recorded_prompt
    assert "Меня зовут Владимир" in recorded_prompt
    assert "Assistant final response:" in recorded_prompt

    # Fail with a readable assertion instead of an IndexError when the
    # runtime stored nothing at all.
    assert memories, "expected at least one stored memory"
    assert memories[0].text == "User's name is Vladimir."
|