# ducklm/tests/smoke/test_memory_policy.py

import json
from unittest.mock import AsyncMock

import pytest

from duck_core.memory.policy import MemoryPolicy
from duck_core.memory.store import MemoryStore
from duck_core.model_client import ModelClient, ModelResponse
from duck_core.events.store import EventStore
from duck_core.runtime_loop import RuntimeLoop
from duck_core.tasks.store import TaskStore


@pytest.fixture
def mock_model_client():
    """Provide a ``ModelClient``-spec'd async mock with a canned ``chat`` reply.

    The reply is a ``ModelResponse`` whose ``content`` is a JSON document
    describing a storable "preference" memory. Individual tests may replace
    ``chat.return_value`` to script a different reply.
    """
    storable_verdict = {
        "should_store": True,
        "memory_type": "preference",
        "summary": "User prefers concise Russian answers.",
        "importance": 0.9,
        "scope": "global",
        "metadata": {"source": "conversation"},
    }
    canned_reply = ModelResponse(
        role="critic",
        model="local-main",
        content=json.dumps(storable_verdict),
        reasoning_content=None,
        raw={},
        latency_ms=42.0,
    )

    model_client = AsyncMock(spec=ModelClient)
    # Install an explicit AsyncMock for ``chat`` so awaiting it yields the reply.
    model_client.chat = AsyncMock(return_value=canned_reply)
    return model_client


@pytest.mark.asyncio
async def test_memory_policy_stub_returns_should_store_false():
    """A ``MemoryPolicy`` built with no arguments returns the stub decision."""
    stub_policy = MemoryPolicy()

    verdict = await stub_policy.classify("some summary", "task_123")

    # The stub decision is a zero-importance "event" that is not stored and
    # is tagged with its origin in the metadata.
    assert verdict.should_store is False
    assert verdict.memory_type == "event"
    assert verdict.importance == 0.0
    assert verdict.metadata["source"] == "stub_policy"


@pytest.mark.asyncio
async def test_llm_memory_policy_classifies_and_stores(mock_model_client):
    """The model-backed policy returns the model's verdict and prompts it once.

    Checks both the parsed decision fields and the shape of the single
    ``chat`` call made on the mocked client.
    """
    conversation_summary = "User said they prefer short answers in Russian."
    llm_policy = MemoryPolicy(model_client=mock_model_client, role="memory_policy")

    verdict = await llm_policy.classify(conversation_summary, "task_456")

    # Decision fields mirror the JSON served by the fixture.
    assert verdict.should_store is True
    assert verdict.memory_type == "preference"
    assert verdict.importance == 0.9
    assert verdict.summary == "User prefers concise Russian answers."

    chat_mock = mock_model_client.chat
    chat_mock.assert_called_once()
    # ModelClient.chat(role, messages, ...) — both are read as positional args.
    positional = chat_mock.call_args.args
    assert positional[0] == "memory_policy"
    sent_messages = positional[1]
    assert len(sent_messages) == 1
    only_message = sent_messages[0]
    assert only_message["role"] == "user"
    assert "User said they prefer short answers" in only_message["content"]


@pytest.mark.asyncio
async def test_llm_memory_policy_handles_non_storable(mock_model_client):
    """A model verdict with ``should_store`` false is passed through as-is."""
    rejecting_verdict = {
        "should_store": False,
        "memory_type": "event",
        "summary": "Routine tool call, nothing to remember.",
        "importance": 0.1,
        "scope": "workspace",
        "metadata": {},
    }
    # Override the fixture's default reply with the rejecting verdict.
    mock_model_client.chat.return_value = ModelResponse(
        role="critic",
        model="local-main",
        content=json.dumps(rejecting_verdict),
        reasoning_content=None,
        raw={},
        latency_ms=30.0,
    )
    llm_policy = MemoryPolicy(model_client=mock_model_client)

    verdict = await llm_policy.classify("Ran ls -la in workspace.", "task_789")

    assert verdict.should_store is False
    assert verdict.importance == 0.1


@pytest.mark.asyncio
async def test_llm_memory_policy_uses_response_format(mock_model_client):
    """The policy passes a ``json_schema`` ``response_format`` keyword to ``chat``."""
    llm_policy = MemoryPolicy(model_client=mock_model_client)

    await llm_policy.classify("test summary", "task_1")

    # Inspect the keyword arguments of the most recent ``chat`` call.
    chat_kwargs = mock_model_client.chat.call_args.kwargs
    requested_format = chat_kwargs["response_format"]
    assert requested_format["type"] == "json_schema"


@pytest.mark.asyncio
async def test_llm_memory_policy_invalid_json_falls_back(mock_model_client):
    """Unparseable model output yields a non-storing fallback decision."""
    garbled_reply = ModelResponse(
        role="critic",
        model="local-main",
        content="not valid json {{{",
        reasoning_content=None,
        raw={},
        latency_ms=10.0,
    )
    mock_model_client.chat.return_value = garbled_reply
    llm_policy = MemoryPolicy(model_client=mock_model_client)

    verdict = await llm_policy.classify("some summary", "task_x")

    # The fallback is identified by its metadata source tag.
    assert verdict.should_store is False
    assert verdict.metadata["source"] == "llm_policy_fallback"


@pytest.mark.asyncio
async def test_llm_memory_policy_missing_fields_falls_back(mock_model_client):
    """A JSON reply carrying only ``should_store`` yields the fallback decision."""
    partial_verdict = {"should_store": True}
    mock_model_client.chat.return_value = ModelResponse(
        role="critic",
        model="local-main",
        content=json.dumps(partial_verdict),
        reasoning_content=None,
        raw={},
        latency_ms=10.0,
    )
    llm_policy = MemoryPolicy(model_client=mock_model_client)

    verdict = await llm_policy.classify("some summary", "task_y")

    # Even though the reply asked to store, the fallback must not store.
    assert verdict.should_store is False
    assert verdict.metadata["source"] == "llm_policy_fallback"


@pytest.mark.asyncio
async def test_llm_memory_policy_schema_violation_falls_back(mock_model_client):
    """A complete but schema-violating reply yields the fallback decision.

    The reply has every field present; the test expects the policy to reject
    it and to report "schema violation" in the fallback's error metadata.
    """
    out_of_schema_verdict = {
        "should_store": True,
        "memory_type": "secret",
        "summary": "Store this invalid memory type.",
        "importance": 1.5,
        "scope": "everywhere",
        "metadata": {},
    }
    mock_model_client.chat.return_value = ModelResponse(
        role="critic",
        model="local-main",
        content=json.dumps(out_of_schema_verdict),
        reasoning_content=None,
        raw={},
        latency_ms=10.0,
    )
    llm_policy = MemoryPolicy(model_client=mock_model_client)

    verdict = await llm_policy.classify("some summary", "task_z")

    assert verdict.should_store is False
    fallback_metadata = verdict.metadata
    assert fallback_metadata["source"] == "llm_policy_fallback"
    assert "schema violation" in fallback_metadata["error"]


class FakeRuntimeMemoryModelClient:
    """Scripted stand-in for a model client, answering by role.

    Handles the "action", "thinker" and "memory_policy" roles; any other
    role raises ``AssertionError``. The prompt received for "memory_policy"
    is kept on ``memory_policy_prompt`` so tests can inspect it.
    """

    def __init__(self):
        # Content of the first message of the latest "memory_policy" call.
        self.memory_policy_prompt = ""

    @staticmethod
    def _reply(role, content):
        """Build the fixed-shape ``ModelResponse`` shared by every role."""
        return ModelResponse(
            role=role,
            model="local-main",
            content=content,
            reasoning_content=None,
            raw={},
            latency_ms=1.0,
        )

    async def chat(self, role, messages, **kwargs):
        """Return the scripted response for ``role``.

        Extra keyword arguments are accepted and ignored.
        """
        if role == "action":
            # An action directive with an empty action list.
            directive = {
                "kind": "action_directive",
                "intent": "direct answer",
                "risk_level": "none",
                "actions": [],
            }
            return self._reply(role, json.dumps(directive))

        if role == "thinker":
            return self._reply(role, "Приятно познакомиться. Чем помочь?")

        if role != "memory_policy":
            raise AssertionError(f"unexpected role: {role}")

        # Record the prompt, then store only if it carries the user's
        # self-introduction.
        prompt = messages[0]["content"]
        self.memory_policy_prompt = prompt
        verdict = {
            "should_store": "Меня зовут Владимир" in prompt,
            "memory_type": "fact",
            "summary": "User's name is Vladimir.",
            "importance": 0.7,
            "scope": "global",
            "metadata": {},
        }
        return self._reply(role, json.dumps(verdict))


@pytest.mark.asyncio
async def test_runtime_memory_policy_sees_user_message_not_only_final_answer(tmp_path):
    """The runtime's memory-policy prompt includes the user's own message.

    Runs one chat turn through ``RuntimeLoop`` with a scripted fake model
    client and SQLite-file-backed stores under ``tmp_path``, then checks that
    the prompt handed to the "memory_policy" role contains both the user
    message and the assistant's final response, and that the resulting
    memory was stored.
    """
    db_path = str(tmp_path / "duck.sqlite3")
    workspace = str(tmp_path)
    task_store = TaskStore(db_path)
    event_store = EventStore(db_path)
    memory_store = MemoryStore(db_path)
    model_client = FakeRuntimeMemoryModelClient()
    loop = RuntimeLoop(
        task_store,
        event_store,
        model_client,
        memory_store=memory_store,
    )

    result = await loop.run_chat(
        "Меня зовут Владимир. Я работаю системным администратором.",
        workspace,
        debug=True,
        reflect=False,
    )
    memories = await memory_store.list(workspace=workspace)

    assert result.status == "completed"
    policy_prompt = model_client.memory_policy_prompt
    assert "User message:" in policy_prompt
    assert "Меня зовут Владимир" in policy_prompt
    assert "Assistant final response:" in policy_prompt
    # Guard the index below: without it an empty result surfaces as an
    # IndexError instead of a readable assertion failure.
    assert memories, "expected at least one stored memory after the chat turn"
    assert memories[0].text == "User's name is Vladimir."