# ducklm/tests/smoke/test_runtime_tools.py

import json

import pytest

from duck_core.events.store import EventStore
from duck_core.model_client import ModelResponse
from duck_core.approvals.service import ApprovalService
from duck_core.runtime_loop import RuntimeLoop
from duck_core.tasks.store import TaskStore


class FakeToolModelClient:
    """Scripted stand-in for a model client with two canned replies.

    The "action" role yields a JSON-encoded action directive requesting the
    ``file_read`` tool on ``note.txt``; the "thinker" role yields a fixed
    final answer.
    """

    async def chat(self, role, messages):
        """Return the canned ``ModelResponse`` for ``role``.

        Any role other than "action" must be "thinker", and at least one
        entry of ``messages`` must contain ``tool_observations`` in its
        ``"content"`` value; both conditions are enforced with assertions.
        """
        if role != "action":
            assert role == "thinker"
            # Fail unless some message content mentions tool_observations.
            assert any("tool_observations" in entry["content"] for entry in messages)
            return ModelResponse(
                role=role,
                model="local-main",
                content="The file says: hello from tool",
                reasoning_content="used file_read",
                raw={},
                latency_ms=12.0,
            )

        # Single low-risk action: read the note file.
        read_note = {
            "tool": "file_read",
            "args": {"path": "note.txt"},
            "reason": "User asked for file contents",
        }
        directive = {
            "kind": "action_directive",
            "intent": "read requested file",
            "risk_level": "low",
            "actions": [read_note],
        }
        return ModelResponse(
            role=role,
            model="local-main",
            content=json.dumps(directive),
            reasoning_content=None,
            raw={},
            latency_ms=5.0,
        )


@pytest.mark.asyncio
async def test_runtime_executes_action_directive_tool_and_finishes_with_observation(tmp_path):
    """Run a chat whose action directive calls ``file_read`` and check the outcome.

    A ``note.txt`` file is created under ``tmp_path``, the runtime loop is
    driven by ``FakeToolModelClient``, and the test then checks the final
    result plus the recorded ``action_directive``, ``tool_call_started`` and
    ``tool_call_finished`` events.
    """
    (tmp_path / "note.txt").write_text("hello from tool")
    db_path = str(tmp_path / "duck.sqlite3")
    task_store = TaskStore(db_path)
    event_store = EventStore(db_path)
    loop = RuntimeLoop(task_store, event_store, FakeToolModelClient())

    # Second argument is str(tmp_path) — presumably the working directory the
    # tool resolves "note.txt" against; confirm against RuntimeLoop.run_chat.
    result = await loop.run_chat("read note.txt", str(tmp_path), debug=True)
    events = await event_store.list_events(result.task_id)
    event_types = [event.event_type for event in events]
    # Use a default: a bare next() that finds nothing raises StopIteration,
    # which inside a coroutine is re-raised as an opaque RuntimeError
    # (PEP 479) instead of a readable test failure.
    tool_finished = next(
        (event for event in events if event.event_type == "tool_call_finished"),
        None,
    )

    assert result.status == "completed"
    assert result.final_response == "The file says: hello from tool"
    assert "action_directive" in event_types
    assert "tool_call_started" in event_types
    assert tool_finished is not None, f"no tool_call_finished event recorded; saw {event_types}"
    assert tool_finished.payload["tool"] == "file_read"
    assert tool_finished.payload["result"]["ok"] is True
    assert tool_finished.payload["result"]["output"] == "hello from tool"


class FakeApprovalModelClient:
    """Scripted stand-in for a model client that only answers the "action" role.

    The canned directive requests ``shell_exec_safe`` with ``uname -a`` at
    medium risk. Any other role raises ``AssertionError``.
    """

    async def chat(self, role, messages):
        """Return the canned action directive, or fail for any non-action role."""
        if role != "action":
            raise AssertionError("thinker must not be called while approval is pending")

        # Single medium-risk action: run a shell command.
        run_uname = {
            "tool": "shell_exec_safe",
            "args": {"command": "uname -a"},
            "reason": "User requested system information",
        }
        directive = {
            "kind": "action_directive",
            "intent": "run command",
            "risk_level": "medium",
            "actions": [run_uname],
        }
        return ModelResponse(
            role=role,
            model="local-main",
            content=json.dumps(directive),
            reasoning_content=None,
            raw={},
            latency_ms=5.0,
        )


@pytest.mark.asyncio
async def test_runtime_creates_pending_approval_when_tool_requires_it(tmp_path):
    """Run a chat whose directive calls ``shell_exec_safe`` and expect a pending approval.

    The runtime loop is driven by ``FakeApprovalModelClient`` and shares one
    SQLite file with the task store, event store and approval service. The
    test checks that the run stops in ``waiting_for_approval``, that a pending
    approval exists for the task, and that a ``tool_approval_requested`` event
    was recorded.
    """
    db_path = str(tmp_path / "duck.sqlite3")
    task_store = TaskStore(db_path)
    event_store = EventStore(db_path)
    approvals = ApprovalService(db_path)
    loop = RuntimeLoop(task_store, event_store, FakeApprovalModelClient(), approval_service=approvals)

    result = await loop.run_chat("run uname", str(tmp_path), debug=True)
    pending = await approvals.pending()
    events = await event_store.list_events(result.task_id)

    assert result.status == "waiting_for_approval"
    # Guard the index below so a missing approval fails with a clear message
    # rather than an IndexError.
    assert pending, "expected at least one pending approval after the run"
    assert pending[0].task_id == result.task_id
    assert pending[0].normalized_action["tool"] == "shell_exec_safe"
    assert any(event.event_type == "tool_approval_requested" for event in events)