# ducklm/tests/test_tools_flow.py

import json
import time
from pathlib import Path

from app.core.contracts import ExecutionDirective, UserTask
from app.core.contracts import PermissionDecision
from app.core.contracts import ToolResult
from app.events.event_types import TOOL_OUTPUT_CHUNK
from app.runtime.runtime_controller import RuntimeController
from app.tools.sandbox import ToolSandbox


def _write_config_tree(base_dir: Path) -> None:
    (base_dir / "config").mkdir()
    (base_dir / "data" / "events").mkdir(parents=True, exist_ok=True)
    (base_dir / "data" / "state").mkdir(parents=True, exist_ok=True)
    (base_dir / "data" / "permissions").mkdir(parents=True, exist_ok=True)
    (base_dir / "models").mkdir(exist_ok=True)

    configs = {
        "models.json": {
            "orchestrator_path": "models/llama.gguf",
            "coder_path": "models/xcoder.gguf",
            "critic_path": "models/gemma.gguf",
            "embeddings_path": "models/all-MiniLM-L6-v2",
            "inference": {},
        },
        "prompts.json": {
            "orchestration_prompt": "",
            "planning_prompt": "",
            "coder_prompt": "",
            "critic_prompt": "",
        },
        "permissions.json": {
            "settings": {
                "allow_caching": True,
                "cache_file": str(base_dir / "data/runtime/allowed_commands.json"),
                "normalize_commands": True,
                "split_chained": True
            },
            "command_categories": {
                "hard_stop": {
                    "commands": ["rm -rf /", "rm -rf /*", "dd if=/dev/zero of=/dev/sd*"]
                },
                "no_always": {
                    "allow_once": True,
                    "allow_always": False,
                    "commands": [
                        "rm -rf *", "rm -rf .*", "shutdown", "reboot", "halt",
                        "apt", "apt-get", "dpkg", "yum", "dnf", "pacman",
                        "systemctl stop", "systemctl start", "systemctl restart",
                        "service stop", "service start", "killall", "pkill -9"
                    ]
                },
                "normal": {
                    "allow_once": True,
                    "allow_always": True,
                    "commands": ["shell_exec", "file_write"]
                }
            },
            "path_settings": {
                "allow_read_outside": True,
                "allow_write_paths": [str(base_dir), "/tmp"],
                "require_confirmation_for_write": True,
                "require_confirmation_for_shell": True
            }
        },
        "runtime.json": {
            "step_timeout_ms": 5000,
            "task_timeout_ms": 30000,
            "planner_retry_limit": 1,
            "tool_retry_limit": 0,
            "replan_limit": 0,
            "max_execution_steps": 5,
            "retrieval_top_k": 3,
            "memory_thresholds": {},
            "critic_fallback_policy": "continue_without_critic",
            "checkpoint_policy": {"save_on_transition": True},
            "event_retention_policy": {"keep_all": True},
            "streaming_settings": {"enabled": True},
        },
    }
    for name, payload in configs.items():
        (base_dir / "config" / name).write_text(json.dumps(payload), encoding="utf-8")


def test_file_write_and_read_tool_flow(tmp_path: Path) -> None:
    """Round-trip a file through the ``file_write`` and ``file_read`` tools.

    The first task writes a text file below ``tmp_path``; the second task reads
    the same path back and must return the identical content.
    """
    _write_config_tree(tmp_path)
    runtime = RuntimeController(base_dir=tmp_path)
    target = tmp_path / "notes" / "test.txt"
    expected_text = "hello from ducklm"

    # Step 1: write the file through the runtime and check it landed on disk.
    write_task = UserTask(
        input="write a file",
        context={
            "requested_tool": "file_write",
            "tool_args": {"path": str(target), "content": expected_text},
        },
    )
    write_result = runtime.handle_task(write_task)
    assert write_result["status"] == "completed"
    assert target.read_text(encoding="utf-8") == expected_text

    # Step 2: read it back through the runtime and compare the tool output.
    read_task = UserTask(
        input="read the file",
        context={
            "requested_tool": "file_read",
            "tool_args": {"path": str(target)},
        },
    )
    read_result = runtime.handle_task(read_task)
    assert read_result["status"] == "completed"
    assert read_result["result"]["output"] == expected_text


def test_shell_exec_requires_permission_for_dangerous_command(tmp_path: Path) -> None:
    """A destructive ``rm -rf`` shell command must pause for user permission."""
    _write_config_tree(tmp_path)
    runtime = RuntimeController(base_dir=tmp_path)
    dangerous_task = UserTask(
        input="run dangerous shell command",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "rm -rf /tmp/nonexistent"},
        },
    )

    outcome = runtime.handle_task(dangerous_task)

    # The fixture's hard_stop list only holds "rm -rf /", "rm -rf /*" and the
    # dd command, so this command is expected to produce a permission request
    # (the no_always category lists "rm -rf *") rather than an outright refusal.
    assert outcome["status"] == "awaiting_permission"
    assert "permission_request" in outcome["result"]


def test_shell_exec_allows_safe_command(tmp_path: Path) -> None:
    """A harmless ``pwd`` runs to completion once the user grants ``allow_once``."""
    _write_config_tree(tmp_path)
    runtime = RuntimeController(base_dir=tmp_path)
    safe_task = UserTask(
        input="run safe shell command",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "pwd"},
        },
    )

    pending = runtime.handle_task(safe_task)

    # Under the current permission model even a safe command asks first.
    assert pending["status"] == "awaiting_permission"
    assert "permission_request" in pending["result"]

    # After a one-time grant the command executes; its output must contain
    # the temporary base directory.
    resumed = runtime.resolve_permission(
        task_id=pending["task_id"],
        decision="allow_once",
    )
    assert resumed["status"] == "completed"
    assert str(tmp_path) in resumed["result"]["output"]


def test_shell_exec_publishes_output_chunks_before_completion(tmp_path: Path) -> None:
    """Shell output is published as chunk events ahead of the completion event.

    The command prints two lines with a short pause in between; the event bus
    must hold one ``TOOL_OUTPUT_CHUNK`` event per line, and the first of them
    must precede the ``tool_completed`` event.
    """
    _write_config_tree(tmp_path)
    runtime = RuntimeController(base_dir=tmp_path)
    always_allow_printf = PermissionDecision(
        action_type="shell_command",
        pattern="printf",
        decision="allow_always",
    )
    command = "printf 'first\\n'; sleep 0.1; printf 'second\\n'"

    task = UserTask(
        input="stream shell output",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": command},
        },
    )
    directive = ExecutionDirective(
        type="tool",
        payload={
            "tool": "shell_exec",
            "args": {"command": command},
        },
    )
    result = runtime.execution_engine.execute(
        task,
        directive,
        permission_override=always_allow_printf,
    )

    recorded = runtime.event_bus.list_for_task(task.task_id)
    chunk_events = [evt for evt in recorded if evt.type == TOOL_OUTPUT_CHUNK]
    # Positions of the completion event and of the first chunk in the stream.
    completed_at = next(
        pos for pos, evt in enumerate(recorded) if evt.type == "tool_completed"
    )
    first_chunk_at = next(
        pos for pos, evt in enumerate(recorded) if evt.type == TOOL_OUTPUT_CHUNK
    )

    assert result["status"] == "completed"
    assert [evt.payload["chunk"] for evt in chunk_events] == ["first\n", "second\n"]
    assert first_chunk_at < completed_at


def test_streaming_shell_uses_idle_timeout_not_step_timeout(tmp_path: Path) -> None:
    """A streaming command may outlive ``timeout_ms`` while it stays within the other limits.

    ``timeout_ms`` (100 ms) is shorter than the 0.2 s pause inside the command,
    while ``idle_timeout_ms`` and ``command_timeout_ms`` are longer; the run is
    expected to finish successfully with both lines delivered.
    """
    received: list[str] = []

    def collect(_stream: object, chunk: str) -> None:
        # Only the chunk text matters here; the stream label is ignored.
        received.append(chunk)

    sandbox = ToolSandbox(
        allowed_root=tmp_path,
        timeout_ms=100,
        command_timeout_ms=2000,
        idle_timeout_ms=500,
    )
    outcome = sandbox.run_shell(
        command="printf 'first\\n'; sleep 0.2; printf 'second\\n'",
        output_callback=collect,
    )

    assert outcome.returncode == 0
    assert outcome.stdout == "first\nsecond\n"
    assert received == ["first\n", "second\n"]


def test_streaming_shell_timeout_kills_child_process_group(tmp_path: Path) -> None:
    """A timed-out streaming command must not leave a surviving child behind.

    The command spawns a nested shell that sleeps for one second and then
    touches a marker file, while ``command_timeout_ms`` is only 100 ms. The
    test expects a return code of ``-9`` and that the marker never appears.

    The marker would only be created about one second after launch, so
    checking it immediately after ``run_shell`` returns would pass even if
    the child were still alive. The test therefore keeps polling until well
    past the one-second mark before concluding the child was really killed.
    """
    marker = tmp_path / "child-survived"
    sandbox = ToolSandbox(
        allowed_root=tmp_path,
        timeout_ms=100,
        command_timeout_ms=100,
        idle_timeout_ms=1000,
    )

    started = time.monotonic()
    result = sandbox.run_shell(
        command=f"sh -c 'sleep 1; touch {marker}'",
        output_callback=lambda _stream, _chunk: None,
    )

    assert result.returncode == -9

    # Poll past the moment a surviving child would have touched the marker
    # (1 s after launch, plus a safety margin); fail as soon as it shows up.
    deadline = started + 1.5
    while time.monotonic() < deadline:
        assert not marker.exists()
        time.sleep(0.05)
    assert not marker.exists()


class _RecoveryCritic:
    async def generate(self, prompt: str, max_tokens: int | None = None) -> str:
        return '{"action":"continue","reason":"No matches is acceptable information for this exploratory check."}'


def test_failed_shell_step_can_recover_and_continue(tmp_path: Path) -> None:
    """A failing plan step can be accepted by the critic so the plan completes.

    The single step pipes text into ``grep`` with a pattern that does not
    match, and the test expects exit code 1 in the step metadata. The stub
    critic answers "continue", so the overall plan is expected to end as
    ``completed`` while the step result still records the failure.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)
    controller.execution_engine.set_critic(_RecoveryCritic())
    # Allow exactly one recovery attempt (pokes a private engine attribute).
    controller.execution_engine._recovery_limit = 1

    # Bypass the permission check — this test is about recovery, not permissions.
    perm_override = PermissionDecision(
        action_type="shell_command",
        pattern="grep",
        decision="allow_always",
    )
    result = controller.execution_engine.execute(
        UserTask(
            input="run grep with no matches and recover",
        ),
        ExecutionDirective(
            type="plan",
            payload={
                "steps": [
                    {
                        "id": "1",
                        "tool": "shell_exec",
                        "args": {"command": "printf 'abc\\n' | grep definitely_missing"},
                        "depends_on": [],
                    }
                ]
            },
        ),
        permission_override=perm_override,
    )

    assert result["status"] == "completed"
    # The failed step's own result is nested below the plan-level result.
    failed_result = result["result"]["step_results"][0]["result"]["result"]
    assert failed_result["metadata"]["exit_code"] == 1


def test_privilege_scope_failure_awaits_user_review_before_replan(tmp_path: Path) -> None:
    """A root-lock failure after sudo input pauses the task for user review.

    The real shell tool is swapped for a stub that always reports the dpkg
    frontend-lock error. After permission and the sudo secret are supplied,
    the task must end in ``awaiting_review`` with a ``privilege_scope_error``
    diagnosis and a ``model_planning_error`` critic classification.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    class FailingShellTool:
        """Shell tool stub that always fails with the dpkg lock error."""

        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            lock_message = (
                "Error: Unable to acquire the dpkg frontend lock "
                "(/var/lib/dpkg/lock-frontend), are you root?"
            )
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output=lock_message,
                error="Command failed with exit code 100",
                metadata={"exit_code": 100},
            )

    # Replace the registered shell tool directly in the registry's private map.
    controller.tool_registry._tools["shell_exec"] = FailingShellTool()

    task = UserTask(
        input="обнови систему",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "sudo apt update && apt upgrade -y"},
        },
    )

    initial = controller.handle_task(task)
    assert initial["status"] == "awaiting_permission"

    controller.resolve_permission(task_id=task.task_id, decision="allow_once")
    result = controller.resolve_secret(task_id=task.task_id, secret="secret")

    assert result["status"] == "awaiting_review"
    review = result["result"]["review"]
    assert review["diagnosis"]["type"] == "privilege_scope_error"
    assert review["critic_assessment"]["classification"] == "model_planning_error"


def test_plan_pauses_on_privilege_scope_review_instead_of_completing(tmp_path: Path) -> None:
    """A plan whose step hits the dpkg lock error stops in ``awaiting_review``.

    The plan is executed directly through the execution engine with the
    permission and the sudo secret pre-supplied, so the only thing under test
    is how the stubbed failure is handled.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    class FailingShellTool:
        """Shell tool stub that always fails with the dpkg lock error."""

        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            lock_message = (
                "Error: Unable to acquire the dpkg frontend lock "
                "(/var/lib/dpkg/lock-frontend), are you root?"
            )
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output=lock_message,
                error="Command failed with exit code 100",
                metadata={"exit_code": 100},
            )

    # Replace the registered shell tool directly in the registry's private map.
    controller.tool_registry._tools["shell_exec"] = FailingShellTool()

    upgrade_step = {
        "id": "1",
        "tool": "shell_exec",
        "args": {"command": "sudo apt update && apt upgrade -y"},
        "depends_on": [],
    }
    result = controller.execution_engine.execute(
        UserTask(input="обнови систему"),
        ExecutionDirective(type="plan", payload={"steps": [upgrade_step]}),
        permission_override=PermissionDecision(
            action_type="shell_command",
            pattern="apt",
            decision="allow_once",
        ),
        secret_override="secret",
    )

    assert result["status"] == "awaiting_review"
    diagnosis = result["result"]["review"]["diagnosis"]
    assert diagnosis["type"] == "privilege_scope_error"


def test_sudo_auth_failure_requests_secret_retry_not_review(tmp_path: Path) -> None:
    """A rejected sudo password leads to a new password prompt, not a review.

    The stubbed shell tool reports ``sudo_auth_failed`` in its metadata; the
    engine must answer with ``awaiting_input`` and a retry prompt.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    class BadPasswordShellTool:
        """Shell tool stub that always reports a sudo authentication failure."""

        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            sudo_output = (
                "Sorry, try again.\n"
                "sudo: no password was provided\n"
                "sudo: 1 incorrect password attempt\n"
            )
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output=sudo_output,
                error="Command failed with exit code 1",
                metadata={"exit_code": 1, "sudo_auth_failed": True},
            )

    # Replace the registered shell tool directly in the registry's private map.
    controller.tool_registry._tools["shell_exec"] = BadPasswordShellTool()

    upgrade_step = {
        "id": "1",
        "tool": "shell_exec",
        "args": {"command": "sudo apt update && apt upgrade -y"},
        "depends_on": [],
    }
    result = controller.execution_engine.execute(
        UserTask(input="обнови систему"),
        ExecutionDirective(type="plan", payload={"steps": [upgrade_step]}),
        permission_override=PermissionDecision(
            action_type="shell_command",
            pattern="apt",
            decision="allow_once",
        ),
        secret_override="wrong",
    )

    assert result["status"] == "awaiting_input"
    secret_request = result["result"]["secret_request"]
    assert secret_request["kind"] == "sudo_password"
    assert secret_request["prompt"] == "Sudo password incorrect. Try again"
    assert result["result"]["attempt_failed"] is True


def test_runtime_keeps_secret_state_after_bad_sudo_password(tmp_path: Path) -> None:
    """After a wrong sudo password the task stays resumable with a second secret.

    The stubbed shell tool fails authentication on its first call and succeeds
    on every later call, so the second ``resolve_secret`` must complete.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    class RetryPasswordShellTool:
        """Shell tool stub: first call rejects the password, later calls succeed."""

        calls = 0

        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            self.calls += 1
            if self.calls != 1:
                # Any call after the first one behaves like a successful sudo run.
                return ToolResult(
                    tool="shell_exec",
                    ok=True,
                    output="root\n",
                    metadata={"exit_code": 0},
                )
            sudo_output = (
                "Sorry, try again.\n"
                "sudo: no password was provided\n"
                "sudo: 1 incorrect password attempt\n"
            )
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output=sudo_output,
                error="Command failed with exit code 1",
                metadata={"exit_code": 1, "sudo_auth_failed": True},
            )

    # Replace the registered shell tool directly in the registry's private map.
    controller.tool_registry._tools["shell_exec"] = RetryPasswordShellTool()

    task = UserTask(
        input="кто root",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "sudo whoami"},
        },
    )
    task_id = task.task_id

    initial = controller.handle_task(task)
    assert initial["status"] == "awaiting_permission"

    allowed = controller.resolve_permission(task_id=task_id, decision="allow_once")
    assert allowed["status"] == "awaiting_input"

    # First secret is rejected by the stub: the runtime must ask again.
    retry = controller.resolve_secret(task_id=task_id, secret="wrong")
    assert retry["status"] == "awaiting_input"
    assert retry["result"]["attempt_failed"] is True

    # Second secret goes through and the stub's output is returned.
    final = controller.resolve_secret(task_id=task_id, secret="correct")
    assert final["status"] == "completed"
    assert final["result"]["output"] == "root\n"


def test_permission_resolution_can_resume_task(tmp_path: Path) -> None:
    """Resolving a pending permission with ``deny`` finishes the task as failed."""
    _write_config_tree(tmp_path)
    runtime = RuntimeController(base_dir=tmp_path)

    request = UserTask(input="запусти sudo apt update")
    pending = runtime.handle_task(request)
    assert pending["status"] == "awaiting_permission"

    denied = runtime.resolve_permission(task_id=pending["task_id"], decision="deny")
    assert denied["status"] == "failed"
    assert denied["result"]["error"] == "Permission denied by user."


def test_sudo_permission_resolution_requests_secret_input(tmp_path: Path) -> None:
    """Granting permission for a sudo command leads to a sudo password request."""
    _write_config_tree(tmp_path)
    runtime = RuntimeController(base_dir=tmp_path)

    pending = runtime.handle_task(UserTask(input="запусти sudo apt update"))
    assert pending["status"] == "awaiting_permission"

    after_grant = runtime.resolve_permission(
        task_id=pending["task_id"],
        decision="allow_once",
    )
    assert after_grant["status"] == "awaiting_input"
    secret_request = after_grant["result"]["secret_request"]
    assert secret_request["kind"] == "sudo_password"


def test_implicit_sudo_command_requests_password(tmp_path: Path) -> None:
    """A command without a literal ``sudo`` prefix can still trigger a password prompt.

    ``apt list --upgradable`` does not start with ``sudo``; the test expects
    the runtime to request a sudo password for it anyway once permission has
    been granted.
    """
    _write_config_tree(tmp_path)
    runtime = RuntimeController(base_dir=tmp_path)
    check_updates = UserTask(
        input="проверь обновления",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "apt list --upgradable"},
        },
    )

    pending = runtime.handle_task(check_updates)
    assert pending["status"] == "awaiting_permission"

    # Granting permission must be followed by a sudo password request.
    after_grant = runtime.resolve_permission(
        task_id=pending["task_id"],
        decision="allow_once",
    )
    assert after_grant["status"] == "awaiting_input"
    secret_request = after_grant["result"]["secret_request"]
    assert secret_request["kind"] == "sudo_password"


def test_secret_resolution_continues_after_pending_secret_saved(tmp_path: Path) -> None:
    """Supplying a secret to a task waiting for one yields a well-formed result.

    The outcome depends on the host, so any of ``completed``, ``failed`` or
    ``awaiting_input`` is accepted as long as the result payload carries an
    ``error`` or an ``output`` entry.
    """
    _write_config_tree(tmp_path)
    runtime = RuntimeController(base_dir=tmp_path)

    pending = runtime.handle_task(UserTask(input="запусти sudo apt update"))
    assert pending["status"] == "awaiting_permission"
    task_id = pending["task_id"]

    after_grant = runtime.resolve_permission(task_id=task_id, decision="allow_once")
    assert after_grant["status"] == "awaiting_input"

    final = runtime.resolve_secret(task_id=task_id, secret="wrongpass")
    acceptable_statuses = {"completed", "failed", "awaiting_input"}
    assert final["status"] in acceptable_statuses
    payload = final["result"]
    assert any(key in payload for key in ("error", "output"))