Support sudo password flow for shell commands

2026-05-20 22:45:19 +08:00 · 2026-05-20 22:45:19 +08:00 · 45b70d2800
parent 12bed7b6ba
commit 45b70d2800
11 changed files with 546 additions and 87 deletions
--- a/duck_core/api.py
+++ b/duck_core/api.py
@ -41,6 +41,11 @@ class ContinueRequest(BaseModel):
    approval_id: str
 class PasswordRequest(BaseModel):
    approval_id: str
    password: str
 def create_app() -> FastAPI:
    get_settings.cache_clear()
    settings = get_settings()
@ -423,7 +428,7 @@ def create_app() -> FastAPI:
                )
                messages = runtime.context_builder.build_basic_messages(task)
                tool_observations = [tool_observation]
-                if approval.decision != "deny":
+                if approval.decision != "deny" and not has_password_request(tool_observations):
                    tool_observations = await runtime._run_action_loop(
                        task_id,
                        messages,
@ -432,6 +437,30 @@ def create_app() -> FastAPI:
                    )
                async for tool_event in emit_tool_events(task_id, continued_event.sequence):
                    yield tool_event
                if has_password_request(tool_observations):
                    await task_store.update_status(task_id, "waiting_for_password")
                    password_event = await event_store.append(
                        task_id,
                        "tool_password_requested",
                        {
                            "approval_id": body.approval_id,
                            "tool": approval.normalized_action.get("tool"),
                            "action": redacted_action(approval.normalized_action),
                            "reason": "Sudo password is required to continue.",
                        },
                    )
                    yield sse("tool_password_requested", password_event.model_dump())
                    yield sse(
                        "done",
                        {
                            "task_id": task_id,
                            "conversation_id": conversation_id,
                            "status": "waiting_for_password",
                            "final_response": "Waiting for sudo password.",
                            "reasoning_content": None,
                        },
                    )
                    return
                if any(observation.get("requires_approval") for observation in tool_observations):
                    await task_store.waiting_for_approval(task_id)
                    await event_store.append(
@ -538,6 +567,162 @@ def create_app() -> FastAPI:
        return StreamingResponse(generator(), media_type="text/event-stream")
    def has_password_request(observations: list[dict[str, Any]]) -> bool:
        return any(
            observation.get("result", {}).get("metadata", {}).get("requires_password")
            for observation in observations
        )
    def redacted_action(action: dict[str, Any]) -> dict[str, Any]:
        redacted = json.loads(json.dumps(action))
        args = redacted.get("args")
        if isinstance(args, dict):
            args.pop("_password", None)
        return redacted
    @app.post("/v1/tasks/{task_id}/password/stream")
    async def password_task_stream(task_id: str, body: PasswordRequest) -> StreamingResponse:
        task = await task_store.get_task(task_id)
        if task is None:
            raise HTTPException(status_code=404, detail="Task not found")
        approval = await approvals.get(body.approval_id)
        if approval is None or approval.task_id != task_id:
            raise HTTPException(status_code=404, detail="Approval not found for task")
        if approval.decision is None:
            raise HTTPException(status_code=409, detail="Approval is still pending")
        conversation_id = await conversations.get_conversation_id_for_task(task_id)
        async def generator():
            reasoning_parts: list[str] = []
            content_parts: list[str] = []
            try:
                await task_store.update_status(task_id, "running")
                continued_event = await event_store.append(
                    task_id,
                    "task_password_submitted",
                    {"approval_id": body.approval_id},
                )
                tool_observation = await runtime._run_approved_or_denied_action(
                    task_id,
                    approval.normalized_action,
                    approval.decision,
                    password=body.password,
                )
                messages = runtime.context_builder.build_basic_messages(task)
                tool_observations = [tool_observation]
                if not has_password_request(tool_observations):
                    tool_observations = await runtime._run_action_loop(
                        task_id,
                        messages,
                        task.workspace,
                        initial_observations=tool_observations,
                    )
                async for tool_event in emit_tool_events(task_id, continued_event.sequence):
                    yield tool_event
                if has_password_request(tool_observations):
                    await task_store.update_status(task_id, "waiting_for_password")
                    password_event = await event_store.append(
                        task_id,
                        "tool_password_requested",
                        {
                            "approval_id": body.approval_id,
                            "tool": approval.normalized_action.get("tool"),
                            "action": redacted_action(approval.normalized_action),
                            "reason": "Sudo password is required to continue.",
                        },
                    )
                    yield sse("tool_password_requested", password_event.model_dump())
                    yield sse(
                        "done",
                        {
                            "task_id": task_id,
                            "conversation_id": conversation_id,
                            "status": "waiting_for_password",
                            "final_response": "Waiting for sudo password.",
                            "reasoning_content": None,
                        },
                    )
                    return
                messages = [
                    *messages,
                    {
                        "role": "user",
                        "content": "tool_observations:\n"
                        + json.dumps(tool_observations, ensure_ascii=False, indent=2),
                    },
                ]
                await event_store.append(task_id, "model_call_started", {"role": "thinker"})
                async for chunk in model_client.stream_chat("thinker", messages):
                    delta = str(chunk.get("delta") or "")
                    if chunk.get("type") == "reasoning_delta":
                        reasoning_parts.append(delta)
                        yield sse("reasoning_delta", {"task_id": task_id, "delta": delta})
                    elif chunk.get("type") == "content_delta":
                        content_parts.append(delta)
                        yield sse("content_delta", {"task_id": task_id, "delta": delta})
                content = "".join(content_parts)
                reasoning_content = "".join(reasoning_parts) or None
                await event_store.append(
                    task_id,
                    "cognition_response",
                    {
                        "role": "thinker",
                        "content": content,
                        "reasoning_content": reasoning_content,
                    },
                )
                await event_store.append(
                    task_id,
                    "model_call_finished",
                    {
                        "role": "thinker",
                        "model": model_client.get_role_config("thinker").model,
                    },
                )
                await task_store.complete_task(task_id, content)
                if conversation_id:
                    await conversations.add_message(
                        conversation_id,
                        "assistant",
                        content,
                        reasoning_content=reasoning_content,
                        task_id=task_id,
                        status="completed",
                    )
                await event_store.append(
                    task_id,
                    "task_completed",
                    {
                        "final_response": content,
                        "reasoning_content": reasoning_content,
                    },
                )
                yield sse(
                    "done",
                    {
                        "task_id": task_id,
                        "conversation_id": conversation_id,
                        "status": "completed",
                        "final_response": content,
                        "reasoning_content": reasoning_content,
                    },
                )
            except Exception as exc:
                await task_store.fail_task(task_id, str(exc))
                await event_store.append(task_id, "task_failed", {"error": str(exc)})
                yield sse(
                    "error",
                    {
                        "task_id": task_id,
                        "status": "failed",
                        "error": str(exc),
                    },
                )
        return StreamingResponse(generator(), media_type="text/event-stream")
    @app.post("/v1/tasks/{task_id}/cancel")
    async def cancel_task(task_id: str) -> dict[str, str]:
        await task_store.cancel_task(task_id)
--- a/duck_core/runtime_loop.py
+++ b/duck_core/runtime_loop.py
@ -248,7 +248,11 @@ class RuntimeLoop:
            )
    async def _run_approved_or_denied_action(
-        self, task_id: str, action: dict[str, Any], decision: str
+        self,
        task_id: str,
        action: dict[str, Any],
        decision: str,
        password: str | None = None,
    ) -> dict[str, Any]:
        tool_name = str(action.get("tool", ""))
        index = await self._approval_action_index(task_id, action)
@ -260,7 +264,7 @@ class RuntimeLoop:
            )
        else:
            gateway = ToolGateway.default((await self.task_store.get_task(task_id)).workspace or ".")
-            result = await gateway.run_action(action, approved=True)
+            result = await gateway.run_action(action, approved=True, password=password)
        result_payload = result.model_dump()
        await self.event_store.append(
--- a/duck_core/tools/gateway.py
+++ b/duck_core/tools/gateway.py
@ -4,7 +4,6 @@ from duck_core.tools.base import Tool, ToolResult
 from duck_core.tools.file_read import FileReadTool
 from duck_core.tools.file_write import FileWriteTool
 from duck_core.tools.list_dir import ListDirTool
 from duck_core.tools.os_update_check import OsUpdateCheckTool
 from duck_core.tools.search_files import SearchFilesTool
 from duck_core.tools.shell_exec_safe import ShellExecSafeTool
@ -21,12 +20,13 @@ class ToolGateway:
                FileWriteTool(workspace),
                ListDirTool(workspace),
                SearchFilesTool(workspace),
                OsUpdateCheckTool(workspace),
                ShellExecSafeTool(workspace),
            ]
        )
-    async def run_action(self, action: dict[str, Any], approved: bool = False) -> ToolResult:
+    async def run_action(
        self, action: dict[str, Any], approved: bool = False, password: str | None = None
    ) -> ToolResult:
        tool_name = str(action.get("tool", ""))
        tool = self.tools.get(tool_name)
        if tool is None:
@ -36,4 +36,6 @@ class ToolGateway:
            return ToolResult(ok=False, error="Tool args must be an object")
        if approved:
            args = {**args, "_approved": True}
        if password is not None:
            args = {**args, "_password": password}
        return await tool.run(args)
--- a/duck_core/tools/os_update_check.py
+++ b/duck_core/tools/os_update_check.py
@ -1,58 +0,0 @@
 import shutil
 import subprocess
 from typing import Any
 from duck_core.tools.base import ToolResult
 class OsUpdateCheckTool:
    name = "os_update_check"
    risk_level = "low"
    def __init__(self, workspace: str, timeout_seconds: int = 60, max_lines: int = 300):
        self.workspace = workspace
        self.timeout_seconds = timeout_seconds
        self.max_lines = max_lines
    async def run(self, args: dict[str, Any]) -> ToolResult:
        if shutil.which("apt"):
            return self._run_apt()
        return ToolResult(
            ok=False,
            error="No supported package manager found for update checks.",
            metadata={"supported_package_managers": ["apt"]},
        )
    def _run_apt(self) -> ToolResult:
        try:
            completed = subprocess.run(
                ["apt", "list", "--upgradable"],
                cwd=self.workspace,
                text=True,
                capture_output=True,
                timeout=self.timeout_seconds,
                check=False,
            )
        except subprocess.SubprocessError as exc:
            return ToolResult(ok=False, error=str(exc), metadata={"package_manager": "apt"})
        lines = completed.stdout.splitlines()
        package_lines = [line for line in lines if "/" in line and "upgradable from:" in line]
        output_lines = lines[: self.max_lines]
        truncated = len(lines) > self.max_lines
        if truncated:
            output_lines.append(f"... truncated after {self.max_lines} lines")
        return ToolResult(
            ok=completed.returncode == 0,
            output="\n".join(output_lines),
            error=completed.stderr or None,
            metadata={
                "package_manager": "apt",
                "command": "apt list --upgradable",
                "returncode": completed.returncode,
                "upgradable_count": len(package_lines),
                "refreshed_cache": False,
                "truncated": truncated,
            },
        )
--- a/duck_core/tools/shell_exec_safe.py
+++ b/duck_core/tools/shell_exec_safe.py
@ -13,6 +13,8 @@ ALLOWLIST = {
    "tail",
    "grep",
    "find",
    "apt list",
    "apt-cache policy",
    "pytest",
    "python -m pytest",
    "python3 -m pytest",
@ -23,7 +25,6 @@ ALLOWLIST = {
 BLOCKLIST = {
    "rm",
    "sudo",
    "su",
    "dd",
    "mkfs",
@ -58,17 +59,27 @@ class ShellExecSafeTool:
    async def run(self, args: dict[str, Any]) -> ToolResult:
        command = str(args.get("command", "")).strip()
        approved = bool(args.get("_approved"))
        password = args.get("_password")
        allowed, reason, blocked = self._is_allowed(command, approved=approved)
        if not allowed:
            metadata = {"blocked": True} if blocked else {"requires_approval": True}
            return ToolResult(ok=False, error=reason, metadata=metadata)
        if self._is_sudo_command(command) and not password:
            return ToolResult(
                ok=False,
                error="Sudo password is required to run this command.",
                metadata={"requires_password": True},
            )
        run_command = self._sudo_stdin_command(command) if self._is_sudo_command(command) else command
        input_text = f"{password}\n" if self._is_sudo_command(command) else None
        try:
            completed = subprocess.run(
-                command,
+                run_command,
                cwd=self.workspace,
                shell=True,
                text=True,
                capture_output=True,
                input=input_text,
                timeout=self.timeout_seconds,
                check=False,
            )
@ -93,6 +104,8 @@ class ShellExecSafeTool:
                return False, f"Command is blocked: {blocked}", True
        if approved:
            return True, None, False
        if self._is_sudo_command(command):
            return False, "Sudo command requires approval.", False
        prefix1 = parts[0] if parts else ""
        prefix2 = " ".join(parts[:2])
        prefix3 = " ".join(parts[:3])
@ -107,3 +120,14 @@ class ShellExecSafeTool:
        if " " in lowered_blocked or "|" in lowered_blocked:
            return lowered_command.startswith(lowered_blocked) or lowered_blocked in lowered_command
        return bool(parts) and parts[0].lower() == lowered_blocked
    def _is_sudo_command(self, command: str) -> bool:
        try:
            parts = shlex.split(command)
        except ValueError:
            return False
        return bool(parts) and parts[0] == "sudo"
    def _sudo_stdin_command(self, command: str) -> str:
        parts = shlex.split(command)
        return shlex.join(["sudo", "-S", "-p", "", *parts[1:]])
--- a/duck_core/web/static/app.js
+++ b/duck_core/web/static/app.js
@ -145,7 +145,6 @@ function formatToolCommand(tool, args) {
  if (tool === "file_write") return `$ file_write ${args.path || ""}`.trim();
  if (tool === "list_dir") return `$ list_dir ${args.path || "."}`.trim();
  if (tool === "search_files") return `$ search_files ${args.query || ""}`.trim();
  if (tool === "os_update_check") return "$ os_update_check";
  return `$ ${tool || "tool"}`;
 }
@ -213,6 +212,41 @@ function appendApprovalTerminal(article, eventPayload) {
  terminal?.append(createInlineApprovalActions(payload.approval_id, command));
 }
 function appendPasswordPrompt(article, eventPayload) {
  const payload = eventPayload.payload || eventPayload;
  const terminal = article?.querySelector(".tool-terminal.is-error") || article?.querySelector(".tool-terminal");
  if (!terminal) return;
  const body = terminal.querySelector(".tool-terminal-body");
  const status = terminal.querySelector(".tool-terminal-status");
  terminal.classList.add("is-waiting");
  terminal.dataset.approvalId = payload.approval_id || terminal.dataset.approvalId || "";
  terminal.dataset.taskId = eventPayload.task_id || terminal.dataset.taskId || "";
  if (status) status.textContent = "password";
  if (body) {
    body.textContent = [
      body.textContent.trim(),
      "",
      payload.reason || "Sudo password is required to continue.",
    ].filter(Boolean).join("\n");
  }
  if (!terminal.querySelector(".tool-password-form")) terminal.append(createPasswordForm());
 }
 function createPasswordForm() {
  const form = document.createElement("form");
  form.className = "tool-password-form";
  const input = document.createElement("input");
  input.type = "password";
  input.autocomplete = "current-password";
  input.placeholder = "Sudo password";
  input.required = true;
  const button = document.createElement("button");
  button.type = "submit";
  button.textContent = "Continue";
  form.append(input, button);
  return form;
 }
 function createInlineApprovalActions(approvalId, command) {
  const actions = document.createElement("div");
  actions.className = "tool-approval-actions";
@ -267,6 +301,22 @@ async function resolveInlineApproval(button) {
  }
 }
 async function submitToolPassword(form) {
  const terminal = form.closest(".tool-terminal");
  const article = terminal?.closest(".message");
  const taskId = terminal?.dataset.taskId;
  const approvalId = terminal?.dataset.approvalId;
  const input = form.querySelector("input");
  const password = input?.value || "";
  if (!terminal || !article || !taskId || !approvalId || !password) return;
  form.querySelectorAll("input, button").forEach((item) => {
    item.disabled = true;
  });
  await continueAfterPassword(article, taskId, approvalId, password);
  input.value = "";
  form.remove();
 }
 function humanApprovalDecision(action) {
  if (action === "allow_once") return "разрешено один раз";
  if (action === "allow_forever") return "разрешено навсегда";
@ -423,6 +473,11 @@ async function handleAssistantStreamEvent(pending, name, data, context) {
    appendApprovalTerminal(pending, data);
    return;
  }
  if (name === "tool_password_requested") {
    pending.querySelector(".message-meta span").textContent = "password";
    appendPasswordPrompt(pending, data);
    return;
  }
  if (name === "content_delta") {
    if (!context.contentStarted) {
      context.contentStarted = true;
@ -473,6 +528,30 @@ async function continueAfterInlineApproval(article, taskId, approvalId) {
  }
 }
 async function continueAfterPassword(article, taskId, approvalId, password) {
  if (!article || state.running) return;
  state.running = true;
  document.querySelector("#run").disabled = true;
  setStatus("#task-status", taskId, "warn");
  const context = {taskId, contentStarted: false};
  try {
    await streamSse(
      `/v1/tasks/${encodeURIComponent(taskId)}/password/stream`,
      {approval_id: approvalId, password},
      async ({name, data}) => handleAssistantStreamEvent(article, name, data, context),
    );
  } catch (error) {
    setMessagePending(article, error.message);
    article.querySelector(".message-meta span").textContent = "failed";
    setStatus("#task-status", "failed", "bad");
    await refreshEvents(taskId);
  } finally {
    state.running = false;
    document.querySelector("#run").disabled = false;
    document.querySelector("#message")?.focus();
  }
 }
 async function refreshConversations() {
  const list = document.querySelector("#conversation-list");
  if (!list) return;
@ -611,6 +690,12 @@ function bindChat() {
    const button = event.target.closest(".message-reasoning-toggle");
    if (button) toggleInlineReasoning(button);
  });
  document.querySelector("#messages")?.addEventListener("submit", (event) => {
    const form = event.target.closest(".tool-password-form");
    if (!form) return;
    event.preventDefault();
    submitToolPassword(form).catch(console.error);
  });
  document.querySelector("#debug")?.addEventListener("change", (event) => {
    document.querySelector("#debug-panel").hidden = !event.target.checked;
  });
--- a/duck_core/web/static/style.css
+++ b/duck_core/web/static/style.css
@ -635,6 +635,38 @@ dd {
  opacity: 0.65;
 }
 .tool-password-form {
  display: grid;
  grid-template-columns: minmax(0, 1fr) auto;
  gap: 8px;
  padding: 10px 12px 12px;
  border-top: 1px solid #1e293b;
  background: #111827;
 }
 .tool-password-form input {
  min-width: 0;
  border-color: #334155;
  background: #0f172a;
  color: #f8fafc;
 }
 .tool-password-form button {
  border: 0;
  border-radius: 7px;
  padding: 8px 10px;
  background: #2563eb;
  color: #ffffff;
  font-size: 12px;
  font-weight: 800;
 }
 .tool-password-form input:disabled,
 .tool-password-form button:disabled {
  cursor: wait;
  opacity: 0.65;
 }
 .message-meta {
  display: flex;
  align-items: center;
--- a/prompts/roles/action.md
+++ b/prompts/roles/action.md
@ -12,9 +12,6 @@ Available tools:
  Args: {"path": "."}
 - search_files: search text inside files in the current workspace.
  Args: {"query": "text to find", "path": ".", "glob": "*.py"}
 - os_update_check: check available operating system package updates without
  refreshing package manager caches and without sudo.
  Args: {}
 - shell_exec_safe: run a safe allowlisted shell command in the current workspace.
  Args: {"command": "pwd"}
@ -24,6 +21,9 @@ follow-up information. Return actions=[] when the observations are sufficient
 for the thinker to answer.
 Use only the listed tools. Keep actions minimal and directly tied to the user's
 request. Do not invent tool names.
-For user requests like "check updates in the system", use os_update_check.
+For user requests like "check updates in the system", use shell_exec_safe with
-Do not use apt update, apt-get update, sudo, su, or package installation commands
+`apt list --upgradable`. Do not use apt update, apt-get update, sudo, su, or
-for update checks.
+package installation commands for a read-only update check.
 For commands that genuinely require elevated privileges, use the real system
 command with `sudo` through shell_exec_safe. DuckLM will ask the user for
 approval and then for the sudo password when the command needs it.
--- a/tests/smoke/test_api_stream_chat.py
+++ b/tests/smoke/test_api_stream_chat.py
@ -177,3 +177,149 @@ def test_continue_stream_executes_approved_tool_and_streams_answer(tmp_path, mon
    conversation = client.get(f"/v1/conversations/{conversation_id}").json()
    assert conversation["messages"][-1]["content"] == "continued after approval"
    assert conversation["messages"][-1]["status"] == "completed"
 def test_continue_stream_requests_password_for_approved_sudo(tmp_path, monkeypatch):
    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
    async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None):
        assert role == "action"
        actions = []
        if not any("tool_observations" in message["content"] for message in messages):
            actions = [
                {
                    "tool": "shell_exec_safe",
                    "args": {"command": "sudo apt update"},
                    "reason": "Check updates with root privileges",
                }
            ]
        return ModelResponse(
            role=role,
            model="local-main",
            content=json.dumps(
                {
                    "kind": "action_directive",
                    "intent": "run sudo command",
                    "risk_level": "medium",
                    "actions": actions,
                }
            ),
            reasoning_content=None,
            raw={},
            latency_ms=1.0,
        )
    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
    client = TestClient(create_app())
    with client.stream(
        "POST",
        "/v1/chat/stream",
        json={"message": "run sudo update", "workspace": str(tmp_path), "debug": True},
    ) as response:
        initial_body = "".join(response.iter_text())
    task_id = re.search(r'"task_id"\s*:\s*"([^"]+)"', initial_body).group(1)
    approval = next(
        item for item in client.get("/v1/approvals/pending").json()
        if item["task_id"] == task_id
    )
    client.post(f"/v1/approvals/{approval['approval_id']}/allow_once")
    with client.stream(
        "POST",
        f"/v1/tasks/{task_id}/continue/stream",
        json={"approval_id": approval["approval_id"]},
    ) as response:
        body = "".join(response.iter_text())
    assert response.status_code == 200
    assert "event: tool_password_requested" in body
    assert "waiting_for_password" in body
    assert "password" in body.lower()
 def test_password_stream_runs_sudo_with_password_and_streams_answer(tmp_path, monkeypatch):
    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
    calls = []
    class Completed:
        returncode = 0
        stdout = "sudo updated\n"
        stderr = ""
    def fake_run(command, **kwargs):
        calls.append((command, kwargs))
        return Completed()
    async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None):
        if role == "action":
            actions = []
            if not any("tool_observations" in message["content"] for message in messages):
                actions = [
                    {
                        "tool": "shell_exec_safe",
                        "args": {"command": "sudo apt update"},
                        "reason": "Check updates with root privileges",
                    }
                ]
            return ModelResponse(
                role=role,
                model="local-main",
                content=json.dumps(
                    {
                        "kind": "action_directive",
                        "intent": "run sudo command",
                        "risk_level": "medium",
                        "actions": actions,
                    }
                ),
                reasoning_content=None,
                raw={},
                latency_ms=1.0,
            )
        raise AssertionError("non-stream thinker should not be used")
    async def fake_stream_chat(self, role, messages):
        assert role == "thinker"
        observation = next(message for message in messages if "tool_observations" in message["content"])
        assert "sudo updated" in observation["content"]
        assert "secret" not in observation["content"]
        yield {"type": "content_delta", "delta": "sudo command completed"}
    monkeypatch.setattr("duck_core.tools.shell_exec_safe.subprocess.run", fake_run)
    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
    monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat)
    client = TestClient(create_app())
    with client.stream(
        "POST",
        "/v1/chat/stream",
        json={"message": "run sudo update", "workspace": str(tmp_path), "debug": True},
    ) as response:
        initial_body = "".join(response.iter_text())
    task_id = re.search(r'"task_id"\s*:\s*"([^"]+)"', initial_body).group(1)
    approval = next(
        item for item in client.get("/v1/approvals/pending").json()
        if item["task_id"] == task_id
    )
    client.post(f"/v1/approvals/{approval['approval_id']}/allow_once")
    with client.stream(
        "POST",
        f"/v1/tasks/{task_id}/continue/stream",
        json={"approval_id": approval["approval_id"]},
    ) as response:
        _ = "".join(response.iter_text())
    with client.stream(
        "POST",
        f"/v1/tasks/{task_id}/password/stream",
        json={"approval_id": approval["approval_id"], "password": "secret"},
    ) as response:
        body = "".join(response.iter_text())
    assert response.status_code == 200
    assert calls[0][1]["input"] == "secret\n"
    assert "event: tool_call_finished" in body
    assert "event: content_delta" in body
    assert "sudo command completed" in body
    assert "secret" not in body
--- a/tests/smoke/test_runtime_tools.py
+++ b/tests/smoke/test_runtime_tools.py
@ -106,7 +106,13 @@ class FakeUpdateCheckModelClient:
        if role == "action":
            actions = []
            if not any("tool_observations" in message["content"] for message in messages):
-                actions = [{"tool": "os_update_check", "args": {}, "reason": "Check OS updates"}]
+                actions = [
                    {
                        "tool": "shell_exec_safe",
                        "args": {"command": "apt list --upgradable"},
                        "reason": "Check OS updates",
                    }
                ]
            return ModelResponse(
                role=role,
                model="local-main",
@ -124,7 +130,7 @@ class FakeUpdateCheckModelClient:
            )
        assert role == "thinker"
        observation_text = "\n".join(message["content"] for message in messages)
-        assert "os_update_check" in observation_text
+        assert "apt list --upgradable" in observation_text
        assert "requires_approval" not in observation_text
        return ModelResponse(
            role=role,
@ -191,7 +197,7 @@ async def test_runtime_checks_system_updates_without_approval_loop(tmp_path):
    assert not any(event.event_type == "tool_approval_requested" for event in events)
    assert any(
        event.event_type == "tool_call_finished"
-        and event.payload["tool"] == "os_update_check"
+        and event.payload["tool"] == "shell_exec_safe"
        for event in events
    )
--- a/tests/smoke/test_tool_gateway.py
+++ b/tests/smoke/test_tool_gateway.py
@ -3,7 +3,6 @@ import pytest
 from duck_core.tools.file_read import FileReadTool
 from duck_core.tools.file_write import FileWriteTool
 from duck_core.tools.gateway import ToolGateway
 from duck_core.tools.os_update_check import OsUpdateCheckTool
 from duck_core.tools.shell_exec_safe import ShellExecSafeTool
@ -34,8 +33,45 @@ async def test_shell_tool_blocks_dangerous_commands(tmp_path):
    assert blocked.metadata.get("requires_approval") is not True
    assert blocked.metadata["blocked"] is True
    assert sudo.ok is False
-    assert sudo.metadata.get("requires_approval") is not True
+    assert sudo.metadata["requires_approval"] is True
-    assert sudo.metadata["blocked"] is True
+
@pytest.mark.asyncio
 async def test_shell_tool_requests_password_for_approved_sudo(tmp_path):
    shell = ShellExecSafeTool(str(tmp_path))
    result = await shell.run({"command": "sudo apt update", "_approved": True})
    assert result.ok is False
    assert result.metadata["requires_password"] is True
    assert "password" in result.error.lower()
@pytest.mark.asyncio
 async def test_shell_tool_passes_password_to_sudo_stdin(monkeypatch, tmp_path):
    calls = []
    class Completed:
        returncode = 0
        stdout = "updated\n"
        stderr = ""
    def fake_run(command, **kwargs):
        calls.append((command, kwargs))
        return Completed()
    monkeypatch.setattr("duck_core.tools.shell_exec_safe.subprocess.run", fake_run)
    shell = ShellExecSafeTool(str(tmp_path))
    result = await shell.run(
        {"command": "sudo apt update", "_approved": True, "_password": "secret"}
    )
    command, kwargs = calls[0]
    assert result.ok is True
    assert "sudo -S -p '' apt update" == command
    assert kwargs["input"] == "secret\n"
    assert "secret" not in str(result.model_dump())
@pytest.mark.asyncio
@ -86,23 +122,20 @@ async def test_tool_gateway_searches_file_contents(tmp_path):
@pytest.mark.asyncio
-async def test_os_update_check_reports_apt_upgradable_packages(monkeypatch, tmp_path):
+async def test_shell_tool_allows_read_only_apt_update_check(monkeypatch, tmp_path):
    class Completed:
        returncode = 0
        stdout = "Listing...\\nbootlogd/stable 3.14-4 amd64 [upgradable from: 3.06-4]\\n"
        stderr = "WARNING: apt does not have a stable CLI interface.\\n"
    monkeypatch.setattr("duck_core.tools.os_update_check.shutil.which", lambda name: "/usr/bin/apt")
    monkeypatch.setattr(
-        "duck_core.tools.os_update_check.subprocess.run",
+        "duck_core.tools.shell_exec_safe.subprocess.run",
        lambda *args, **kwargs: Completed(),
    )
-    tool = OsUpdateCheckTool(str(tmp_path))
+    shell = ShellExecSafeTool(str(tmp_path))
-    result = await tool.run({})
+    result = await shell.run({"command": "apt list --upgradable"})
    assert result.ok is True
    assert "bootlogd" in result.output
-    assert result.metadata["package_manager"] == "apt"
+    assert result.metadata["command"] == "apt list --upgradable"
    assert result.metadata["upgradable_count"] == 1
    assert result.metadata["refreshed_cache"] is False