diff --git a/duck_core/api.py b/duck_core/api.py index baa3eb7..a20f94e 100644 --- a/duck_core/api.py +++ b/duck_core/api.py @@ -41,6 +41,11 @@ class ContinueRequest(BaseModel): approval_id: str +class PasswordRequest(BaseModel): + approval_id: str + password: str + + def create_app() -> FastAPI: get_settings.cache_clear() settings = get_settings() @@ -423,7 +428,7 @@ def create_app() -> FastAPI: ) messages = runtime.context_builder.build_basic_messages(task) tool_observations = [tool_observation] - if approval.decision != "deny": + if approval.decision != "deny" and not has_password_request(tool_observations): tool_observations = await runtime._run_action_loop( task_id, messages, @@ -432,6 +437,30 @@ def create_app() -> FastAPI: ) async for tool_event in emit_tool_events(task_id, continued_event.sequence): yield tool_event + if has_password_request(tool_observations): + await task_store.update_status(task_id, "waiting_for_password") + password_event = await event_store.append( + task_id, + "tool_password_requested", + { + "approval_id": body.approval_id, + "tool": approval.normalized_action.get("tool"), + "action": redacted_action(approval.normalized_action), + "reason": "Sudo password is required to continue.", + }, + ) + yield sse("tool_password_requested", password_event.model_dump()) + yield sse( + "done", + { + "task_id": task_id, + "conversation_id": conversation_id, + "status": "waiting_for_password", + "final_response": "Waiting for sudo password.", + "reasoning_content": None, + }, + ) + return if any(observation.get("requires_approval") for observation in tool_observations): await task_store.waiting_for_approval(task_id) await event_store.append( @@ -538,6 +567,162 @@ def create_app() -> FastAPI: return StreamingResponse(generator(), media_type="text/event-stream") + def has_password_request(observations: list[dict[str, Any]]) -> bool: + return any( + observation.get("result", {}).get("metadata", {}).get("requires_password") + for observation in observations + ) + + def redacted_action(action: dict[str, Any]) -> dict[str, Any]: + redacted = json.loads(json.dumps(action)) + args = redacted.get("args") + if isinstance(args, dict): + args.pop("_password", None) + return redacted + + @app.post("/v1/tasks/{task_id}/password/stream") + async def password_task_stream(task_id: str, body: PasswordRequest) -> StreamingResponse: + task = await task_store.get_task(task_id) + if task is None: + raise HTTPException(status_code=404, detail="Task not found") + approval = await approvals.get(body.approval_id) + if approval is None or approval.task_id != task_id: + raise HTTPException(status_code=404, detail="Approval not found for task") + if approval.decision is None: + raise HTTPException(status_code=409, detail="Approval is still pending") + conversation_id = await conversations.get_conversation_id_for_task(task_id) + + async def generator(): + reasoning_parts: list[str] = [] + content_parts: list[str] = [] + try: + await task_store.update_status(task_id, "running") + continued_event = await event_store.append( + task_id, + "task_password_submitted", + {"approval_id": body.approval_id}, + ) + tool_observation = await runtime._run_approved_or_denied_action( + task_id, + approval.normalized_action, + approval.decision, + password=body.password, + ) + messages = runtime.context_builder.build_basic_messages(task) + tool_observations = [tool_observation] + if not has_password_request(tool_observations): + tool_observations = await runtime._run_action_loop( + task_id, + messages, + task.workspace, + initial_observations=tool_observations, + ) + async for tool_event in emit_tool_events(task_id, continued_event.sequence): + yield tool_event + if has_password_request(tool_observations): + await task_store.update_status(task_id, "waiting_for_password") + password_event = await event_store.append( + task_id, + "tool_password_requested", + { + "approval_id": body.approval_id, + "tool": approval.normalized_action.get("tool"), + "action": redacted_action(approval.normalized_action), + "reason": "Sudo password is required to continue.", + }, + ) + yield sse("tool_password_requested", password_event.model_dump()) + yield sse( + "done", + { + "task_id": task_id, + "conversation_id": conversation_id, + "status": "waiting_for_password", + "final_response": "Waiting for sudo password.", + "reasoning_content": None, + }, + ) + return + + messages = [ + *messages, + { + "role": "user", + "content": "tool_observations:\n" + + json.dumps(tool_observations, ensure_ascii=False, indent=2), + }, + ] + await event_store.append(task_id, "model_call_started", {"role": "thinker"}) + async for chunk in model_client.stream_chat("thinker", messages): + delta = str(chunk.get("delta") or "") + if chunk.get("type") == "reasoning_delta": + reasoning_parts.append(delta) + yield sse("reasoning_delta", {"task_id": task_id, "delta": delta}) + elif chunk.get("type") == "content_delta": + content_parts.append(delta) + yield sse("content_delta", {"task_id": task_id, "delta": delta}) + + content = "".join(content_parts) + reasoning_content = "".join(reasoning_parts) or None + await event_store.append( + task_id, + "cognition_response", + { + "role": "thinker", + "content": content, + "reasoning_content": reasoning_content, + }, + ) + await event_store.append( + task_id, + "model_call_finished", + { + "role": "thinker", + "model": model_client.get_role_config("thinker").model, + }, + ) + await task_store.complete_task(task_id, content) + if conversation_id: + await conversations.add_message( + conversation_id, + "assistant", + content, + reasoning_content=reasoning_content, + task_id=task_id, + status="completed", + ) + await event_store.append( + task_id, + "task_completed", + { + "final_response": content, + "reasoning_content": reasoning_content, + }, + ) + yield sse( + "done", + { + "task_id": task_id, + "conversation_id": conversation_id, + "status": "completed", + "final_response": content, + "reasoning_content": reasoning_content, + }, + ) + except Exception as exc: + await task_store.fail_task(task_id, str(exc)) + await event_store.append(task_id, "task_failed", {"error": str(exc)}) + yield sse( + "error", + { + "task_id": task_id, + "status": "failed", + "error": str(exc), + }, + ) + + return StreamingResponse(generator(), media_type="text/event-stream") + @app.post("/v1/tasks/{task_id}/cancel") async def cancel_task(task_id: str) -> dict[str, str]: await task_store.cancel_task(task_id) diff --git a/duck_core/runtime_loop.py b/duck_core/runtime_loop.py index 552ffe7..43c8f1b 100644 --- a/duck_core/runtime_loop.py +++ b/duck_core/runtime_loop.py @@ -248,7 +248,11 @@ class RuntimeLoop: ) async def _run_approved_or_denied_action( - self, task_id: str, action: dict[str, Any], decision: str + self, + task_id: str, + action: dict[str, Any], + decision: str, + password: str | None = None, ) -> dict[str, Any]: tool_name = str(action.get("tool", "")) index = await self._approval_action_index(task_id, action) @@ -260,7 +264,7 @@ class RuntimeLoop: ) else: gateway = ToolGateway.default((await self.task_store.get_task(task_id)).workspace or ".") - result = await gateway.run_action(action, approved=True) + result = await gateway.run_action(action, approved=True, password=password) result_payload = result.model_dump() await self.event_store.append( diff --git a/duck_core/tools/gateway.py b/duck_core/tools/gateway.py index 28df56b..88dde52 100644 --- a/duck_core/tools/gateway.py +++ b/duck_core/tools/gateway.py @@ -4,7 +4,6 @@ from duck_core.tools.base import Tool, ToolResult from duck_core.tools.file_read import FileReadTool from duck_core.tools.file_write import FileWriteTool from duck_core.tools.list_dir import ListDirTool -from duck_core.tools.os_update_check import OsUpdateCheckTool from duck_core.tools.search_files import SearchFilesTool from duck_core.tools.shell_exec_safe import ShellExecSafeTool @@ -21,12 +20,13 @@ class ToolGateway: FileWriteTool(workspace), ListDirTool(workspace), SearchFilesTool(workspace), - OsUpdateCheckTool(workspace), ShellExecSafeTool(workspace), ] ) - async def run_action(self, action: dict[str, Any], approved: bool = False) -> ToolResult: + async def run_action( + self, action: dict[str, Any], approved: bool = False, password: str | None = None + ) -> ToolResult: tool_name = str(action.get("tool", "")) tool = self.tools.get(tool_name) if tool is None: @@ -36,4 +36,6 @@ class ToolGateway: return ToolResult(ok=False, error="Tool args must be an object") if approved: args = {**args, "_approved": True} + if password is not None: + args = {**args, "_password": password} return await tool.run(args) diff --git a/duck_core/tools/os_update_check.py b/duck_core/tools/os_update_check.py deleted file mode 100644 index 2555733..0000000 --- a/duck_core/tools/os_update_check.py +++ /dev/null @@ -1,58 +0,0 @@ -import shutil -import subprocess -from typing import Any - -from duck_core.tools.base import ToolResult - - -class OsUpdateCheckTool: - name = "os_update_check" - risk_level = "low" - - def __init__(self, workspace: str, timeout_seconds: int = 60, max_lines: int = 300): - self.workspace = workspace - self.timeout_seconds = timeout_seconds - self.max_lines = max_lines - - async def run(self, args: dict[str, Any]) -> ToolResult: - if shutil.which("apt"): - return self._run_apt() - return ToolResult( - ok=False, - error="No supported package manager found for update checks.", - metadata={"supported_package_managers": ["apt"]}, - ) - - def _run_apt(self) -> ToolResult: - try: - completed = subprocess.run( - ["apt", "list", "--upgradable"], - cwd=self.workspace, - text=True, - capture_output=True, - timeout=self.timeout_seconds, - check=False, - ) - except subprocess.SubprocessError as exc: - return ToolResult(ok=False, error=str(exc), metadata={"package_manager": "apt"}) - - lines = completed.stdout.splitlines() - package_lines = [line for line in lines if "/" in line and "upgradable from:" in line] - output_lines = lines[: self.max_lines] - truncated = len(lines) > self.max_lines - if truncated: - output_lines.append(f"... truncated after {self.max_lines} lines") - - return ToolResult( - ok=completed.returncode == 0, - output="\n".join(output_lines), - error=completed.stderr or None, - metadata={ - "package_manager": "apt", - "command": "apt list --upgradable", - "returncode": completed.returncode, - "upgradable_count": len(package_lines), - "refreshed_cache": False, - "truncated": truncated, - }, - ) diff --git a/duck_core/tools/shell_exec_safe.py b/duck_core/tools/shell_exec_safe.py index d06836d..0664dce 100644 --- a/duck_core/tools/shell_exec_safe.py +++ b/duck_core/tools/shell_exec_safe.py @@ -13,6 +13,8 @@ ALLOWLIST = { "tail", "grep", "find", + "apt list", + "apt-cache policy", "pytest", "python -m pytest", "python3 -m pytest", @@ -23,7 +25,6 @@ ALLOWLIST = { BLOCKLIST = { "rm", - "sudo", "su", "dd", "mkfs", @@ -58,17 +59,27 @@ class ShellExecSafeTool: async def run(self, args: dict[str, Any]) -> ToolResult: command = str(args.get("command", "")).strip() approved = bool(args.get("_approved")) + password = args.get("_password") allowed, reason, blocked = self._is_allowed(command, approved=approved) if not allowed: metadata = {"blocked": True} if blocked else {"requires_approval": True} return ToolResult(ok=False, error=reason, metadata=metadata) + if self._is_sudo_command(command) and not password: + return ToolResult( + ok=False, + error="Sudo password is required to run this command.", + metadata={"requires_password": True}, + ) + run_command = self._sudo_stdin_command(command) if self._is_sudo_command(command) else command + input_text = f"{password}\n" if self._is_sudo_command(command) else None try: completed = subprocess.run( - command, + run_command, cwd=self.workspace, shell=True, text=True, capture_output=True, + input=input_text, timeout=self.timeout_seconds, check=False, ) @@ -93,6 +104,8 @@ class ShellExecSafeTool: return False, f"Command is blocked: {blocked}", True if approved: return True, None, False + if self._is_sudo_command(command): + return False, "Sudo command requires approval.", False prefix1 = parts[0] if parts else "" prefix2 = " ".join(parts[:2]) prefix3 = " ".join(parts[:3]) @@ -107,3 +120,14 @@ class ShellExecSafeTool: if " " in lowered_blocked or "|" in lowered_blocked: return lowered_command.startswith(lowered_blocked) or lowered_blocked in lowered_command return bool(parts) and parts[0].lower() == lowered_blocked + + def _is_sudo_command(self, command: str) -> bool: + try: + parts = shlex.split(command) + except ValueError: + return False + return bool(parts) and parts[0] == "sudo" + + def _sudo_stdin_command(self, command: str) -> str: + parts = shlex.split(command) + return shlex.join(["sudo", "-S", "-p", "", *parts[1:]]) diff --git a/duck_core/web/static/app.js b/duck_core/web/static/app.js index 2481901..960e849 100644 --- a/duck_core/web/static/app.js +++ b/duck_core/web/static/app.js @@ -145,7 +145,6 @@ function formatToolCommand(tool, args) { if (tool === "file_write") return `$ file_write ${args.path || ""}`.trim(); if (tool === "list_dir") return `$ list_dir ${args.path || "."}`.trim(); if (tool === "search_files") return `$ search_files ${args.query || ""}`.trim(); - if (tool === "os_update_check") return "$ os_update_check"; return `$ ${tool || "tool"}`; } @@ -213,6 +212,41 @@ function appendApprovalTerminal(article, eventPayload) { terminal?.append(createInlineApprovalActions(payload.approval_id, command)); } +function appendPasswordPrompt(article, eventPayload) { + const payload = eventPayload.payload || eventPayload; + const terminal = article?.querySelector(".tool-terminal.is-error") || article?.querySelector(".tool-terminal"); + if (!terminal) return; + const body = terminal.querySelector(".tool-terminal-body"); + const status = terminal.querySelector(".tool-terminal-status"); + terminal.classList.add("is-waiting"); + terminal.dataset.approvalId = payload.approval_id || terminal.dataset.approvalId || ""; + terminal.dataset.taskId = eventPayload.task_id || terminal.dataset.taskId || ""; + if (status) status.textContent = "password"; + if (body) { + body.textContent = [ + body.textContent.trim(), + "", + payload.reason || "Sudo password is required to continue.", + ].filter(Boolean).join("\n"); + } + if (!terminal.querySelector(".tool-password-form")) terminal.append(createPasswordForm()); +} + +function createPasswordForm() { + const form = document.createElement("form"); + form.className = "tool-password-form"; + const input = document.createElement("input"); + input.type = "password"; + input.autocomplete = "current-password"; + input.placeholder = "Sudo password"; + input.required = true; + const button = document.createElement("button"); + button.type = "submit"; + button.textContent = "Continue"; + form.append(input, button); + return form; +} + function createInlineApprovalActions(approvalId, command) { const actions = document.createElement("div"); actions.className = "tool-approval-actions"; @@ -267,6 +301,22 @@ async function resolveInlineApproval(button) { } } +async function submitToolPassword(form) { + const terminal = form.closest(".tool-terminal"); + const article = terminal?.closest(".message"); + const taskId = terminal?.dataset.taskId; + const approvalId = terminal?.dataset.approvalId; + const input = form.querySelector("input"); + const password = input?.value || ""; + if (!terminal || !article || !taskId || !approvalId || !password) return; + form.querySelectorAll("input, button").forEach((item) => { + item.disabled = true; + }); + await continueAfterPassword(article, taskId, approvalId, password); + input.value = ""; + form.remove(); +} + function humanApprovalDecision(action) { if (action === "allow_once") return "разрешено один раз"; if (action === "allow_forever") return "разрешено навсегда"; @@ -423,6 +473,11 @@ async function handleAssistantStreamEvent(pending, name, data, context) { appendApprovalTerminal(pending, data); return; } + if (name === "tool_password_requested") { + pending.querySelector(".message-meta span").textContent = "password"; + appendPasswordPrompt(pending, data); + return; + } if (name === "content_delta") { if (!context.contentStarted) { context.contentStarted = true; @@ -473,6 +528,30 @@ async function continueAfterInlineApproval(article, taskId, approvalId) { } } +async function continueAfterPassword(article, taskId, approvalId, password) { + if (!article || state.running) return; + state.running = true; + document.querySelector("#run").disabled = true; + setStatus("#task-status", taskId, "warn"); + const context = {taskId, contentStarted: false}; + try { + await streamSse( + `/v1/tasks/${encodeURIComponent(taskId)}/password/stream`, + {approval_id: approvalId, password}, + async ({name, data}) => handleAssistantStreamEvent(article, name, data, context), + ); + } catch (error) { + setMessagePending(article, error.message); + article.querySelector(".message-meta span").textContent = "failed"; + setStatus("#task-status", "failed", "bad"); + await refreshEvents(taskId); + } finally { + state.running = false; + document.querySelector("#run").disabled = false; + document.querySelector("#message")?.focus(); + } +} + async function refreshConversations() { const list = document.querySelector("#conversation-list"); if (!list) return; @@ -611,6 +690,12 @@ function bindChat() { const button = event.target.closest(".message-reasoning-toggle"); if (button) toggleInlineReasoning(button); }); + document.querySelector("#messages")?.addEventListener("submit", (event) => { + const form = event.target.closest(".tool-password-form"); + if (!form) return; + event.preventDefault(); + submitToolPassword(form).catch(console.error); + }); document.querySelector("#debug")?.addEventListener("change", (event) => { document.querySelector("#debug-panel").hidden = !event.target.checked; }); diff --git a/duck_core/web/static/style.css b/duck_core/web/static/style.css index a972cbe..76e1a11 100644 --- a/duck_core/web/static/style.css +++ b/duck_core/web/static/style.css @@ -635,6 +635,38 @@ dd { opacity: 0.65; } +.tool-password-form { + display: grid; + grid-template-columns: minmax(0, 1fr) auto; + gap: 8px; + padding: 10px 12px 12px; + border-top: 1px solid #1e293b; + background: #111827; +} + +.tool-password-form input { + min-width: 0; + border-color: #334155; + background: #0f172a; + color: #f8fafc; +} + +.tool-password-form button { + border: 0; + border-radius: 7px; + padding: 8px 10px; + background: #2563eb; + color: #ffffff; + font-size: 12px; + font-weight: 800; +} + +.tool-password-form input:disabled, +.tool-password-form button:disabled { + cursor: wait; + opacity: 0.65; +} + .message-meta { display: flex; align-items: center; diff --git a/prompts/roles/action.md b/prompts/roles/action.md index f3daed5..f64e16c 100644 --- a/prompts/roles/action.md +++ b/prompts/roles/action.md @@ -12,9 +12,6 @@ Available tools: Args: {"path": "."} - search_files: search text inside files in the current workspace. Args: {"query": "text to find", "path": ".", "glob": "*.py"} -- os_update_check: check available operating system package updates without - refreshing package manager caches and without sudo. - Args: {} - shell_exec_safe: run a safe allowlisted shell command in the current workspace. Args: {"command": "pwd"} @@ -24,6 +21,9 @@ follow-up information. Return actions=[] when the observations are sufficient for the thinker to answer. Use only the listed tools. Keep actions minimal and directly tied to the user's request. Do not invent tool names. -For user requests like "check updates in the system", use os_update_check. -Do not use apt update, apt-get update, sudo, su, or package installation commands -for update checks. +For user requests like "check updates in the system", use shell_exec_safe with +`apt list --upgradable`. Do not use apt update, apt-get update, sudo, su, or +package installation commands for a read-only update check. +For commands that genuinely require elevated privileges, use the real system +command with `sudo` through shell_exec_safe. DuckLM will ask the user for +approval and then for the sudo password when the command needs it. diff --git a/tests/smoke/test_api_stream_chat.py b/tests/smoke/test_api_stream_chat.py index e18e78f..0a1921f 100644 --- a/tests/smoke/test_api_stream_chat.py +++ b/tests/smoke/test_api_stream_chat.py @@ -177,3 +177,149 @@ def test_continue_stream_executes_approved_tool_and_streams_answer(tmp_path, mon conversation = client.get(f"/v1/conversations/{conversation_id}").json() assert conversation["messages"][-1]["content"] == "continued after approval" assert conversation["messages"][-1]["status"] == "completed" + + +def test_continue_stream_requests_password_for_approved_sudo(tmp_path, monkeypatch): + monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3")) + + async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None): + assert role == "action" + actions = [] + if not any("tool_observations" in message["content"] for message in messages): + actions = [ + { + "tool": "shell_exec_safe", + "args": {"command": "sudo apt update"}, + "reason": "Check updates with root privileges", + } + ] + return ModelResponse( + role=role, + model="local-main", + content=json.dumps( + { + "kind": "action_directive", + "intent": "run sudo command", + "risk_level": "medium", + "actions": actions, + } + ), + reasoning_content=None, + raw={}, + latency_ms=1.0, + ) + + monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat) + client = TestClient(create_app()) + + with client.stream( + "POST", + "/v1/chat/stream", + json={"message": "run sudo update", "workspace": str(tmp_path), "debug": True}, + ) as response: + initial_body = "".join(response.iter_text()) + task_id = re.search(r'"task_id"\s*:\s*"([^"]+)"', initial_body).group(1) + approval = next( + item for item in client.get("/v1/approvals/pending").json() + if item["task_id"] == task_id + ) + client.post(f"/v1/approvals/{approval['approval_id']}/allow_once") + + with client.stream( + "POST", + f"/v1/tasks/{task_id}/continue/stream", + json={"approval_id": approval["approval_id"]}, + ) as response: + body = "".join(response.iter_text()) + + assert response.status_code == 200 + assert "event: tool_password_requested" in body + assert "waiting_for_password" in body + assert "password" in body.lower() + + +def test_password_stream_runs_sudo_with_password_and_streams_answer(tmp_path, monkeypatch): + monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3")) + calls = [] + + class Completed: + returncode = 0 + stdout = "sudo updated\n" + stderr = "" + + def fake_run(command, **kwargs): + calls.append((command, kwargs)) + return Completed() + + async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None): + if role == "action": + actions = [] + if not any("tool_observations" in message["content"] for message in messages): + actions = [ + { + "tool": "shell_exec_safe", + "args": {"command": "sudo apt update"}, + "reason": "Check updates with root privileges", + } + ] + return ModelResponse( + role=role, + model="local-main", + content=json.dumps( + { + "kind": "action_directive", + "intent": "run sudo command", + "risk_level": "medium", + "actions": actions, + } + ), + reasoning_content=None, + raw={}, + latency_ms=1.0, + ) + raise AssertionError("non-stream thinker should not be used") + + async def fake_stream_chat(self, role, messages): + assert role == "thinker" + observation = next(message for message in messages if "tool_observations" in message["content"]) + assert "sudo updated" in observation["content"] + assert "secret" not in observation["content"] + yield {"type": "content_delta", "delta": "sudo command completed"} + + monkeypatch.setattr("duck_core.tools.shell_exec_safe.subprocess.run", fake_run) + monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat) + monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat) + client = TestClient(create_app()) + + with client.stream( + "POST", + "/v1/chat/stream", + json={"message": "run sudo update", "workspace": str(tmp_path), "debug": True}, + ) as response: + initial_body = "".join(response.iter_text()) + task_id = re.search(r'"task_id"\s*:\s*"([^"]+)"', initial_body).group(1) + approval = next( + item for item in client.get("/v1/approvals/pending").json() + if item["task_id"] == task_id + ) + client.post(f"/v1/approvals/{approval['approval_id']}/allow_once") + with client.stream( + "POST", + f"/v1/tasks/{task_id}/continue/stream", + json={"approval_id": approval["approval_id"]}, + ) as response: + _ = "".join(response.iter_text()) + + with client.stream( + "POST", + f"/v1/tasks/{task_id}/password/stream", + json={"approval_id": approval["approval_id"], "password": "secret"}, + ) as response: + body = "".join(response.iter_text()) + + assert response.status_code == 200 + assert calls[0][1]["input"] == "secret\n" + assert "event: tool_call_finished" in body + assert "event: content_delta" in body + assert "sudo command completed" in body + assert "secret" not in body diff --git a/tests/smoke/test_runtime_tools.py b/tests/smoke/test_runtime_tools.py index 52f50dd..d95c68a 100644 --- a/tests/smoke/test_runtime_tools.py +++ b/tests/smoke/test_runtime_tools.py @@ -106,7 +106,13 @@ class FakeUpdateCheckModelClient: if role == "action": actions = [] if not any("tool_observations" in message["content"] for message in messages): - actions = [{"tool": "os_update_check", "args": {}, "reason": "Check OS updates"}] + actions = [ + { + "tool": "shell_exec_safe", + "args": {"command": "apt list --upgradable"}, + "reason": "Check OS updates", + } + ] return ModelResponse( role=role, model="local-main", @@ -124,7 +130,7 @@ class FakeUpdateCheckModelClient: ) assert role == "thinker" observation_text = "\n".join(message["content"] for message in messages) - assert "os_update_check" in observation_text + assert "apt list --upgradable" in observation_text assert "requires_approval" not in observation_text return ModelResponse( role=role, @@ -191,7 +197,7 @@ async def test_runtime_checks_system_updates_without_approval_loop(tmp_path): assert not any(event.event_type == "tool_approval_requested" for event in events) assert any( event.event_type == "tool_call_finished" - and event.payload["tool"] == "os_update_check" + and event.payload["tool"] == "shell_exec_safe" for event in events ) diff --git a/tests/smoke/test_tool_gateway.py b/tests/smoke/test_tool_gateway.py index 3027e34..ddb030e 100644 --- a/tests/smoke/test_tool_gateway.py +++ b/tests/smoke/test_tool_gateway.py @@ -3,7 +3,6 @@ import pytest from duck_core.tools.file_read import FileReadTool from duck_core.tools.file_write import FileWriteTool from duck_core.tools.gateway import ToolGateway -from duck_core.tools.os_update_check import OsUpdateCheckTool from duck_core.tools.shell_exec_safe import ShellExecSafeTool @@ -34,8 +33,45 @@ async def test_shell_tool_blocks_dangerous_commands(tmp_path): assert blocked.metadata.get("requires_approval") is not True assert blocked.metadata["blocked"] is True assert sudo.ok is False - assert sudo.metadata.get("requires_approval") is not True - assert sudo.metadata["blocked"] is True + assert sudo.metadata["requires_approval"] is True + + +@pytest.mark.asyncio +async def test_shell_tool_requests_password_for_approved_sudo(tmp_path): + shell = ShellExecSafeTool(str(tmp_path)) + + result = await shell.run({"command": "sudo apt update", "_approved": True}) + + assert result.ok is False + assert result.metadata["requires_password"] is True + assert "password" in result.error.lower() + + +@pytest.mark.asyncio +async def test_shell_tool_passes_password_to_sudo_stdin(monkeypatch, tmp_path): + calls = [] + + class Completed: + returncode = 0 + stdout = "updated\n" + stderr = "" + + def fake_run(command, **kwargs): + calls.append((command, kwargs)) + return Completed() + + monkeypatch.setattr("duck_core.tools.shell_exec_safe.subprocess.run", fake_run) + shell = ShellExecSafeTool(str(tmp_path)) + + result = await shell.run( + {"command": "sudo apt update", "_approved": True, "_password": "secret"} + ) + + command, kwargs = calls[0] + assert result.ok is True + assert "sudo -S -p '' apt update" == command + assert kwargs["input"] == "secret\n" + assert "secret" not in str(result.model_dump()) @pytest.mark.asyncio @@ -86,23 +122,20 @@ async def test_tool_gateway_searches_file_contents(tmp_path): @pytest.mark.asyncio -async def test_os_update_check_reports_apt_upgradable_packages(monkeypatch, tmp_path): +async def test_shell_tool_allows_read_only_apt_update_check(monkeypatch, tmp_path): class Completed: returncode = 0 stdout = "Listing...\\nbootlogd/stable 3.14-4 amd64 [upgradable from: 3.06-4]\\n" stderr = "WARNING: apt does not have a stable CLI interface.\\n" - monkeypatch.setattr("duck_core.tools.os_update_check.shutil.which", lambda name: "/usr/bin/apt") monkeypatch.setattr( - "duck_core.tools.os_update_check.subprocess.run", + "duck_core.tools.shell_exec_safe.subprocess.run", lambda *args, **kwargs: Completed(), ) - tool = OsUpdateCheckTool(str(tmp_path)) + shell = ShellExecSafeTool(str(tmp_path)) - result = await tool.run({}) + result = await shell.run({"command": "apt list --upgradable"}) assert result.ok is True assert "bootlogd" in result.output - assert result.metadata["package_manager"] == "apt" - assert result.metadata["upgradable_count"] == 1 - assert result.metadata["refreshed_cache"] is False + assert result.metadata["command"] == "apt list --upgradable"