Support sudo password flow for shell commands

This commit is contained in:
mirivlad 2026-05-20 22:45:19 +08:00
parent 12bed7b6ba
commit 45b70d2800
11 changed files with 546 additions and 87 deletions

View File

@ -41,6 +41,11 @@ class ContinueRequest(BaseModel):
approval_id: str approval_id: str
class PasswordRequest(BaseModel):
approval_id: str
password: str
def create_app() -> FastAPI: def create_app() -> FastAPI:
get_settings.cache_clear() get_settings.cache_clear()
settings = get_settings() settings = get_settings()
@ -423,7 +428,7 @@ def create_app() -> FastAPI:
) )
messages = runtime.context_builder.build_basic_messages(task) messages = runtime.context_builder.build_basic_messages(task)
tool_observations = [tool_observation] tool_observations = [tool_observation]
if approval.decision != "deny": if approval.decision != "deny" and not has_password_request(tool_observations):
tool_observations = await runtime._run_action_loop( tool_observations = await runtime._run_action_loop(
task_id, task_id,
messages, messages,
@ -432,6 +437,30 @@ def create_app() -> FastAPI:
) )
async for tool_event in emit_tool_events(task_id, continued_event.sequence): async for tool_event in emit_tool_events(task_id, continued_event.sequence):
yield tool_event yield tool_event
if has_password_request(tool_observations):
await task_store.update_status(task_id, "waiting_for_password")
password_event = await event_store.append(
task_id,
"tool_password_requested",
{
"approval_id": body.approval_id,
"tool": approval.normalized_action.get("tool"),
"action": redacted_action(approval.normalized_action),
"reason": "Sudo password is required to continue.",
},
)
yield sse("tool_password_requested", password_event.model_dump())
yield sse(
"done",
{
"task_id": task_id,
"conversation_id": conversation_id,
"status": "waiting_for_password",
"final_response": "Waiting for sudo password.",
"reasoning_content": None,
},
)
return
if any(observation.get("requires_approval") for observation in tool_observations): if any(observation.get("requires_approval") for observation in tool_observations):
await task_store.waiting_for_approval(task_id) await task_store.waiting_for_approval(task_id)
await event_store.append( await event_store.append(
@ -538,6 +567,162 @@ def create_app() -> FastAPI:
return StreamingResponse(generator(), media_type="text/event-stream") return StreamingResponse(generator(), media_type="text/event-stream")
def has_password_request(observations: list[dict[str, Any]]) -> bool:
return any(
observation.get("result", {}).get("metadata", {}).get("requires_password")
for observation in observations
)
def redacted_action(action: dict[str, Any]) -> dict[str, Any]:
redacted = json.loads(json.dumps(action))
args = redacted.get("args")
if isinstance(args, dict):
args.pop("_password", None)
return redacted
@app.post("/v1/tasks/{task_id}/password/stream")
async def password_task_stream(task_id: str, body: PasswordRequest) -> StreamingResponse:
task = await task_store.get_task(task_id)
if task is None:
raise HTTPException(status_code=404, detail="Task not found")
approval = await approvals.get(body.approval_id)
if approval is None or approval.task_id != task_id:
raise HTTPException(status_code=404, detail="Approval not found for task")
if approval.decision is None:
raise HTTPException(status_code=409, detail="Approval is still pending")
conversation_id = await conversations.get_conversation_id_for_task(task_id)
async def generator():
reasoning_parts: list[str] = []
content_parts: list[str] = []
try:
await task_store.update_status(task_id, "running")
continued_event = await event_store.append(
task_id,
"task_password_submitted",
{"approval_id": body.approval_id},
)
tool_observation = await runtime._run_approved_or_denied_action(
task_id,
approval.normalized_action,
approval.decision,
password=body.password,
)
messages = runtime.context_builder.build_basic_messages(task)
tool_observations = [tool_observation]
if not has_password_request(tool_observations):
tool_observations = await runtime._run_action_loop(
task_id,
messages,
task.workspace,
initial_observations=tool_observations,
)
async for tool_event in emit_tool_events(task_id, continued_event.sequence):
yield tool_event
if has_password_request(tool_observations):
await task_store.update_status(task_id, "waiting_for_password")
password_event = await event_store.append(
task_id,
"tool_password_requested",
{
"approval_id": body.approval_id,
"tool": approval.normalized_action.get("tool"),
"action": redacted_action(approval.normalized_action),
"reason": "Sudo password is required to continue.",
},
)
yield sse("tool_password_requested", password_event.model_dump())
yield sse(
"done",
{
"task_id": task_id,
"conversation_id": conversation_id,
"status": "waiting_for_password",
"final_response": "Waiting for sudo password.",
"reasoning_content": None,
},
)
return
messages = [
*messages,
{
"role": "user",
"content": "tool_observations:\n"
+ json.dumps(tool_observations, ensure_ascii=False, indent=2),
},
]
await event_store.append(task_id, "model_call_started", {"role": "thinker"})
async for chunk in model_client.stream_chat("thinker", messages):
delta = str(chunk.get("delta") or "")
if chunk.get("type") == "reasoning_delta":
reasoning_parts.append(delta)
yield sse("reasoning_delta", {"task_id": task_id, "delta": delta})
elif chunk.get("type") == "content_delta":
content_parts.append(delta)
yield sse("content_delta", {"task_id": task_id, "delta": delta})
content = "".join(content_parts)
reasoning_content = "".join(reasoning_parts) or None
await event_store.append(
task_id,
"cognition_response",
{
"role": "thinker",
"content": content,
"reasoning_content": reasoning_content,
},
)
await event_store.append(
task_id,
"model_call_finished",
{
"role": "thinker",
"model": model_client.get_role_config("thinker").model,
},
)
await task_store.complete_task(task_id, content)
if conversation_id:
await conversations.add_message(
conversation_id,
"assistant",
content,
reasoning_content=reasoning_content,
task_id=task_id,
status="completed",
)
await event_store.append(
task_id,
"task_completed",
{
"final_response": content,
"reasoning_content": reasoning_content,
},
)
yield sse(
"done",
{
"task_id": task_id,
"conversation_id": conversation_id,
"status": "completed",
"final_response": content,
"reasoning_content": reasoning_content,
},
)
except Exception as exc:
await task_store.fail_task(task_id, str(exc))
await event_store.append(task_id, "task_failed", {"error": str(exc)})
yield sse(
"error",
{
"task_id": task_id,
"status": "failed",
"error": str(exc),
},
)
return StreamingResponse(generator(), media_type="text/event-stream")
@app.post("/v1/tasks/{task_id}/cancel") @app.post("/v1/tasks/{task_id}/cancel")
async def cancel_task(task_id: str) -> dict[str, str]: async def cancel_task(task_id: str) -> dict[str, str]:
await task_store.cancel_task(task_id) await task_store.cancel_task(task_id)

View File

@ -248,7 +248,11 @@ class RuntimeLoop:
) )
async def _run_approved_or_denied_action( async def _run_approved_or_denied_action(
self, task_id: str, action: dict[str, Any], decision: str self,
task_id: str,
action: dict[str, Any],
decision: str,
password: str | None = None,
) -> dict[str, Any]: ) -> dict[str, Any]:
tool_name = str(action.get("tool", "")) tool_name = str(action.get("tool", ""))
index = await self._approval_action_index(task_id, action) index = await self._approval_action_index(task_id, action)
@ -260,7 +264,7 @@ class RuntimeLoop:
) )
else: else:
gateway = ToolGateway.default((await self.task_store.get_task(task_id)).workspace or ".") gateway = ToolGateway.default((await self.task_store.get_task(task_id)).workspace or ".")
result = await gateway.run_action(action, approved=True) result = await gateway.run_action(action, approved=True, password=password)
result_payload = result.model_dump() result_payload = result.model_dump()
await self.event_store.append( await self.event_store.append(

View File

@ -4,7 +4,6 @@ from duck_core.tools.base import Tool, ToolResult
from duck_core.tools.file_read import FileReadTool from duck_core.tools.file_read import FileReadTool
from duck_core.tools.file_write import FileWriteTool from duck_core.tools.file_write import FileWriteTool
from duck_core.tools.list_dir import ListDirTool from duck_core.tools.list_dir import ListDirTool
from duck_core.tools.os_update_check import OsUpdateCheckTool
from duck_core.tools.search_files import SearchFilesTool from duck_core.tools.search_files import SearchFilesTool
from duck_core.tools.shell_exec_safe import ShellExecSafeTool from duck_core.tools.shell_exec_safe import ShellExecSafeTool
@ -21,12 +20,13 @@ class ToolGateway:
FileWriteTool(workspace), FileWriteTool(workspace),
ListDirTool(workspace), ListDirTool(workspace),
SearchFilesTool(workspace), SearchFilesTool(workspace),
OsUpdateCheckTool(workspace),
ShellExecSafeTool(workspace), ShellExecSafeTool(workspace),
] ]
) )
async def run_action(self, action: dict[str, Any], approved: bool = False) -> ToolResult: async def run_action(
self, action: dict[str, Any], approved: bool = False, password: str | None = None
) -> ToolResult:
tool_name = str(action.get("tool", "")) tool_name = str(action.get("tool", ""))
tool = self.tools.get(tool_name) tool = self.tools.get(tool_name)
if tool is None: if tool is None:
@ -36,4 +36,6 @@ class ToolGateway:
return ToolResult(ok=False, error="Tool args must be an object") return ToolResult(ok=False, error="Tool args must be an object")
if approved: if approved:
args = {**args, "_approved": True} args = {**args, "_approved": True}
if password is not None:
args = {**args, "_password": password}
return await tool.run(args) return await tool.run(args)

View File

@ -1,58 +0,0 @@
import shutil
import subprocess
from typing import Any
from duck_core.tools.base import ToolResult
class OsUpdateCheckTool:
name = "os_update_check"
risk_level = "low"
def __init__(self, workspace: str, timeout_seconds: int = 60, max_lines: int = 300):
self.workspace = workspace
self.timeout_seconds = timeout_seconds
self.max_lines = max_lines
async def run(self, args: dict[str, Any]) -> ToolResult:
if shutil.which("apt"):
return self._run_apt()
return ToolResult(
ok=False,
error="No supported package manager found for update checks.",
metadata={"supported_package_managers": ["apt"]},
)
def _run_apt(self) -> ToolResult:
try:
completed = subprocess.run(
["apt", "list", "--upgradable"],
cwd=self.workspace,
text=True,
capture_output=True,
timeout=self.timeout_seconds,
check=False,
)
except subprocess.SubprocessError as exc:
return ToolResult(ok=False, error=str(exc), metadata={"package_manager": "apt"})
lines = completed.stdout.splitlines()
package_lines = [line for line in lines if "/" in line and "upgradable from:" in line]
output_lines = lines[: self.max_lines]
truncated = len(lines) > self.max_lines
if truncated:
output_lines.append(f"... truncated after {self.max_lines} lines")
return ToolResult(
ok=completed.returncode == 0,
output="\n".join(output_lines),
error=completed.stderr or None,
metadata={
"package_manager": "apt",
"command": "apt list --upgradable",
"returncode": completed.returncode,
"upgradable_count": len(package_lines),
"refreshed_cache": False,
"truncated": truncated,
},
)

View File

@ -13,6 +13,8 @@ ALLOWLIST = {
"tail", "tail",
"grep", "grep",
"find", "find",
"apt list",
"apt-cache policy",
"pytest", "pytest",
"python -m pytest", "python -m pytest",
"python3 -m pytest", "python3 -m pytest",
@ -23,7 +25,6 @@ ALLOWLIST = {
BLOCKLIST = { BLOCKLIST = {
"rm", "rm",
"sudo",
"su", "su",
"dd", "dd",
"mkfs", "mkfs",
@ -58,17 +59,27 @@ class ShellExecSafeTool:
async def run(self, args: dict[str, Any]) -> ToolResult: async def run(self, args: dict[str, Any]) -> ToolResult:
command = str(args.get("command", "")).strip() command = str(args.get("command", "")).strip()
approved = bool(args.get("_approved")) approved = bool(args.get("_approved"))
password = args.get("_password")
allowed, reason, blocked = self._is_allowed(command, approved=approved) allowed, reason, blocked = self._is_allowed(command, approved=approved)
if not allowed: if not allowed:
metadata = {"blocked": True} if blocked else {"requires_approval": True} metadata = {"blocked": True} if blocked else {"requires_approval": True}
return ToolResult(ok=False, error=reason, metadata=metadata) return ToolResult(ok=False, error=reason, metadata=metadata)
if self._is_sudo_command(command) and not password:
return ToolResult(
ok=False,
error="Sudo password is required to run this command.",
metadata={"requires_password": True},
)
run_command = self._sudo_stdin_command(command) if self._is_sudo_command(command) else command
input_text = f"{password}\n" if self._is_sudo_command(command) else None
try: try:
completed = subprocess.run( completed = subprocess.run(
command, run_command,
cwd=self.workspace, cwd=self.workspace,
shell=True, shell=True,
text=True, text=True,
capture_output=True, capture_output=True,
input=input_text,
timeout=self.timeout_seconds, timeout=self.timeout_seconds,
check=False, check=False,
) )
@ -93,6 +104,8 @@ class ShellExecSafeTool:
return False, f"Command is blocked: {blocked}", True return False, f"Command is blocked: {blocked}", True
if approved: if approved:
return True, None, False return True, None, False
if self._is_sudo_command(command):
return False, "Sudo command requires approval.", False
prefix1 = parts[0] if parts else "" prefix1 = parts[0] if parts else ""
prefix2 = " ".join(parts[:2]) prefix2 = " ".join(parts[:2])
prefix3 = " ".join(parts[:3]) prefix3 = " ".join(parts[:3])
@ -107,3 +120,14 @@ class ShellExecSafeTool:
if " " in lowered_blocked or "|" in lowered_blocked: if " " in lowered_blocked or "|" in lowered_blocked:
return lowered_command.startswith(lowered_blocked) or lowered_blocked in lowered_command return lowered_command.startswith(lowered_blocked) or lowered_blocked in lowered_command
return bool(parts) and parts[0].lower() == lowered_blocked return bool(parts) and parts[0].lower() == lowered_blocked
def _is_sudo_command(self, command: str) -> bool:
try:
parts = shlex.split(command)
except ValueError:
return False
return bool(parts) and parts[0] == "sudo"
def _sudo_stdin_command(self, command: str) -> str:
parts = shlex.split(command)
return shlex.join(["sudo", "-S", "-p", "", *parts[1:]])

View File

@ -145,7 +145,6 @@ function formatToolCommand(tool, args) {
if (tool === "file_write") return `$ file_write ${args.path || ""}`.trim(); if (tool === "file_write") return `$ file_write ${args.path || ""}`.trim();
if (tool === "list_dir") return `$ list_dir ${args.path || "."}`.trim(); if (tool === "list_dir") return `$ list_dir ${args.path || "."}`.trim();
if (tool === "search_files") return `$ search_files ${args.query || ""}`.trim(); if (tool === "search_files") return `$ search_files ${args.query || ""}`.trim();
if (tool === "os_update_check") return "$ os_update_check";
return `$ ${tool || "tool"}`; return `$ ${tool || "tool"}`;
} }
@ -213,6 +212,41 @@ function appendApprovalTerminal(article, eventPayload) {
terminal?.append(createInlineApprovalActions(payload.approval_id, command)); terminal?.append(createInlineApprovalActions(payload.approval_id, command));
} }
function appendPasswordPrompt(article, eventPayload) {
const payload = eventPayload.payload || eventPayload;
const terminal = article?.querySelector(".tool-terminal.is-error") || article?.querySelector(".tool-terminal");
if (!terminal) return;
const body = terminal.querySelector(".tool-terminal-body");
const status = terminal.querySelector(".tool-terminal-status");
terminal.classList.add("is-waiting");
terminal.dataset.approvalId = payload.approval_id || terminal.dataset.approvalId || "";
terminal.dataset.taskId = eventPayload.task_id || terminal.dataset.taskId || "";
if (status) status.textContent = "password";
if (body) {
body.textContent = [
body.textContent.trim(),
"",
payload.reason || "Sudo password is required to continue.",
].filter(Boolean).join("\n");
}
if (!terminal.querySelector(".tool-password-form")) terminal.append(createPasswordForm());
}
function createPasswordForm() {
const form = document.createElement("form");
form.className = "tool-password-form";
const input = document.createElement("input");
input.type = "password";
input.autocomplete = "current-password";
input.placeholder = "Sudo password";
input.required = true;
const button = document.createElement("button");
button.type = "submit";
button.textContent = "Continue";
form.append(input, button);
return form;
}
function createInlineApprovalActions(approvalId, command) { function createInlineApprovalActions(approvalId, command) {
const actions = document.createElement("div"); const actions = document.createElement("div");
actions.className = "tool-approval-actions"; actions.className = "tool-approval-actions";
@ -267,6 +301,22 @@ async function resolveInlineApproval(button) {
} }
} }
async function submitToolPassword(form) {
const terminal = form.closest(".tool-terminal");
const article = terminal?.closest(".message");
const taskId = terminal?.dataset.taskId;
const approvalId = terminal?.dataset.approvalId;
const input = form.querySelector("input");
const password = input?.value || "";
if (!terminal || !article || !taskId || !approvalId || !password) return;
form.querySelectorAll("input, button").forEach((item) => {
item.disabled = true;
});
await continueAfterPassword(article, taskId, approvalId, password);
input.value = "";
form.remove();
}
function humanApprovalDecision(action) { function humanApprovalDecision(action) {
if (action === "allow_once") return "разрешено один раз"; if (action === "allow_once") return "разрешено один раз";
if (action === "allow_forever") return "разрешено навсегда"; if (action === "allow_forever") return "разрешено навсегда";
@ -423,6 +473,11 @@ async function handleAssistantStreamEvent(pending, name, data, context) {
appendApprovalTerminal(pending, data); appendApprovalTerminal(pending, data);
return; return;
} }
if (name === "tool_password_requested") {
pending.querySelector(".message-meta span").textContent = "password";
appendPasswordPrompt(pending, data);
return;
}
if (name === "content_delta") { if (name === "content_delta") {
if (!context.contentStarted) { if (!context.contentStarted) {
context.contentStarted = true; context.contentStarted = true;
@ -473,6 +528,30 @@ async function continueAfterInlineApproval(article, taskId, approvalId) {
} }
} }
async function continueAfterPassword(article, taskId, approvalId, password) {
if (!article || state.running) return;
state.running = true;
document.querySelector("#run").disabled = true;
setStatus("#task-status", taskId, "warn");
const context = {taskId, contentStarted: false};
try {
await streamSse(
`/v1/tasks/${encodeURIComponent(taskId)}/password/stream`,
{approval_id: approvalId, password},
async ({name, data}) => handleAssistantStreamEvent(article, name, data, context),
);
} catch (error) {
setMessagePending(article, error.message);
article.querySelector(".message-meta span").textContent = "failed";
setStatus("#task-status", "failed", "bad");
await refreshEvents(taskId);
} finally {
state.running = false;
document.querySelector("#run").disabled = false;
document.querySelector("#message")?.focus();
}
}
async function refreshConversations() { async function refreshConversations() {
const list = document.querySelector("#conversation-list"); const list = document.querySelector("#conversation-list");
if (!list) return; if (!list) return;
@ -611,6 +690,12 @@ function bindChat() {
const button = event.target.closest(".message-reasoning-toggle"); const button = event.target.closest(".message-reasoning-toggle");
if (button) toggleInlineReasoning(button); if (button) toggleInlineReasoning(button);
}); });
document.querySelector("#messages")?.addEventListener("submit", (event) => {
const form = event.target.closest(".tool-password-form");
if (!form) return;
event.preventDefault();
submitToolPassword(form).catch(console.error);
});
document.querySelector("#debug")?.addEventListener("change", (event) => { document.querySelector("#debug")?.addEventListener("change", (event) => {
document.querySelector("#debug-panel").hidden = !event.target.checked; document.querySelector("#debug-panel").hidden = !event.target.checked;
}); });

View File

@ -635,6 +635,38 @@ dd {
opacity: 0.65; opacity: 0.65;
} }
.tool-password-form {
display: grid;
grid-template-columns: minmax(0, 1fr) auto;
gap: 8px;
padding: 10px 12px 12px;
border-top: 1px solid #1e293b;
background: #111827;
}
.tool-password-form input {
min-width: 0;
border-color: #334155;
background: #0f172a;
color: #f8fafc;
}
.tool-password-form button {
border: 0;
border-radius: 7px;
padding: 8px 10px;
background: #2563eb;
color: #ffffff;
font-size: 12px;
font-weight: 800;
}
.tool-password-form input:disabled,
.tool-password-form button:disabled {
cursor: wait;
opacity: 0.65;
}
.message-meta { .message-meta {
display: flex; display: flex;
align-items: center; align-items: center;

View File

@ -12,9 +12,6 @@ Available tools:
Args: {"path": "."} Args: {"path": "."}
- search_files: search text inside files in the current workspace. - search_files: search text inside files in the current workspace.
Args: {"query": "text to find", "path": ".", "glob": "*.py"} Args: {"query": "text to find", "path": ".", "glob": "*.py"}
- os_update_check: check available operating system package updates without
refreshing package manager caches and without sudo.
Args: {}
- shell_exec_safe: run a safe allowlisted shell command in the current workspace. - shell_exec_safe: run a safe allowlisted shell command in the current workspace.
Args: {"command": "pwd"} Args: {"command": "pwd"}
@ -24,6 +21,9 @@ follow-up information. Return actions=[] when the observations are sufficient
for the thinker to answer. for the thinker to answer.
Use only the listed tools. Keep actions minimal and directly tied to the user's Use only the listed tools. Keep actions minimal and directly tied to the user's
request. Do not invent tool names. request. Do not invent tool names.
For user requests like "check updates in the system", use os_update_check. For user requests like "check updates in the system", use shell_exec_safe with
Do not use apt update, apt-get update, sudo, su, or package installation commands `apt list --upgradable`. Do not use apt update, apt-get update, sudo, su, or
for update checks. package installation commands for a read-only update check.
For commands that genuinely require elevated privileges, use the real system
command with `sudo` through shell_exec_safe. DuckLM will ask the user for
approval and then for the sudo password when the command needs it.

View File

@ -177,3 +177,149 @@ def test_continue_stream_executes_approved_tool_and_streams_answer(tmp_path, mon
conversation = client.get(f"/v1/conversations/{conversation_id}").json() conversation = client.get(f"/v1/conversations/{conversation_id}").json()
assert conversation["messages"][-1]["content"] == "continued after approval" assert conversation["messages"][-1]["content"] == "continued after approval"
assert conversation["messages"][-1]["status"] == "completed" assert conversation["messages"][-1]["status"] == "completed"
def test_continue_stream_requests_password_for_approved_sudo(tmp_path, monkeypatch):
monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None):
assert role == "action"
actions = []
if not any("tool_observations" in message["content"] for message in messages):
actions = [
{
"tool": "shell_exec_safe",
"args": {"command": "sudo apt update"},
"reason": "Check updates with root privileges",
}
]
return ModelResponse(
role=role,
model="local-main",
content=json.dumps(
{
"kind": "action_directive",
"intent": "run sudo command",
"risk_level": "medium",
"actions": actions,
}
),
reasoning_content=None,
raw={},
latency_ms=1.0,
)
monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
client = TestClient(create_app())
with client.stream(
"POST",
"/v1/chat/stream",
json={"message": "run sudo update", "workspace": str(tmp_path), "debug": True},
) as response:
initial_body = "".join(response.iter_text())
task_id = re.search(r'"task_id"\s*:\s*"([^"]+)"', initial_body).group(1)
approval = next(
item for item in client.get("/v1/approvals/pending").json()
if item["task_id"] == task_id
)
client.post(f"/v1/approvals/{approval['approval_id']}/allow_once")
with client.stream(
"POST",
f"/v1/tasks/{task_id}/continue/stream",
json={"approval_id": approval["approval_id"]},
) as response:
body = "".join(response.iter_text())
assert response.status_code == 200
assert "event: tool_password_requested" in body
assert "waiting_for_password" in body
assert "password" in body.lower()
def test_password_stream_runs_sudo_with_password_and_streams_answer(tmp_path, monkeypatch):
monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
calls = []
class Completed:
returncode = 0
stdout = "sudo updated\n"
stderr = ""
def fake_run(command, **kwargs):
calls.append((command, kwargs))
return Completed()
async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None):
if role == "action":
actions = []
if not any("tool_observations" in message["content"] for message in messages):
actions = [
{
"tool": "shell_exec_safe",
"args": {"command": "sudo apt update"},
"reason": "Check updates with root privileges",
}
]
return ModelResponse(
role=role,
model="local-main",
content=json.dumps(
{
"kind": "action_directive",
"intent": "run sudo command",
"risk_level": "medium",
"actions": actions,
}
),
reasoning_content=None,
raw={},
latency_ms=1.0,
)
raise AssertionError("non-stream thinker should not be used")
async def fake_stream_chat(self, role, messages):
assert role == "thinker"
observation = next(message for message in messages if "tool_observations" in message["content"])
assert "sudo updated" in observation["content"]
assert "secret" not in observation["content"]
yield {"type": "content_delta", "delta": "sudo command completed"}
monkeypatch.setattr("duck_core.tools.shell_exec_safe.subprocess.run", fake_run)
monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat)
client = TestClient(create_app())
with client.stream(
"POST",
"/v1/chat/stream",
json={"message": "run sudo update", "workspace": str(tmp_path), "debug": True},
) as response:
initial_body = "".join(response.iter_text())
task_id = re.search(r'"task_id"\s*:\s*"([^"]+)"', initial_body).group(1)
approval = next(
item for item in client.get("/v1/approvals/pending").json()
if item["task_id"] == task_id
)
client.post(f"/v1/approvals/{approval['approval_id']}/allow_once")
with client.stream(
"POST",
f"/v1/tasks/{task_id}/continue/stream",
json={"approval_id": approval["approval_id"]},
) as response:
_ = "".join(response.iter_text())
with client.stream(
"POST",
f"/v1/tasks/{task_id}/password/stream",
json={"approval_id": approval["approval_id"], "password": "secret"},
) as response:
body = "".join(response.iter_text())
assert response.status_code == 200
assert calls[0][1]["input"] == "secret\n"
assert "event: tool_call_finished" in body
assert "event: content_delta" in body
assert "sudo command completed" in body
assert "secret" not in body

View File

@ -106,7 +106,13 @@ class FakeUpdateCheckModelClient:
if role == "action": if role == "action":
actions = [] actions = []
if not any("tool_observations" in message["content"] for message in messages): if not any("tool_observations" in message["content"] for message in messages):
actions = [{"tool": "os_update_check", "args": {}, "reason": "Check OS updates"}] actions = [
{
"tool": "shell_exec_safe",
"args": {"command": "apt list --upgradable"},
"reason": "Check OS updates",
}
]
return ModelResponse( return ModelResponse(
role=role, role=role,
model="local-main", model="local-main",
@ -124,7 +130,7 @@ class FakeUpdateCheckModelClient:
) )
assert role == "thinker" assert role == "thinker"
observation_text = "\n".join(message["content"] for message in messages) observation_text = "\n".join(message["content"] for message in messages)
assert "os_update_check" in observation_text assert "apt list --upgradable" in observation_text
assert "requires_approval" not in observation_text assert "requires_approval" not in observation_text
return ModelResponse( return ModelResponse(
role=role, role=role,
@ -191,7 +197,7 @@ async def test_runtime_checks_system_updates_without_approval_loop(tmp_path):
assert not any(event.event_type == "tool_approval_requested" for event in events) assert not any(event.event_type == "tool_approval_requested" for event in events)
assert any( assert any(
event.event_type == "tool_call_finished" event.event_type == "tool_call_finished"
and event.payload["tool"] == "os_update_check" and event.payload["tool"] == "shell_exec_safe"
for event in events for event in events
) )

View File

@ -3,7 +3,6 @@ import pytest
from duck_core.tools.file_read import FileReadTool from duck_core.tools.file_read import FileReadTool
from duck_core.tools.file_write import FileWriteTool from duck_core.tools.file_write import FileWriteTool
from duck_core.tools.gateway import ToolGateway from duck_core.tools.gateway import ToolGateway
from duck_core.tools.os_update_check import OsUpdateCheckTool
from duck_core.tools.shell_exec_safe import ShellExecSafeTool from duck_core.tools.shell_exec_safe import ShellExecSafeTool
@ -34,8 +33,45 @@ async def test_shell_tool_blocks_dangerous_commands(tmp_path):
assert blocked.metadata.get("requires_approval") is not True assert blocked.metadata.get("requires_approval") is not True
assert blocked.metadata["blocked"] is True assert blocked.metadata["blocked"] is True
assert sudo.ok is False assert sudo.ok is False
assert sudo.metadata.get("requires_approval") is not True assert sudo.metadata["requires_approval"] is True
assert sudo.metadata["blocked"] is True
@pytest.mark.asyncio
async def test_shell_tool_requests_password_for_approved_sudo(tmp_path):
shell = ShellExecSafeTool(str(tmp_path))
result = await shell.run({"command": "sudo apt update", "_approved": True})
assert result.ok is False
assert result.metadata["requires_password"] is True
assert "password" in result.error.lower()
@pytest.mark.asyncio
async def test_shell_tool_passes_password_to_sudo_stdin(monkeypatch, tmp_path):
calls = []
class Completed:
returncode = 0
stdout = "updated\n"
stderr = ""
def fake_run(command, **kwargs):
calls.append((command, kwargs))
return Completed()
monkeypatch.setattr("duck_core.tools.shell_exec_safe.subprocess.run", fake_run)
shell = ShellExecSafeTool(str(tmp_path))
result = await shell.run(
{"command": "sudo apt update", "_approved": True, "_password": "secret"}
)
command, kwargs = calls[0]
assert result.ok is True
assert "sudo -S -p '' apt update" == command
assert kwargs["input"] == "secret\n"
assert "secret" not in str(result.model_dump())
@pytest.mark.asyncio @pytest.mark.asyncio
@ -86,23 +122,20 @@ async def test_tool_gateway_searches_file_contents(tmp_path):
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_os_update_check_reports_apt_upgradable_packages(monkeypatch, tmp_path): async def test_shell_tool_allows_read_only_apt_update_check(monkeypatch, tmp_path):
class Completed: class Completed:
returncode = 0 returncode = 0
stdout = "Listing...\\nbootlogd/stable 3.14-4 amd64 [upgradable from: 3.06-4]\\n" stdout = "Listing...\\nbootlogd/stable 3.14-4 amd64 [upgradable from: 3.06-4]\\n"
stderr = "WARNING: apt does not have a stable CLI interface.\\n" stderr = "WARNING: apt does not have a stable CLI interface.\\n"
monkeypatch.setattr("duck_core.tools.os_update_check.shutil.which", lambda name: "/usr/bin/apt")
monkeypatch.setattr( monkeypatch.setattr(
"duck_core.tools.os_update_check.subprocess.run", "duck_core.tools.shell_exec_safe.subprocess.run",
lambda *args, **kwargs: Completed(), lambda *args, **kwargs: Completed(),
) )
tool = OsUpdateCheckTool(str(tmp_path)) shell = ShellExecSafeTool(str(tmp_path))
result = await tool.run({}) result = await shell.run({"command": "apt list --upgradable"})
assert result.ok is True assert result.ok is True
assert "bootlogd" in result.output assert "bootlogd" in result.output
assert result.metadata["package_manager"] == "apt" assert result.metadata["command"] == "apt list --upgradable"
assert result.metadata["upgradable_count"] == 1
assert result.metadata["refreshed_cache"] is False