490 lines
18 KiB
Python
490 lines
18 KiB
Python
import json
|
|
from pathlib import Path
|
|
|
|
from app.core.contracts import ExecutionDirective, UserTask
|
|
from app.core.contracts import PermissionDecision
|
|
from app.core.contracts import ToolResult
|
|
from app.events.event_types import TOOL_OUTPUT_CHUNK
|
|
from app.runtime.runtime_controller import RuntimeController
|
|
from app.tools.sandbox import ToolSandbox
|
|
|
|
|
|
def _write_config_tree(base_dir: Path) -> None:
    """Lay out the config and data tree the tests in this file build under *base_dir*.

    Creates the ``config``, ``data/events``, ``data/state``,
    ``data/permissions`` and ``models`` directories, then writes
    ``models.json``, ``prompts.json``, ``permissions.json`` and
    ``runtime.json`` into ``config`` as UTF-8 JSON.

    The helper is idempotent: every directory is created with
    ``exist_ok=True``, so calling it again on an already-populated tree
    simply rewrites the four config files instead of raising
    ``FileExistsError``.

    Args:
        base_dir: Root directory of the tree (the tests pass pytest's
            ``tmp_path``).
    """
    directories = (
        "config",
        "data/events",
        "data/state",
        "data/permissions",
        "models",
    )
    for relative in directories:
        (base_dir / relative).mkdir(parents=True, exist_ok=True)

    configs = {
        "models.json": {
            "orchestrator_path": "models/llama.gguf",
            "coder_path": "models/xcoder.gguf",
            "critic_path": "models/gemma.gguf",
            "embeddings_path": "models/all-MiniLM-L6-v2",
            "inference": {},
        },
        "prompts.json": {
            "orchestration_prompt": "",
            "planning_prompt": "",
            "coder_prompt": "",
            "critic_prompt": "",
        },
        "permissions.json": {
            "settings": {
                "allow_caching": True,
                # NOTE(review): data/runtime is not created here; presumably
                # the permission layer creates it on first write - confirm.
                "cache_file": str(base_dir / "data/runtime/allowed_commands.json"),
                "normalize_commands": True,
                "split_chained": True,
            },
            "command_categories": {
                "hard_stop": {
                    "commands": ["rm -rf /", "rm -rf /*", "dd if=/dev/zero of=/dev/sd*"],
                },
                "no_always": {
                    "allow_once": True,
                    "allow_always": False,
                    "commands": [
                        "rm -rf *", "rm -rf .*", "shutdown", "reboot", "halt",
                        "apt", "apt-get", "dpkg", "yum", "dnf", "pacman",
                        "systemctl stop", "systemctl start", "systemctl restart",
                        "service stop", "service start", "killall", "pkill -9",
                    ],
                },
                "normal": {
                    "allow_once": True,
                    "allow_always": True,
                    "commands": ["shell_exec", "file_write"],
                },
            },
            "path_settings": {
                "allow_read_outside": True,
                "allow_write_paths": [str(base_dir), "/tmp"],
                "require_confirmation_for_write": True,
                "require_confirmation_for_shell": True,
            },
        },
        "runtime.json": {
            "step_timeout_ms": 5000,
            "task_timeout_ms": 30000,
            "planner_retry_limit": 1,
            "tool_retry_limit": 0,
            "replan_limit": 0,
            "max_execution_steps": 5,
            "retrieval_top_k": 3,
            "memory_thresholds": {},
            "critic_fallback_policy": "continue_without_critic",
            "checkpoint_policy": {"save_on_transition": True},
            "event_retention_policy": {"keep_all": True},
            "streaming_settings": {"enabled": True},
        },
    }

    config_dir = base_dir / "config"
    for name, payload in configs.items():
        (config_dir / name).write_text(json.dumps(payload), encoding="utf-8")
|
|
|
|
|
|
def test_file_write_and_read_tool_flow(tmp_path: Path) -> None:
    """Write a file through ``file_write`` and read it back through ``file_read``."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)
    target = tmp_path / "notes" / "test.txt"

    # Step 1: the write request must complete and leave the content on disk.
    write_task = UserTask(
        input="write a file",
        context={
            "requested_tool": "file_write",
            "tool_args": {"path": str(target), "content": "hello from ducklm"},
        },
    )
    write_result = controller.handle_task(write_task)
    assert write_result["status"] == "completed"
    assert target.read_text(encoding="utf-8") == "hello from ducklm"

    # Step 2: reading the same path must return exactly what was written.
    read_task = UserTask(
        input="read the file",
        context={
            "requested_tool": "file_read",
            "tool_args": {"path": str(target)},
        },
    )
    read_result = controller.handle_task(read_task)
    assert read_result["status"] == "completed"
    assert read_result["result"]["output"] == "hello from ducklm"
|
|
|
|
|
|
def test_shell_exec_requires_permission_for_dangerous_command(tmp_path: Path) -> None:
    """A recursive ``rm -rf`` request pauses for user permission instead of running."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    dangerous_task = UserTask(
        input="run dangerous shell command",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "rm -rf /tmp/nonexistent"},
        },
    )
    outcome = controller.handle_task(dangerous_task)

    # The command is not a hard_stop entry (only the exact "rm -rf /" is);
    # it is expected to match "rm -rf *" from the no_always category.
    assert outcome["status"] == "awaiting_permission"
    assert "permission_request" in outcome["result"]
|
|
|
|
|
|
def test_shell_exec_allows_safe_command(tmp_path: Path) -> None:
    """A harmless ``pwd`` still waits for permission, then runs once it is granted."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    safe_task = UserTask(
        input="run safe shell command",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "pwd"},
        },
    )
    pending = controller.handle_task(safe_task)

    # In the new permission model even safe commands need explicit approval.
    assert pending["status"] == "awaiting_permission"
    assert "permission_request" in pending["result"]

    # Approve a single run and check that the command really executed.
    resumed = controller.resolve_permission(
        task_id=pending["task_id"],
        decision="allow_once",
    )
    assert resumed["status"] == "completed"
    assert str(tmp_path) in resumed["result"]["output"]
|
|
|
|
|
|
def test_shell_exec_publishes_output_chunks_before_completion(tmp_path: Path) -> None:
    """Shell output is published as chunk events before the ``tool_completed`` event.

    Runs a two-part ``printf`` command directly through the execution engine
    with an ``allow_always`` permission override, then inspects the events
    recorded for the task.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    # Defined once so the task context and the directive cannot drift apart.
    command = "printf 'first\\n'; sleep 0.1; printf 'second\\n'"

    perm_override = PermissionDecision(
        action_type="shell_command",
        pattern="printf",
        decision="allow_always",
    )
    task = UserTask(
        input="stream shell output",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": command},
        },
    )
    result = controller.execution_engine.execute(
        task,
        ExecutionDirective(
            type="tool",
            payload={
                "tool": "shell_exec",
                "args": {"command": command},
            },
        ),
        permission_override=perm_override,
    )

    # Check the outcome first: a failed run should report its status, not an
    # opaque StopIteration from the event lookups below.
    assert result["status"] == "completed"

    events = controller.event_bus.list_for_task(task.task_id)
    chunk_events = [event for event in events if event.type == TOOL_OUTPUT_CHUNK]
    assert [event.payload["chunk"] for event in chunk_events] == ["first\n", "second\n"]

    completed_index = next(
        (index for index, event in enumerate(events) if event.type == "tool_completed"),
        None,
    )
    first_chunk_index = next(
        (index for index, event in enumerate(events) if event.type == TOOL_OUTPUT_CHUNK),
        None,
    )
    assert completed_index is not None, "no tool_completed event was published"
    assert first_chunk_index is not None, "no output chunk event was published"
    assert first_chunk_index < completed_index
|
|
|
|
|
|
def test_streaming_shell_uses_idle_timeout_not_step_timeout(tmp_path: Path) -> None:
    """A streaming command may pause longer than ``timeout_ms`` and still finish.

    The command sleeps 0.2 s between its two outputs: longer than
    ``timeout_ms`` (100 ms) but shorter than ``idle_timeout_ms`` (500 ms) and
    ``command_timeout_ms`` (2000 ms).
    """
    received: list[str] = []

    def collect(_stream, chunk):
        # Only the chunk text matters here; the stream label is ignored.
        received.append(chunk)

    sandbox = ToolSandbox(
        allowed_root=tmp_path,
        timeout_ms=100,
        command_timeout_ms=2000,
        idle_timeout_ms=500,
    )
    outcome = sandbox.run_shell(
        command="printf 'first\\n'; sleep 0.2; printf 'second\\n'",
        output_callback=collect,
    )

    assert outcome.returncode == 0
    assert outcome.stdout == "first\nsecond\n"
    assert received == ["first\n", "second\n"]
|
|
|
|
|
|
def test_streaming_shell_timeout_kills_child_process_group(tmp_path: Path) -> None:
    """A command timeout must also stop the child that would create the marker.

    The command spawns a nested ``sh`` that sleeps 1 s and then touches a
    marker file, while ``command_timeout_ms`` is only 100 ms. The test checks
    the reported return code and that the marker never appears.
    """
    import time  # local import keeps this test self-contained

    marker = tmp_path / "child-survived"
    child_delay_s = 1.0  # must match the `sleep 1` in the command below
    sandbox = ToolSandbox(
        allowed_root=tmp_path,
        timeout_ms=100,
        command_timeout_ms=100,
        idle_timeout_ms=1000,
    )

    started = time.monotonic()
    result = sandbox.run_shell(
        command=f"sh -c 'sleep 1; touch {marker}'",
        output_callback=lambda _stream, _chunk: None,
    )

    # NOTE(review): -9 presumably reflects a SIGKILL-terminated process;
    # confirm against ToolSandbox.run_shell.
    assert result.returncode == -9

    # run_shell may return as soon as the 100 ms timeout fires, well before a
    # surviving child would touch the marker. Wait past the child's delay so
    # the check below can actually detect an orphaned child.
    remaining = (started + child_delay_s + 0.5) - time.monotonic()
    if remaining > 0:
        time.sleep(remaining)
    assert not marker.exists()
|
|
|
|
|
|
class _RecoveryCritic:
|
|
async def generate(self, prompt: str, max_tokens: int | None = None) -> str:
|
|
return '{"action":"continue","reason":"No matches is acceptable information for this exploratory check."}'
|
|
|
|
|
|
def test_failed_shell_step_can_recover_and_continue(tmp_path: Path) -> None:
    """A failing plan step can be accepted by the critic so the plan still completes.

    The single step pipes into a ``grep`` that finds nothing, which exits with
    code 1. The stub critic answers "continue", and the plan is expected to
    finish as ``completed`` while the step result still records exit code 1.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)
    controller.execution_engine.set_critic(_RecoveryCritic())
    controller.execution_engine._recovery_limit = 1

    # Bypass the permission check: this test is about recovery, not permissions.
    # PermissionDecision comes from the module-level import; no local import needed.
    perm_override = PermissionDecision(
        action_type="shell_command",
        pattern="grep",
        decision="allow_always",
    )
    result = controller.execution_engine.execute(
        UserTask(
            input="run grep with no matches and recover",
        ),
        ExecutionDirective(
            type="plan",
            payload={
                "steps": [
                    {
                        "id": "1",
                        "tool": "shell_exec",
                        "args": {"command": "printf 'abc\\n' | grep definitely_missing"},
                        "depends_on": [],
                    }
                ]
            },
        ),
        permission_override=perm_override,
    )

    assert result["status"] == "completed"
    failed_result = result["result"]["step_results"][0]["result"]["result"]
    assert failed_result["metadata"]["exit_code"] == 1
|
|
|
|
|
|
def test_privilege_scope_failure_awaits_user_review_before_replan(tmp_path: Path) -> None:
    """A dpkg-lock / "are you root?" failure ends in ``awaiting_review``.

    The real shell tool is swapped for a stub that always fails with exit
    code 100, and the task is driven through the permission and secret steps.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)
    task = UserTask(
        input="обнови систему",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "sudo apt update && apt upgrade -y"},
        },
    )

    class FailingShellTool:
        """Stub shell tool that reports a dpkg frontend-lock failure on every call."""

        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output="Error: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?",
                error="Command failed with exit code 100",
                metadata={"exit_code": 100},
            )

    controller.tool_registry._tools["shell_exec"] = FailingShellTool()

    first_response = controller.handle_task(task)
    assert first_response["status"] == "awaiting_permission"

    controller.resolve_permission(task_id=task.task_id, decision="allow_once")
    outcome = controller.resolve_secret(task_id=task.task_id, secret="secret")

    assert outcome["status"] == "awaiting_review"
    review = outcome["result"]["review"]
    assert review["diagnosis"]["type"] == "privilege_scope_error"
    assert review["critic_assessment"]["classification"] == "model_planning_error"
|
|
|
|
|
|
def test_plan_pauses_on_privilege_scope_review_instead_of_completing(tmp_path: Path) -> None:
    """A plan whose only step fails with a privilege error stops at ``awaiting_review``."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    class FailingShellTool:
        """Stub shell tool that reports a dpkg frontend-lock failure on every call."""

        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output="Error: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?",
                error="Command failed with exit code 100",
                metadata={"exit_code": 100},
            )

    controller.tool_registry._tools["shell_exec"] = FailingShellTool()

    # Permission and secret are supplied up front so only the failure path is exercised.
    outcome = controller.execution_engine.execute(
        UserTask(input="обнови систему"),
        ExecutionDirective(
            type="plan",
            payload={
                "steps": [
                    {
                        "id": "1",
                        "tool": "shell_exec",
                        "args": {"command": "sudo apt update && apt upgrade -y"},
                        "depends_on": [],
                    }
                ]
            },
        ),
        permission_override=PermissionDecision(
            action_type="shell_command",
            pattern="apt",
            decision="allow_once",
        ),
        secret_override="secret",
    )

    assert outcome["status"] == "awaiting_review"
    diagnosis = outcome["result"]["review"]["diagnosis"]
    assert diagnosis["type"] == "privilege_scope_error"
|
|
|
|
|
|
def test_sudo_auth_failure_requests_secret_retry_not_review(tmp_path: Path) -> None:
    """A rejected sudo password leads to a new password prompt, not a review."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    class BadPasswordShellTool:
        """Stub shell tool that always reports a failed sudo authentication."""

        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output="Sorry, try again.\nsudo: no password was provided\nsudo: 1 incorrect password attempt\n",
                error="Command failed with exit code 1",
                metadata={"exit_code": 1, "sudo_auth_failed": True},
            )

    controller.tool_registry._tools["shell_exec"] = BadPasswordShellTool()

    outcome = controller.execution_engine.execute(
        UserTask(input="обнови систему"),
        ExecutionDirective(
            type="plan",
            payload={
                "steps": [
                    {
                        "id": "1",
                        "tool": "shell_exec",
                        "args": {"command": "sudo apt update && apt upgrade -y"},
                        "depends_on": [],
                    }
                ]
            },
        ),
        permission_override=PermissionDecision(
            action_type="shell_command",
            pattern="apt",
            decision="allow_once",
        ),
        secret_override="wrong",
    )

    assert outcome["status"] == "awaiting_input"
    secret_request = outcome["result"]["secret_request"]
    assert secret_request["kind"] == "sudo_password"
    assert secret_request["prompt"] == "Sudo password incorrect. Try again"
    assert outcome["result"]["attempt_failed"] is True
|
|
|
|
|
|
def test_runtime_keeps_secret_state_after_bad_sudo_password(tmp_path: Path) -> None:
    """After one bad sudo password the task can still be completed with a second one."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    class RetryPasswordShellTool:
        """Stub shell tool: the first call fails sudo auth, later calls succeed."""

        calls = 0

        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            self.calls += 1
            if self.calls != 1:
                # Any call after the first behaves like a successful `sudo whoami`.
                return ToolResult(
                    tool="shell_exec",
                    ok=True,
                    output="root\n",
                    metadata={"exit_code": 0},
                )
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output="Sorry, try again.\nsudo: no password was provided\nsudo: 1 incorrect password attempt\n",
                error="Command failed with exit code 1",
                metadata={"exit_code": 1, "sudo_auth_failed": True},
            )

    controller.tool_registry._tools["shell_exec"] = RetryPasswordShellTool()
    task = UserTask(
        input="кто root",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "sudo whoami"},
        },
    )

    # Permission gate first, then the password prompt.
    first_response = controller.handle_task(task)
    assert first_response["status"] == "awaiting_permission"
    after_permission = controller.resolve_permission(task_id=task.task_id, decision="allow_once")
    assert after_permission["status"] == "awaiting_input"

    # A wrong password keeps the task waiting for input and flags the failed attempt.
    after_wrong_secret = controller.resolve_secret(task_id=task.task_id, secret="wrong")
    assert after_wrong_secret["status"] == "awaiting_input"
    assert after_wrong_secret["result"]["attempt_failed"] is True

    # The second attempt reaches the stub's success branch.
    after_right_secret = controller.resolve_secret(task_id=task.task_id, secret="correct")
    assert after_right_secret["status"] == "completed"
    assert after_right_secret["result"]["output"] == "root\n"
|
|
|
|
|
|
def test_permission_resolution_can_resume_task(tmp_path: Path) -> None:
    """Denying a pending permission request resumes the task into a ``failed`` state."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    sudo_task = UserTask(
        input="запусти sudo apt update",
    )
    pending = controller.handle_task(sudo_task)
    assert pending["status"] == "awaiting_permission"

    denied = controller.resolve_permission(task_id=pending["task_id"], decision="deny")
    assert denied["status"] == "failed"
    assert denied["result"]["error"] == "Permission denied by user."
|
|
|
|
|
|
def test_sudo_permission_resolution_requests_secret_input(tmp_path: Path) -> None:
    """Allowing a sudo command once moves the task on to a sudo password prompt."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    sudo_task = UserTask(input="запусти sudo apt update")
    pending = controller.handle_task(sudo_task)
    assert pending["status"] == "awaiting_permission"

    after_permission = controller.resolve_permission(
        task_id=pending["task_id"],
        decision="allow_once",
    )
    assert after_permission["status"] == "awaiting_input"
    secret_request = after_permission["result"]["secret_request"]
    assert secret_request["kind"] == "sudo_password"
|
|
|
|
|
|
def test_implicit_sudo_command_requests_password(tmp_path: Path) -> None:
    """A command without a literal ``sudo`` prefix can still lead to a password prompt.

    ``apt list --upgradable`` does not start with ``sudo``; the test expects
    the runtime to ask for the sudo password anyway once permission is granted.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    # The command is passed explicitly and carries no 'sudo' prefix.
    apt_task = UserTask(
        input="проверь обновления",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "apt list --upgradable"},
        },
    )
    pending = controller.handle_task(apt_task)
    assert pending["status"] == "awaiting_permission"

    # Granting permission should be followed by the sudo password request.
    after_permission = controller.resolve_permission(
        task_id=pending["task_id"],
        decision="allow_once",
    )
    assert after_permission["status"] == "awaiting_input"
    secret_request = after_permission["result"]["secret_request"]
    assert secret_request["kind"] == "sudo_password"
|
|
|
|
|
|
def test_secret_resolution_continues_after_pending_secret_saved(tmp_path: Path) -> None:
    """Submitting a secret after the password prompt moves the task forward.

    The real shell tool is used here, so the final state is only checked
    against the set of acceptable statuses rather than one exact outcome.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    pending = controller.handle_task(UserTask(input="запусти sudo apt update"))
    assert pending["status"] == "awaiting_permission"
    task_id = pending["task_id"]

    after_permission = controller.resolve_permission(task_id=task_id, decision="allow_once")
    assert after_permission["status"] == "awaiting_input"

    final = controller.resolve_secret(task_id=task_id, secret="wrongpass")
    acceptable_statuses = {"completed", "failed", "awaiting_input"}
    assert final["status"] in acceptable_statuses
    assert "error" in final["result"] or "output" in final["result"]
|