import json
from pathlib import Path

from app.core.contracts import (
    ExecutionDirective,
    PermissionDecision,
    ToolResult,
    UserTask,
)
from app.events.event_types import TOOL_OUTPUT_CHUNK
from app.runtime.runtime_controller import RuntimeController
from app.tools.sandbox import ToolSandbox


def _write_config_tree(base_dir: Path) -> None:
    """Create the directory layout and JSON config files used by these tests.

    Creates ``config/``, ``data/events``, ``data/state``, ``data/permissions``
    and ``models/`` under *base_dir*, then writes ``models.json``,
    ``prompts.json``, ``permissions.json`` and ``runtime.json`` into
    ``config/``.

    Raises:
        FileExistsError: if ``base_dir / "config"`` already exists (that
            ``mkdir`` call is made without ``exist_ok``).
    """
    (base_dir / "config").mkdir()
    (base_dir / "data" / "events").mkdir(parents=True, exist_ok=True)
    (base_dir / "data" / "state").mkdir(parents=True, exist_ok=True)
    (base_dir / "data" / "permissions").mkdir(parents=True, exist_ok=True)
    (base_dir / "models").mkdir(exist_ok=True)
    configs = {
        "models.json": {
            "orchestrator_path": "models/llama.gguf",
            "coder_path": "models/xcoder.gguf",
            "critic_path": "models/gemma.gguf",
            "embeddings_path": "models/all-MiniLM-L6-v2",
            "inference": {},
        },
        "prompts.json": {
            "orchestration_prompt": "",
            "planning_prompt": "",
            "coder_prompt": "",
            "critic_prompt": "",
        },
        "permissions.json": {
            "settings": {
                "allow_caching": True,
                # NOTE(review): data/runtime is not created above (only
                # data/events, data/state and data/permissions are);
                # presumably the permission layer creates it on first
                # write — confirm.
                "cache_file": str(base_dir / "data/runtime/allowed_commands.json"),
                "normalize_commands": True,
                "split_chained": True,
            },
            "command_categories": {
                "hard_stop": {
                    "commands": ["rm -rf /", "rm -rf /*", "dd if=/dev/zero of=/dev/sd*"],
                },
                "no_always": {
                    "allow_once": True,
                    "allow_always": False,
                    "commands": [
                        "rm -rf *",
                        "rm -rf .*",
                        "shutdown",
                        "reboot",
                        "halt",
                        "apt",
                        "apt-get",
                        "dpkg",
                        "yum",
                        "dnf",
                        "pacman",
                        "systemctl stop",
                        "systemctl start",
                        "systemctl restart",
                        "service stop",
                        "service start",
                        "killall",
                        "pkill -9",
                    ],
                },
                "normal": {
                    "allow_once": True,
                    "allow_always": True,
                    "commands": ["shell_exec", "file_write"],
                },
            },
            "path_settings": {
                "allow_read_outside": True,
                "allow_write_paths": [str(base_dir), "/tmp"],
                "require_confirmation_for_write": True,
                "require_confirmation_for_shell": True,
            },
        },
        "runtime.json": {
            "step_timeout_ms": 5000,
            "task_timeout_ms": 30000,
            "planner_retry_limit": 1,
            "tool_retry_limit": 0,
            "replan_limit": 0,
            "max_execution_steps": 5,
            "retrieval_top_k": 3,
            "memory_thresholds": {},
            "critic_fallback_policy": "continue_without_critic",
            "checkpoint_policy": {"save_on_transition": True},
            "event_retention_policy": {"keep_all": True},
            "streaming_settings": {"enabled": True},
        },
    }
    for name, payload in configs.items():
        (base_dir / "config" / name).write_text(json.dumps(payload), encoding="utf-8")


def test_file_write_and_read_tool_flow(tmp_path: Path) -> None:
    """Write a file via ``file_write`` and read it back via ``file_read``."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)
    target = tmp_path / "notes" / "test.txt"

    write_result = controller.handle_task(
        UserTask(
            input="write a file",
            context={
                "requested_tool": "file_write",
                "tool_args": {"path": str(target), "content": "hello from ducklm"},
            },
        )
    )
    assert write_result["status"] == "completed"
    assert target.read_text(encoding="utf-8") == "hello from ducklm"

    read_result = controller.handle_task(
        UserTask(
            input="read the file",
            context={
                "requested_tool": "file_read",
                "tool_args": {"path": str(target)},
            },
        )
    )
    assert read_result["status"] == "completed"
    assert read_result["result"]["output"] == "hello from ducklm"


def test_shell_exec_requires_permission_for_dangerous_command(tmp_path: Path) -> None:
    """A ``rm -rf`` shell command must pause with a permission request."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    result = controller.handle_task(
        UserTask(
            input="run dangerous shell command",
            context={
                "requested_tool": "shell_exec",
                "tool_args": {"command": "rm -rf /tmp/nonexistent"},
            },
        )
    )
    # rm -rf /tmp/nonexistent is not hard_stop (only exact "rm -rf /" is)
    # but it matches "rm -rf *" in no_always category
    assert result["status"] == "awaiting_permission"
    assert "permission_request" in result["result"]


def test_shell_exec_allows_safe_command(tmp_path: Path) -> None:
    """``pwd`` asks for permission first and runs once ``allow_once`` is given."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    result = controller.handle_task(
        UserTask(
            input="run safe shell command",
            context={
                "requested_tool": "shell_exec",
                "tool_args": {"command": "pwd"},
            },
        )
    )
    # Even safe commands require permission in the new permission model
    assert result["status"] == "awaiting_permission"
    assert "permission_request" in result["result"]

    # Grant permission and verify execution
    resumed = controller.resolve_permission(task_id=result["task_id"], decision="allow_once")
    assert resumed["status"] == "completed"
    assert str(tmp_path) in resumed["result"]["output"]


def test_shell_exec_publishes_output_chunks_before_completion(tmp_path: Path) -> None:
    """Output chunk events are published before the ``tool_completed`` event."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)
    perm_override = PermissionDecision(
        action_type="shell_command",
        pattern="printf",
        decision="allow_always",
    )
    # The same command is used for the task context and the directive payload.
    stream_command = "printf 'first\\n'; sleep 0.1; printf 'second\\n'"
    task = UserTask(
        input="stream shell output",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": stream_command},
        },
    )

    result = controller.execution_engine.execute(
        task,
        ExecutionDirective(
            type="tool",
            payload={
                "tool": "shell_exec",
                "args": {"command": stream_command},
            },
        ),
        permission_override=perm_override,
    )

    events = controller.event_bus.list_for_task(task.task_id)
    chunk_events = [event for event in events if event.type == TOOL_OUTPUT_CHUNK]
    completed_index = next(
        index for index, event in enumerate(events) if event.type == "tool_completed"
    )
    first_chunk_index = next(
        index for index, event in enumerate(events) if event.type == TOOL_OUTPUT_CHUNK
    )

    assert result["status"] == "completed"
    assert [event.payload["chunk"] for event in chunk_events] == ["first\n", "second\n"]
    assert first_chunk_index < completed_index


def test_streaming_shell_uses_idle_timeout_not_step_timeout(tmp_path: Path) -> None:
    """A streaming command that outlasts ``timeout_ms`` still succeeds.

    The command sleeps 0.2 s, longer than ``timeout_ms=100``, but stays within
    ``idle_timeout_ms=500`` and ``command_timeout_ms=2000``.
    """
    sandbox = ToolSandbox(
        allowed_root=tmp_path,
        timeout_ms=100,
        command_timeout_ms=2000,
        idle_timeout_ms=500,
    )
    chunks: list[str] = []

    result = sandbox.run_shell(
        command="printf 'first\\n'; sleep 0.2; printf 'second\\n'",
        output_callback=lambda _stream, chunk: chunks.append(chunk),
    )

    assert result.returncode == 0
    assert result.stdout == "first\nsecond\n"
    assert chunks == ["first\n", "second\n"]


def test_streaming_shell_timeout_kills_child_process_group(tmp_path: Path) -> None:
    """A command exceeding ``command_timeout_ms`` is killed with its child."""
    marker = tmp_path / "child-survived"
    sandbox = ToolSandbox(
        allowed_root=tmp_path,
        timeout_ms=100,
        command_timeout_ms=100,
        idle_timeout_ms=1000,
    )

    result = sandbox.run_shell(
        command=f"sh -c 'sleep 1; touch {marker}'",
        output_callback=lambda _stream, _chunk: None,
    )

    # -9 is the return code this test expects for a killed command
    # (presumably SIGKILL — confirm against ToolSandbox).
    assert result.returncode == -9
    # NOTE(review): the child only touches the marker after `sleep 1`. If
    # run_shell returns right at the 100 ms timeout, this check runs before a
    # surviving child could create the file and would pass either way —
    # confirm whether run_shell blocks long enough for this to be meaningful.
    assert not marker.exists()


class _RecoveryCritic:
    """Stub critic whose ``generate`` always answers with a "continue" action."""

    async def generate(self, prompt: str, max_tokens: int | None = None) -> str:
        """Return a fixed JSON verdict; *prompt* and *max_tokens* are ignored."""
        return '{"action":"continue","reason":"No matches is acceptable information for this exploratory check."}'


def test_failed_shell_step_can_recover_and_continue(tmp_path: Path) -> None:
    """A plan step exiting with code 1 still completes when the critic says continue."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)
    controller.execution_engine.set_critic(_RecoveryCritic())
    controller.execution_engine._recovery_limit = 1

    # Bypass permission check for this test — we're testing recovery, not permissions
    perm_override = PermissionDecision(
        action_type="shell_command",
        pattern="grep",
        decision="allow_always",
    )

    result = controller.execution_engine.execute(
        UserTask(
            input="run grep with no matches and recover",
        ),
        ExecutionDirective(
            type="plan",
            payload={
                "steps": [
                    {
                        "id": "1",
                        "tool": "shell_exec",
                        "args": {"command": "printf 'abc\\n' | grep definitely_missing"},
                        "depends_on": [],
                    }
                ]
            },
        ),
        permission_override=perm_override,
    )

    assert result["status"] == "completed"
    failed_result = result["result"]["step_results"][0]["result"]["result"]
    assert failed_result["metadata"]["exit_code"] == 1


class _FailingShellTool:
    """Stub ``shell_exec`` tool that always fails with a dpkg-lock "are you root?" error."""

    def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
        """Return a failed result with exit code 100; *task* and *args* are ignored."""
        return ToolResult(
            tool="shell_exec",
            ok=False,
            output="Error: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?",
            error="Command failed with exit code 100",
            metadata={"exit_code": 100},
        )


def test_privilege_scope_failure_awaits_user_review_before_replan(tmp_path: Path) -> None:
    """A root-permission failure ends in ``awaiting_review`` with a diagnosis."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)
    task = UserTask(
        input="обнови систему",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "sudo apt update && apt upgrade -y"},
        },
    )
    # Swap the real shell tool for a stub so no command is actually run.
    controller.tool_registry._tools["shell_exec"] = _FailingShellTool()

    initial = controller.handle_task(task)
    assert initial["status"] == "awaiting_permission"

    controller.resolve_permission(task_id=task.task_id, decision="allow_once")
    result = controller.resolve_secret(task_id=task.task_id, secret="secret")

    assert result["status"] == "awaiting_review"
    assert result["result"]["review"]["diagnosis"]["type"] == "privilege_scope_error"
    assert result["result"]["review"]["critic_assessment"]["classification"] == "model_planning_error"


def test_plan_pauses_on_privilege_scope_review_instead_of_completing(tmp_path: Path) -> None:
    """A plan whose step fails for lack of root pauses in ``awaiting_review``."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)
    # Swap the real shell tool for a stub so no command is actually run.
    controller.tool_registry._tools["shell_exec"] = _FailingShellTool()

    result = controller.execution_engine.execute(
        UserTask(input="обнови систему"),
        ExecutionDirective(
            type="plan",
            payload={
                "steps": [
                    {
                        "id": "1",
                        "tool": "shell_exec",
                        "args": {"command": "sudo apt update && apt upgrade -y"},
                        "depends_on": [],
                    }
                ]
            },
        ),
        permission_override=PermissionDecision(
            action_type="shell_command",
            pattern="apt",
            decision="allow_once",
        ),
        secret_override="secret",
    )

    assert result["status"] == "awaiting_review"
    assert result["result"]["review"]["diagnosis"]["type"] == "privilege_scope_error"


def test_sudo_auth_failure_requests_secret_retry_not_review(tmp_path: Path) -> None:
    """A sudo authentication failure asks for the password again, not a review."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    class BadPasswordShellTool:
        """Stub shell tool that always reports a failed sudo authentication."""

        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output="Sorry, try again.\nsudo: no password was provided\nsudo: 1 incorrect password attempt\n",
                error="Command failed with exit code 1",
                metadata={"exit_code": 1, "sudo_auth_failed": True},
            )

    controller.tool_registry._tools["shell_exec"] = BadPasswordShellTool()

    result = controller.execution_engine.execute(
        UserTask(input="обнови систему"),
        ExecutionDirective(
            type="plan",
            payload={
                "steps": [
                    {
                        "id": "1",
                        "tool": "shell_exec",
                        "args": {"command": "sudo apt update && apt upgrade -y"},
                        "depends_on": [],
                    }
                ]
            },
        ),
        permission_override=PermissionDecision(
            action_type="shell_command",
            pattern="apt",
            decision="allow_once",
        ),
        secret_override="wrong",
    )

    assert result["status"] == "awaiting_input"
    assert result["result"]["secret_request"]["kind"] == "sudo_password"
    assert result["result"]["secret_request"]["prompt"] == "Sudo password incorrect. Try again"
    assert result["result"]["attempt_failed"] is True


def test_runtime_keeps_secret_state_after_bad_sudo_password(tmp_path: Path) -> None:
    """After one rejected password the task can be retried and then completes."""
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    class RetryPasswordShellTool:
        """Stub shell tool: the first call fails sudo auth, later calls succeed."""

        def __init__(self) -> None:
            # Per-instance call counter, so the count is never shared
            # through the class.
            self.calls = 0

        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            self.calls += 1
            if self.calls == 1:
                return ToolResult(
                    tool="shell_exec",
                    ok=False,
                    output="Sorry, try again.\nsudo: no password was provided\nsudo: 1 incorrect password attempt\n",
                    error="Command failed with exit code 1",
                    metadata={"exit_code": 1, "sudo_auth_failed": True},
                )
            return ToolResult(
                tool="shell_exec",
                ok=True,
                output="root\n",
                metadata={"exit_code": 0},
            )

    controller.tool_registry._tools["shell_exec"] = RetryPasswordShellTool()
    task = UserTask(
        input="кто root",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "sudo whoami"},
        },
    )

    initial = controller.handle_task(task)
    assert initial["status"] == "awaiting_permission"

    allowed = controller.resolve_permission(task_id=task.task_id, decision="allow_once")
    assert allowed["status"] == "awaiting_input"

    retry = controller.resolve_secret(task_id=task.task_id, secret="wrong")
    assert retry["status"] == "awaiting_input"
    assert retry["result"]["attempt_failed"] is True

    final = controller.resolve_secret(task_id=task.task_id, secret="correct")
    assert final["status"] == "completed"
    assert final["result"]["output"] == "root\n"


def test_permission_resolution_can_resume_task(tmp_path: Path) -> None:
    """Resolving a pending permission with ``deny`` fails the task.

    Despite the name, the only resolution exercised here is the denial path.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    initial = controller.handle_task(
        UserTask(
            input="запусти sudo apt update",
        )
    )
    assert initial["status"] == "awaiting_permission"

    resumed = controller.resolve_permission(task_id=initial["task_id"], decision="deny")
    assert resumed["status"] == "failed"
    assert resumed["result"]["error"] == "Permission denied by user."
def test_sudo_permission_resolution_requests_secret_input(tmp_path: Path) -> None:
    """Granting permission for a sudo request leads to a sudo password prompt."""
    _write_config_tree(tmp_path)
    runtime = RuntimeController(base_dir=tmp_path)
    sudo_task = UserTask(input="запусти sudo apt update")

    pending = runtime.handle_task(sudo_task)
    assert pending["status"] == "awaiting_permission"

    granted = runtime.resolve_permission(
        task_id=pending["task_id"],
        decision="allow_once",
    )
    assert granted["status"] == "awaiting_input"
    secret_request = granted["result"]["secret_request"]
    assert secret_request["kind"] == "sudo_password"


def test_implicit_sudo_command_requests_password(tmp_path: Path) -> None:
    """Check that a command lacking a ``sudo`` prefix can still prompt for it.

    ``apt list --upgradable`` does not begin with ``sudo``; once permission is
    granted the runtime is nevertheless expected to ask for the sudo password.
    """
    _write_config_tree(tmp_path)
    runtime = RuntimeController(base_dir=tmp_path)

    # The command needs root yet carries no explicit 'sudo' prefix.
    tool_context = {
        "requested_tool": "shell_exec",
        "tool_args": {"command": "apt list --upgradable"},
    }
    update_check = UserTask(input="проверь обновления", context=tool_context)

    pending = runtime.handle_task(update_check)
    assert pending["status"] == "awaiting_permission"

    # With permission granted, the next stop should be the password prompt,
    # because apt is treated as needing root.
    granted = runtime.resolve_permission(
        task_id=pending["task_id"],
        decision="allow_once",
    )
    assert granted["status"] == "awaiting_input"
    secret_request = granted["result"]["secret_request"]
    assert secret_request["kind"] == "sudo_password"


def test_secret_resolution_continues_after_pending_secret_saved(tmp_path: Path) -> None:
    """Supplying a secret after the permission grant moves the task forward."""
    _write_config_tree(tmp_path)
    runtime = RuntimeController(base_dir=tmp_path)
    sudo_task = UserTask(input="запусти sudo apt update")

    pending = runtime.handle_task(sudo_task)
    assert pending["status"] == "awaiting_permission"
    task_id = pending["task_id"]

    granted = runtime.resolve_permission(task_id=task_id, decision="allow_once")
    assert granted["status"] == "awaiting_input"

    outcome = runtime.resolve_secret(task_id=task_id, secret="wrongpass")
    accepted_statuses = {"completed", "failed", "awaiting_input"}
    assert outcome["status"] in accepted_statuses
    # The result must carry either an error or some output.
    payload = outcome["result"]
    assert any(key in payload for key in ("error", "output"))