from __future__ import annotations

import asyncio
import json
import logging
from typing import Any

from app.core.contracts import (
    CriticScore,
    ExecutionDirective,
    PermissionDecision,
    PermissionRequest,
    RuntimeEvent,
    SecretRequest,
    ToolCall,
    UserTask,
)
from app.core.execution_scheduler import ExecutionScheduler
from app.events.event_bus import EventBus
from app.events.event_types import (
    CRITIC_CALLED,
    CRITIC_RESULT,
    PERMISSION_REQUESTED,
    PERMISSION_RESOLVED,
    PLAN_FAILED,
    PLAN_STARTED,
    SECRET_REQUESTED,
    STEP_STARTED,
    STEPPED_COMPLETED,
    TOOL_CALLED,
    TOOL_COMPLETED,
)
from app.models.async_adapters import AsyncCriticAdapter, AsyncCoderAdapter
from app.memory.write_policy import MemoryWritePolicy
from app.memory.interface import MemoryInterface

logger = logging.getLogger(__name__)


class ExecutionEngine:
    """Execute scheduler directives: plans, single tool calls, coder calls, responses.

    Every public and private entry point returns a plain dict of the form
    ``{"status": <str>, "result": <dict>}``. Statuses produced in this class
    are ``completed``, ``failed``, ``awaiting_permission``, ``awaiting_input``
    and ``awaiting_password``.
    """

    # Statuses meaning "the tool did not finish; the caller must supply
    # something (a permission decision, a secret, a password) and re-submit".
    # A plan must stop on any of them instead of treating the step as done.
    _PAUSED_STATUSES = frozenset(
        {"awaiting_permission", "awaiting_input", "awaiting_password"}
    )

    def __init__(
        self,
        event_bus: EventBus,
        tool_registry,
        permission_service,
        scheduler: ExecutionScheduler | None = None,
        critic: AsyncCriticAdapter | None = None,
        memory_policy: MemoryWritePolicy | None = None,
        memory_interface: MemoryInterface | None = None,
        prompts: dict[str, str] | None = None,
        recovery_limit: int = 1,
    ) -> None:
        """Store collaborators; a default ``ExecutionScheduler`` is created if none is given.

        ``recovery_limit`` only acts as an on/off switch here: recovery of a
        failed plan step is skipped when it is ``<= 0``.
        """
        self._event_bus = event_bus
        self._tool_registry = tool_registry
        self._permission_service = permission_service
        self._scheduler = scheduler or ExecutionScheduler()
        self._critic = critic
        self._coder: AsyncCoderAdapter | None = None
        self._memory_policy = memory_policy
        self._memory_interface = memory_interface
        self._prompts = prompts or {}
        self._recovery_limit = recovery_limit

    def set_critic(self, critic: AsyncCriticAdapter) -> None:
        """Replace the critic adapter used for step scoring and recovery."""
        self._critic = critic

    def set_coder(self, coder: AsyncCoderAdapter) -> None:
        """Set the coder adapter used by ``coder`` directives."""
        self._coder = coder

    def set_memory_policy(self, policy: MemoryWritePolicy) -> None:
        """Replace the memory write policy."""
        self._memory_policy = policy

    def execute(
        self,
        task: UserTask,
        directive: ExecutionDirective,
        permission_override: PermissionDecision | None = None,
        secret_override: str | None = None,
        password_override: str | None = None,
    ) -> dict[str, Any]:
        """Run one directive and return a ``{"status", "result"}`` dict.

        The directive is first passed through the scheduler, a ``STEP_STARTED``
        event is published, and the scheduled directive is dispatched on its
        ``type``: ``plan``, ``tool``, ``respond``, ``coder`` or ``fail``. Any
        other type is acknowledged with a ``completed`` status.
        """
        scheduled = self._scheduler.next_directive(directive)
        self._publish(task, STEP_STARTED, {"directive_type": scheduled.type})

        if scheduled.type == "plan":
            return self._execute_plan(
                task=task,
                directive=scheduled,
                permission_override=permission_override,
                secret_override=secret_override,
                password_override=password_override,
            )

        if scheduled.type == "tool":
            return self._execute_tool(
                task=task,
                directive=scheduled,
                permission_override=permission_override,
                secret_override=secret_override,
                password_override=password_override,
            )

        if scheduled.type == "respond":
            return {
                "status": "completed",
                "result": {
                    "message": f"Runtime accepted task: {task.input}",
                    "mode": scheduled.payload.get("mode", "direct_response"),
                },
            }

        if scheduled.type == "coder":
            return self._execute_coder(
                task=task,
                directive=scheduled,
            )

        if scheduled.type == "fail":
            return {
                "status": "failed",
                "result": {"error": scheduled.reason or "Execution failed."},
            }

        return {
            "status": "completed",
            "result": {
                "message": "Directive accepted.",
                "directive_type": scheduled.type,
            },
        }

    @staticmethod
    def _extract_plan_steps(payload: Any) -> list[Any]:
        """Return the raw step list from a plan payload, or ``[]`` if there is none.

        Accepts either a dict carrying ``"steps"`` directly, or a JSON string in
        the unified format ``{"type": "plan", "payload": {"steps": [...]}}``.
        """
        if isinstance(payload, dict):
            return payload.get("steps") or []
        if isinstance(payload, str) and payload.strip().startswith("{"):
            try:
                parsed = json.loads(payload)
                return parsed.get("payload", {}).get("steps", []) or []
            except (ValueError, AttributeError):
                # Malformed JSON, or JSON whose shape is not the unified format.
                return []
        return []

    def _execute_plan(
        self,
        task: UserTask,
        directive: ExecutionDirective,
        permission_override: PermissionDecision | None = None,
        secret_override: str | None = None,
        password_override: str | None = None,
    ) -> dict[str, Any]:
        """Execute the steps of a plan directive in dependency order.

        Steps are parsed and cycle-checked by the scheduler, then run one at a
        time: after each step the set of ready steps is recomputed and the
        first ready one is taken. ``respond`` steps produce a message without a
        tool call; every other kind goes through ``_execute_tool``.

        Returns early when a step is paused (any status in
        ``_PAUSED_STATUSES``), when a failed step cannot be recovered, or when
        recovery asks to finish with a response. Otherwise returns
        ``completed`` with all step results.
        """
        # Unified format: {"type": "plan", "payload": {"steps": [...]}}
        payload = directive.payload
        steps_data = self._extract_plan_steps(payload)

        if steps_data:
            plan_json = json.dumps({"type": "plan", "payload": {"steps": steps_data}})
        else:
            plan_json = json.dumps(payload)

        plan_steps = self._scheduler.parse_plan_steps(plan_json, task.task_id)

        if not plan_steps:
            return {
                "status": "failed",
                "result": {"error": "Failed to parse plan steps from directive"},
            }

        if not self._scheduler.validate_no_cycles(plan_steps):
            self._publish(task, PLAN_FAILED, {"error": "Cycle detected in plan"})
            return {
                "status": "failed",
                "result": {"error": "Cycle detected in plan"},
            }

        graph = self._scheduler.build_task_graph(plan_steps)
        self._publish(task, PLAN_STARTED, {"steps": len(plan_steps)})

        # Loop-invariant; a non-dict payload has no flag, so evaluation stays on.
        requires_execution = (
            payload.get("requires_execution", True) if isinstance(payload, dict) else True
        )

        completed_steps: set[str] = set()
        step_results: list[dict[str, Any]] = []

        ready_steps = self._get_ready_steps(graph, completed_steps)
        while ready_steps:
            # Only the first ready step is run; readiness is recomputed below.
            step = ready_steps[0]

            if step.kind == "respond":
                # Respond steps are answered directly, without a tool call.
                result = {
                    "status": "completed",
                    "result": {
                        "message": step.args.get("text", step.description),
                    },
                }
            else:
                step_directive = ExecutionDirective(
                    type=step.kind,
                    payload={
                        "tool": step.tool,
                        "args": step.args,
                    },
                    requires_permission=step.requires_confirmation,
                    reason=step.description,
                )
                result = self._execute_tool(
                    task=task,
                    directive=step_directive,
                    permission_override=permission_override,
                    secret_override=secret_override,
                    password_override=password_override,
                )

            # The step is waiting on the user (permission, secret or password):
            # stop here rather than recording it as completed and running its
            # dependents.
            if result.get("status") in self._PAUSED_STATUSES:
                return {
                    "status": result.get("status"),
                    "result": result.get("result", {}),
                    "step_results": step_results,
                }

            step_results.append({
                "step_id": step.id,
                "result": result,
            })
            completed_steps.add(step.id)

            self._publish(task, STEPPED_COMPLETED, {
                "step_id": step.id,
                "status": result.get("status"),
            })

            if result.get("status") == "failed":
                recovery = self._recover_failed_step(
                    task=task,
                    step=step,
                    result=result,
                    step_results=step_results,
                    permission_override=permission_override,
                    secret_override=secret_override,
                    password_override=password_override,
                )
                recovery_status = recovery.get("status")

                if recovery_status in self._PAUSED_STATUSES:
                    return recovery

                if recovery_status != "completed":
                    return {
                        "status": "failed",
                        "result": {
                            "error": f"Step {step.id} failed",
                            "failed_step": step.id,
                            "step_results": step_results,
                            "recovery": recovery.get("result"),
                        },
                    }

                recovered_result = recovery.get("result")
                if recovered_result:
                    step_results[-1]["result"] = recovered_result
                    # Keep `result` pointing at what is stored, so the critic
                    # below scores the recovered outcome and its score lands
                    # in step_results.
                    result = recovered_result

                if recovery.get("finish"):
                    return {
                        "status": "completed",
                        "result": {
                            "message": recovery.get("message", "Recovered from failed step"),
                            "step_results": step_results,
                        },
                    }

            if requires_execution and self._critic:
                critic_result = self._evaluate_with_critic(task, step, result)
                if critic_result:
                    # Convert to dict for JSON serialization
                    result["critic_score"] = (
                        critic_result.model_dump(mode="json")
                        if hasattr(critic_result, "model_dump")
                        else dict(critic_result)
                    )
                    self._save_critique_to_memory(task, step, critic_result)

            ready_steps = self._get_ready_steps(graph, completed_steps)

        return {
            "status": "completed",
            "result": {
                "message": f"Plan executed: {len(completed_steps)} steps completed",
                "step_results": step_results,
            },
        }

    def _recover_failed_step(
        self,
        task: UserTask,
        step,
        result: dict[str, Any],
        step_results: list[dict[str, Any]],
        permission_override: PermissionDecision | None = None,
        secret_override: str | None = None,
        password_override: str | None = None,
    ) -> dict[str, Any]:
        """Ask the critic how to handle a failed step and act on its decision.

        Returns a dict whose ``status`` is ``completed`` (optionally with
        ``finish``/``message`` for the ``respond`` action), ``failed``, or a
        paused status propagated from a retry. Recovery is unavailable when
        ``recovery_limit <= 0`` or no critic is configured.
        """
        if self._recovery_limit <= 0 or not self._critic:
            return {"status": "failed", "result": {"reason": "recovery_unavailable"}}

        decision = self._evaluate_recovery(task, step, result, step_results)
        action = decision.get("action", "fail")

        if action == "continue":
            recovered = dict(result)
            recovered["status"] = "completed"
            recovered["recovery_decision"] = decision
            return {"status": "completed", "result": recovered}

        if action == "respond":
            recovered = dict(result)
            recovered["status"] = "completed"
            recovered["recovery_decision"] = decision
            return {
                "status": "completed",
                "result": recovered,
                "finish": True,
                "message": decision.get("message")
                or decision.get("reason")
                or "Recovered by responding to user",
            }

        if action == "retry":
            retry_tool = decision.get("tool") or step.tool
            retry_args = decision.get("args") or step.args
            retry_result = self._execute_tool(
                task=task,
                directive=ExecutionDirective(
                    type="tool",
                    payload={"tool": retry_tool, "args": retry_args},
                    requires_permission=True,
                    reason=decision.get("reason", "Recovery retry"),
                ),
                permission_override=permission_override,
                secret_override=secret_override,
                password_override=password_override,
            )
            # A paused retry is not a failure: hand it back unchanged so the
            # caller can collect the missing permission/secret/password.
            if retry_result.get("status") in self._PAUSED_STATUSES:
                return retry_result
            retry_result["recovery_decision"] = decision
            if retry_result.get("status") == "completed":
                return {"status": "completed", "result": retry_result}
            return {
                "status": "failed",
                "result": {"decision": decision, "retry_result": retry_result},
            }

        return {"status": "failed", "result": decision}

    def _evaluate_recovery(
        self,
        task: UserTask,
        step,
        result: dict[str, Any],
        step_results: list[dict[str, Any]],
    ) -> dict[str, Any]:
        """Query the critic for a recovery decision; never raises.

        Publishes ``CRITIC_CALLED`` and ``CRITIC_RESULT`` events. Any exception
        is logged and converted into a ``{"action": "fail"}`` decision.
        """
        prompt = self._build_recovery_prompt(task, step, result, step_results)
        self._publish(task, CRITIC_CALLED, {"step_id": step.id, "mode": "recovery"})
        try:
            output = asyncio.run(self._critic.generate(prompt, max_tokens=512))
            decision = self._parse_recovery_decision(output)
            self._publish(task, CRITIC_RESULT, {
                "step_id": step.id,
                "mode": "recovery",
                "decision": decision,
                "raw": output,
            })
            return decision
        except Exception as e:
            logger.warning("Recovery evaluation failed: %s", e)
            self._publish(task, CRITIC_RESULT, {
                "step_id": step.id,
                "mode": "recovery",
                "error": str(e),
            })
            return {"action": "fail", "reason": str(e)}

    def _build_recovery_prompt(
        self,
        task: UserTask,
        step,
        result: dict[str, Any],
        step_results: list[dict[str, Any]],
    ) -> str:
        """Render the recovery prompt for the critic from the failed step and history."""
        return f"""You are a recovery controller for an agent runtime.
Decide what to do after a failed tool step.
A non-zero exit code is not always fatal. Interpret the failure in context.

Allowed actions:
- continue: failure is acceptable information; continue the plan.
- retry: try one alternative tool call. Include "tool" and "args".
- respond: stop and answer the user with available information. Include "message".
- fail: real failure; stop the task.

Return ONLY JSON:
{{"action":"continue|retry|respond|fail","reason":"...","tool":"shell_exec","args":{{...}},"message":"..."}}

Task:
{task.input}

Failed step:
id={step.id}
tool={step.tool}
args={json.dumps(step.args, ensure_ascii=False)}
description={step.description}

Failed result:
{json.dumps(result, ensure_ascii=False, indent=2)}

Previous step results:
{json.dumps(step_results, ensure_ascii=False, indent=2)}
"""

    def _parse_recovery_decision(self, output: str) -> dict[str, Any]:
        """Extract the JSON decision from critic output.

        Takes the text between the first ``{`` and the last ``}``. An unknown
        or missing ``action`` is normalised to ``"fail"``; unparseable output
        yields a ``fail`` decision with a reason.
        """
        try:
            json_start = output.find("{")
            json_end = output.rfind("}") + 1
            # No opening brace, no closing brace, or closing brace before opening.
            if json_start < 0 or json_end <= json_start:
                return {"action": "fail", "reason": "Recovery output was not JSON"}
            data = json.loads(output[json_start:json_end])
            action = data.get("action", "fail")
            if action not in {"continue", "retry", "respond", "fail"}:
                action = "fail"
            data["action"] = action
            return data
        except (json.JSONDecodeError, TypeError, ValueError) as e:
            return {"action": "fail", "reason": f"Recovery JSON parse failed: {e}"}

    def _get_ready_steps(
        self,
        graph: dict[str, Any],
        completed: set[str],
    ) -> list:
        """Return steps not yet completed whose dependencies are all completed.

        Order follows ``graph["nodes"]``. Nodes without an entry in
        ``graph["step_map"]`` are skipped.
        """
        if not graph or not graph.get("nodes"):
            return []

        step_map: dict = graph.get("step_map", {})
        ready = []
        for node in graph["nodes"]:
            node_id = node["id"]
            if node_id in completed:
                continue
            deps = node.get("depends_on", [])
            if all(dep in completed for dep in deps):
                step = step_map.get(node_id)
                if step:
                    ready.append(step)
        return ready

    def _evaluate_with_critic(
        self,
        task: UserTask,
        step,
        result: dict[str, Any],
    ) -> CriticScore | None:
        """Score a step result with the critic; never raises.

        On success the score fields are also written into
        ``result["critic_score"]``. Returns ``None`` when no critic is set,
        when the output cannot be parsed, or when the critic call fails.
        """
        if not self._critic:
            return None

        critic_prompt = self._build_critic_prompt(step, result)
        self._publish(task, CRITIC_CALLED, {"step_id": step.id})

        try:
            critic_output = asyncio.run(self._critic.generate(critic_prompt))
            score = self._parse_critic_score(critic_output)
            self._publish(task, CRITIC_RESULT, {
                "step_id": step.id,
                "score": score.model_dump(mode="json") if score else None,
            })
            if score:
                result["critic_score"] = {
                    "correctness": score.correctness,
                    "usefulness": score.usefulness,
                    "safety": score.safety,
                    "memory_store": score.memory_store,
                    "weight": score.weight,
                    "explanation": score.explanation,
                }
            return score
        except Exception as e:
            logger.warning("Critic evaluation failed: %s", e)
            self._publish(task, CRITIC_RESULT, {
                "step_id": step.id,
                "error": str(e),
            })
            return None

    def _save_critique_to_memory(
        self,
        task: UserTask,
        step,
        score: CriticScore,
    ) -> None:
        """Save critic evaluation as critique entry in memory.

        Best-effort: does nothing without a memory interface, and any error
        during the insert is logged rather than raised.
        """
        if not self._memory_interface:
            return

        try:
            tool_name = step.tool
            tool_args = step.args or {}
            args_str = ", ".join(f"{k}={v}" for k, v in tool_args.items())
            critique_text = f"Tool: {tool_name}({args_str}) | Task: {task.input[:100]} | Scores: correctness={score.correctness}, usefulness={score.usefulness}, safety={score.safety} | {score.explanation}"

            metadata = {
                "task_input": task.input,
                "tool": tool_name,
                "args": tool_args,
                "step_id": step.id,
                "scores": {
                    "correctness": score.correctness,
                    "usefulness": score.usefulness,
                    "safety": score.safety,
                },
            }

            self._memory_interface.insert(
                text=critique_text,
                kind="critique",
                source="critic",
                task_id=task.task_id,
                session_id=task.session_id,
                weight=score.weight,
                metadata=metadata,
            )
            logger.info(
                "Saved critique to memory: %s task_id=%s", tool_name, task.task_id
            )
        except Exception as e:
            logger.warning("Failed to save critique to memory: %s", e)

    def _build_critic_prompt(self, step, result: dict[str, Any]) -> str:
        """Render the scoring prompt: configured ``critic`` prompt plus step details."""
        base_prompt = self._prompts.get("critic", "")
        tool_result = result.get("result", {})
        return f"""{base_prompt}

Step: {step.description}
Tool: {step.tool}
Args: {step.args}
Result: {json.dumps(tool_result, indent=2)}

Evaluate and respond with JSON:
{{"correctness": 0.0-1.0, "usefulness": 0.0-1.0, "safety": 0.0-1.0, "memory_store": true|false, "weight": 0.0-1.0, "explanation": "..."}}"""

    def _parse_critic_score(self, output: str) -> CriticScore | None:
        """Parse critic output into a ``CriticScore``, or ``None`` if it is not usable.

        Takes the text between the first ``{`` and the last ``}``; missing
        fields fall back to the defaults listed below.
        """
        try:
            json_start = output.find("{")
            json_end = output.rfind("}") + 1
            if json_start < 0:
                return None
            json_str = output[json_start:json_end]
            data = json.loads(json_str)
            return CriticScore(
                correctness=data.get("correctness", 0.5),
                usefulness=data.get("usefulness", 0.5),
                safety=data.get("safety", 1.0),
                memory_store=data.get("memory_store", False),
                weight=data.get("weight", 0.5),
                explanation=data.get("explanation", ""),
            )
        except (json.JSONDecodeError, ValueError, TypeError) as e:
            logger.warning("Critic score parsing failed: %s", e)
            return None

    def _execute_coder(
        self,
        task: UserTask,
        directive: ExecutionDirective,
    ) -> dict[str, Any]:
        """Run the coder model on ``payload["task"]`` and return its output as ``code``.

        Fails when no coder is configured, when the payload has no task, or
        when the coder call raises (the error text is returned).
        """
        if not self._coder:
            return {"status": "failed", "result": {"error": "Coder model not available"}}

        coder_task = directive.payload.get("task", "")
        if not coder_task:
            return {"status": "failed", "result": {"error": "Missing task for coder"}}

        try:
            output = asyncio.run(self._coder.generate(coder_task))
            return {
                "status": "completed",
                "result": {"code": output},
            }
        except Exception as e:
            logger.warning("Coder execution failed: %s", e)
            return {"status": "failed", "result": {"error": str(e)}}

    def _execute_tool(
        self,
        task: UserTask,
        directive: ExecutionDirective,
        permission_override: PermissionDecision | None = None,
        secret_override: str | None = None,
        password_override: str | None = None,
    ) -> dict[str, Any]:
        """Validate, permission-check and run a single tool call.

        Flow: resolve tool name/args from ``directive.payload`` -> verify the
        tool is registered -> permission check (``shell_exec`` commands and
        ``file_write`` paths only) -> sudo secret handling for ``shell_exec``
        -> execute -> map the tool result to a status.

        ``secret_override`` is fed to ``sudo -S`` via ``stdin_secret``;
        ``password_override`` is injected as the ``password`` argument.
        ``permission_override`` is accepted for signature symmetry with the
        other executors but is not consulted in this method.

        Returns ``completed``/``failed`` with the dumped tool result, or a
        paused status: ``awaiting_permission`` (user must confirm),
        ``awaiting_input`` (sudo secret needed before running) or
        ``awaiting_password`` (the tool ran, failed, and reported ``needs_sudo``).
        """
        payload = directive.payload
        # `or` guards against an explicit None, which would otherwise become
        # the string "None" / crash dict().
        tool_name = str(payload.get("tool") or "").strip()
        tool_args = dict(payload.get("args") or {})

        if password_override:
            tool_args["password"] = password_override

        if not tool_name:
            return {"status": "failed", "result": {"error": "Missing tool name"}}

        # Tool-first: validate tool exists in registry
        available_tools = self._tool_registry.list_names()
        if tool_name not in available_tools:
            return {
                "status": "failed",
                "result": {
                    "error": f"Unknown tool: {tool_name}. Available tools: {available_tools}"
                },
            }

        permission_result = None

        # Check permission for shell_exec and file_write
        if tool_name == "shell_exec":
            permission_result = self._permission_service.check_shell_command(
                task_id=task.task_id,
                session_id=task.session_id,
                command=str(tool_args.get("command", "")),
            )
        elif tool_name == "file_write":
            write_path = str(tool_args.get("path", ""))
            # NOTE(review): this is a substring match on a path that comes from
            # the plan, so any path merely *containing* one of these fragments
            # (e.g. ".../data/runtime/../../x") skips the permission service.
            # Behaviour kept as-is; consider resolving the path and comparing
            # against the real runtime directory.
            if "allowed_commands.json" in write_path or "/data/runtime" in write_path:
                # Internal system write - allow without permission
                permission_result = {"decision": "allowed", "path": write_path}
            else:
                permission_result = self._permission_service.check_write_path(
                    task_id=task.task_id,
                    session_id=task.session_id,
                    path=write_path,
                )

        # Handle permission result
        if permission_result:
            decision = permission_result.get("decision", "unknown")

            # Hard stop - deny execution
            if decision == "hard_stop":
                self._publish(task, PERMISSION_REQUESTED, permission_result)
                return {
                    "status": "failed",
                    "result": {
                        "error": f"Command blocked: {permission_result.get('reason', 'Hard stop command')}",
                        "command": permission_result.get("command", ""),
                    },
                }

            # Deny - checked before the allow branch so that a cached denial
            # can never be mistaken for a cached approval.
            if decision == "deny":
                self._publish(task, PERMISSION_RESOLVED, permission_result)
                return {
                    "status": "failed",
                    "result": {
                        "error": "Permission denied",
                        "command": permission_result.get("command", ""),
                    },
                }

            # Allowed now, or previously allowed and cached
            if decision in ("allowed_always", "allowed") or permission_result.get("cached"):
                self._publish(task, PERMISSION_RESOLVED, permission_result)

            # Need user confirmation - return immediately, don't continue execution
            elif decision == "prompt":
                self._publish(task, PERMISSION_REQUESTED, permission_result)
                return {
                    "status": "awaiting_permission",
                    "result": {
                        "error": "Permission required before execution.",
                        "permission_request": permission_result,
                    },
                }
            # Any other decision value falls through and the tool is executed.

        if tool_name == "shell_exec":
            command = str(tool_args.get("command", ""))
            if command.startswith("sudo "):
                if secret_override is None:
                    secret_request = SecretRequest(
                        task_id=task.task_id,
                        session_id=task.session_id,
                        kind="sudo_password",
                        prompt="Sudo password required",
                        command=command,
                    )
                    self._publish(
                        task, SECRET_REQUESTED, secret_request.model_dump(mode="json")
                    )
                    return {
                        "status": "awaiting_input",
                        "result": {
                            "error": "Secret required",
                            "secret_request": secret_request.model_dump(mode="json"),
                        },
                    }

                # Rewrite to read the password from stdin with an empty prompt.
                tool_args["command"] = f"sudo -S -p '' {command[len('sudo '):]}"
                tool_args["stdin_secret"] = f"{secret_override}\n"

        # NOTE(review): step_id is a fixed placeholder; the real plan step id
        # is not available through this method's signature.
        tool_call = ToolCall(
            tool=tool_name,
            args=tool_args,
            task_id=task.task_id,
            step_id="step-1",
        )
        self._publish(task, TOOL_CALLED, tool_call.model_dump(mode="json"))

        tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args)
        self._publish(task, TOOL_COMPLETED, tool_result.model_dump(mode="json"))

        needs_sudo = (
            tool_result.metadata.get("needs_sudo", False) if tool_result.metadata else False
        )

        if not tool_result.ok and needs_sudo:
            return {
                "status": "awaiting_password",
                "result": {
                    "task_id": task.task_id,
                    "needs_sudo": True,
                    "command": tool_args.get("command", ""),
                    "error": tool_result.error or "Permission denied",
                    "tool_result": tool_result.model_dump(mode="json"),
                },
            }

        return {
            "status": "completed" if tool_result.ok else "failed",
            "result": tool_result.model_dump(mode="json"),
        }

    def _publish(self, task: UserTask, event_type: str, payload: dict[str, Any]) -> None:
        """Publish a ``RuntimeEvent`` for ``task`` on the event bus; no-op without a bus."""
        if not self._event_bus:
            return
        event = RuntimeEvent(
            task_id=task.task_id,
            session_id=task.session_id,
            sequence=self._event_bus.next_sequence(task.task_id),
            type=event_type,
            payload=payload,
        )
        self._event_bus.publish(event)