ducklm/app/core/execution_engine.py

976 lines
37 KiB
Python

from __future__ import annotations
import asyncio
import json
import logging
from typing import Any
from app.core.contracts import (
CriticScore,
ExecutionDirective,
PermissionDecision,
PermissionRequest,
RuntimeEvent,
SecretRequest,
ToolCall,
ToolResult,
UserTask,
)
from app.core.command_analyzer import CommandAnalyzer
from app.core.execution_scheduler import ExecutionScheduler
from app.events.event_bus import EventBus
from app.events.event_types import (
CRITIC_CALLED,
CRITIC_RESULT,
PERMISSION_REQUESTED,
PERMISSION_RESOLVED,
PLAN_FAILED,
PLAN_STARTED,
SECRET_REQUESTED,
STEP_STARTED,
STEPPED_COMPLETED,
TOOL_CALLED,
TOOL_COMPLETED,
TOOL_OUTPUT_CHUNK,
)
from app.models.async_adapters import AsyncCriticAdapter, AsyncCoderAdapter
from app.memory.write_policy import MemoryWritePolicy
from app.memory.interface import MemoryInterface
logger = logging.getLogger(__name__)
class ExecutionEngine:
    """Run execution directives (plan, tool, respond, coder, fail) for a user task.

    The engine publishes runtime events through the event bus, looks tools up
    in the tool registry, consults the permission service before shell and
    file-write tool calls, and optionally uses a critic adapter to score
    completed plan steps and to decide how to recover from failed ones.
    """

    def __init__(
        self,
        event_bus: EventBus,
        tool_registry,
        permission_service,
        scheduler: ExecutionScheduler | None = None,
        critic: AsyncCriticAdapter | None = None,
        memory_policy: MemoryWritePolicy | None = None,
        memory_interface: MemoryInterface | None = None,
        prompts: dict[str, str] | None = None,
        recovery_limit: int = 1,
        critic_retry_limit: int = 2,
        command_analyzer: CommandAnalyzer | None = None,
    ) -> None:
        """Store the collaborators and limits used by the engine.

        Args:
            event_bus: Bus that receives every RuntimeEvent; a falsy value
                disables publishing.
            tool_registry: Object providing ``list_names()`` and ``get(name)``.
            permission_service: Object providing ``check_shell_command`` and
                ``check_write_path``.
            scheduler: Directive scheduler; a default ``ExecutionScheduler``
                is created when omitted (or falsy).
            critic: Optional critic adapter; without it critic scoring and
                failure recovery are skipped.
            memory_policy: Optional policy consulted before storing critiques.
            memory_interface: Optional memory store for critiques.
            prompts: Prompt templates by name (the "critic" key is read when
                building critic prompts); defaults to an empty dict.
            recovery_limit: Recovery is disabled when this is <= 0.
            critic_retry_limit: Maximum critic-triggered step retries per plan.
            command_analyzer: Optional analyzer for failed shell commands.
        """
        # Required collaborators.
        self._event_bus = event_bus
        self._tool_registry = tool_registry
        self._permission_service = permission_service

        # Optional collaborators, with fallbacks for the ones that need them.
        self._scheduler = scheduler if scheduler else ExecutionScheduler()
        self._prompts = prompts if prompts else {}
        self._critic = critic
        self._memory_policy = memory_policy
        self._memory_interface = memory_interface
        self._command_analyzer = command_analyzer

        # The coder adapter is never passed in; it is attached via set_coder().
        self._coder: AsyncCoderAdapter | None = None

        # Retry / recovery budgets.
        self._recovery_limit = recovery_limit
        self._critic_retry_limit = critic_retry_limit
def set_critic(self, critic: AsyncCriticAdapter) -> None:
    """Attach (or replace) the critic adapter stored on ``self._critic``."""
    self._critic = critic
def set_coder(self, coder: AsyncCoderAdapter) -> None:
    """Attach (or replace) the coder adapter stored on ``self._coder``."""
    self._coder = coder
def set_memory_policy(self, policy: MemoryWritePolicy) -> None:
    """Attach (or replace) the write policy stored on ``self._memory_policy``."""
    self._memory_policy = policy
def execute(
    self,
    task: UserTask,
    directive: ExecutionDirective,
    permission_override: PermissionDecision | None = None,
    secret_override: str | None = None,
    password_override: str | None = None,
) -> dict[str, Any]:
    """Schedule a directive, announce it, and dispatch on its type.

    The directive is first passed through the scheduler; a STEP_STARTED
    event carrying the scheduled directive's type is then published.

    Dispatch:
        * "plan" / "tool": delegated to ``_execute_plan`` / ``_execute_tool``
          with all three overrides forwarded.
        * "coder": delegated to ``_execute_coder``.
        * "respond": answered inline from the directive payload.
        * "fail": reported as a failed result using the directive's reason.
        * anything else: acknowledged with a generic "completed" result.

    Returns:
        A dict with at least "status" and "result" keys.
    """
    scheduled = self._scheduler.next_directive(directive)
    kind = scheduled.type
    self._publish(task, STEP_STARTED, {"directive_type": kind})

    if kind in ("plan", "tool"):
        # Both handlers share the same keyword interface.
        handler = self._execute_plan if kind == "plan" else self._execute_tool
        return handler(
            task=task,
            directive=scheduled,
            permission_override=permission_override,
            secret_override=secret_override,
            password_override=password_override,
        )

    if kind == "coder":
        return self._execute_coder(
            task=task,
            directive=scheduled,
        )

    if kind == "respond":
        message = scheduled.payload.get("text", f"Runtime accepted task: {task.input}")
        mode = scheduled.payload.get("mode", "direct_response")
        return {
            "status": "completed",
            "result": {
                "message": message,
                "mode": mode,
            },
            "directive": scheduled.model_dump(mode="json"),
        }

    if kind == "fail":
        failure_reason = scheduled.reason or "Execution failed."
        return {
            "status": "failed",
            "result": {"error": failure_reason},
        }

    # Unknown directive types are accepted without doing any work.
    return {
        "status": "completed",
        "result": {
            "message": "Directive accepted.",
            "directive_type": kind,
        },
    }
def _execute_plan(
self,
task: UserTask,
directive: ExecutionDirective,
permission_override: PermissionDecision | None = None,
secret_override: str | None = None,
password_override: str | None = None,
) -> dict[str, Any]:
# Unified format: {"type": "plan", "payload": {"steps": [...]}}
# Need to extract steps from nested payload
import json
payload = directive.payload
steps_data = []
# If payload has "steps" directly, use them
if "steps" in payload:
steps_data = payload.get("steps", [])
# If payload is a string (JSON), parse it
elif isinstance(payload, str) and payload.strip().startswith("{"):
try:
parsed = json.loads(payload)
steps_data = parsed.get("payload", {}).get("steps", [])
except:
steps_data = []
if steps_data:
plan_json = json.dumps({"type": "plan", "payload": {"steps": steps_data}})
else:
plan_json = json.dumps(payload)
plan_steps = self._scheduler.parse_plan_steps(plan_json, task.task_id)
if not plan_steps:
return {
"status": "failed",
"result": {"error": "Failed to parse plan steps from directive"},
}
if not self._scheduler.validate_no_cycles(plan_steps):
self._publish(task, PLAN_FAILED, {"error": "Cycle detected in plan"})
return {
"status": "failed",
"result": {"error": "Cycle detected in plan"},
}
graph = self._scheduler.build_task_graph(plan_steps)
self._publish(task, PLAN_STARTED, {"steps": len(plan_steps)})
completed_steps: set[str] = set()
step_results: list[dict[str, Any]] = []
critic_retries_used = 0 # Track critic→replan cycles
ready_steps = self._get_ready_steps(graph, completed_steps)
while ready_steps:
step = ready_steps.pop(0)
# Handle respond kind directly without tool execution
if step.kind == "respond":
result = {
"status": "completed",
"result": {
"message": step.args.get("text", step.description),
},
}
else:
step_directive = ExecutionDirective(
type=step.kind,
payload={
"tool": step.tool,
"args": step.args,
},
requires_permission=step.requires_confirmation,
reason=step.description,
)
result = self._execute_tool(
task=task,
directive=step_directive,
permission_override=permission_override,
secret_override=secret_override,
password_override=password_override,
)
# If tool needs human input/review - return immediately.
if result.get("status") in (
"awaiting_permission",
"awaiting_input",
"awaiting_password",
"awaiting_review",
):
return {
"status": result.get("status"),
"result": result.get("result", {}),
"step_results": step_results,
}
step_results.append({
"step_id": step.id,
"result": result,
})
completed_steps.add(step.id)
self._publish(task, STEPPED_COMPLETED, {
"step_id": step.id,
"status": result.get("status"),
})
# === Critic evaluation ===
if self._critic and result.get("status") == "completed":
critic_score = self._evaluate_with_critic(task, step, result)
if critic_score:
result["critic_score"] = {
"correctness": critic_score.correctness,
"usefulness": critic_score.usefulness,
"safety": critic_score.safety,
"memory_store": critic_score.memory_store,
"weight": critic_score.weight,
"explanation": critic_score.explanation,
}
self._save_critique_to_memory(task, step, critic_score)
# Check if step result is satisfactory
min_correctness = 0.5
if critic_score.correctness < min_correctness:
# Step failed critic check — try to recover
if critic_retries_used < self._critic_retry_limit and step.kind != "respond":
critic_retries_used += 1
self._publish(task, CRITIC_RESULT, {
"step_id": step.id,
"score": critic_score.model_dump(mode="json"),
"action": "retry",
"retry": critic_retries_used,
})
# Retry the same step — rebuild directive
retry_directive = ExecutionDirective(
type=step.kind,
payload={"tool": step.tool, "args": step.args},
requires_permission=step.requires_confirmation,
reason=step.description,
)
retry_result = self._execute_tool(
task=task,
directive=retry_directive,
permission_override=permission_override,
secret_override=secret_override,
password_override=password_override,
)
if retry_result.get("status") == "completed":
result = retry_result
step_results[-1]["result"] = result
# Re-evaluate after retry
critic_score2 = self._evaluate_with_critic(task, step, result)
if critic_score2 and critic_score2.correctness >= min_correctness:
# Retry succeeded
continue
# If retry also failed, continue to next step
else:
self._publish(task, CRITIC_RESULT, {
"step_id": step.id,
"score": critic_score.model_dump(mode="json"),
"action": "give_up",
"reason": f"Critic retry limit ({self._critic_retry_limit}) reached",
})
# Handle failed step
if result.get("status") == "failed":
review = self._build_failed_step_review(task, step, result)
if review:
return {
"status": "awaiting_review",
"result": {
"error": f"Step {step.id} requires review before replanning",
"failed_step": step.id,
"step_results": step_results,
"review": review,
},
}
recovery = self._recover_failed_step(
task=task,
step=step,
result=result,
step_results=step_results,
permission_override=permission_override,
secret_override=secret_override,
password_override=password_override,
)
if recovery.get("status") == "awaiting_permission":
return recovery
if recovery.get("status") == "completed":
recovered_result = recovery.get("result")
if recovered_result:
step_results[-1]["result"] = recovered_result
if recovery.get("finish"):
return {
"status": "completed",
"result": {
"message": recovery.get("message", "Recovered from failed step"),
"step_results": step_results,
},
}
else:
return {
"status": "failed",
"result": {
"error": f"Step {step.id} failed",
"failed_step": step.id,
"step_results": step_results,
"recovery": recovery.get("result"),
},
}
ready_steps = self._get_ready_steps(graph, completed_steps)
return {
"status": "completed",
"result": {
"message": f"Plan executed: {len(completed_steps)} steps completed",
"step_results": step_results,
},
}
def _build_failed_step_review(self, task: UserTask, step, result: dict[str, Any]) -> dict[str, Any] | None:
if step.tool != "shell_exec" or not self._command_analyzer:
return None
command = str((step.args or {}).get("command", ""))
if not command:
return None
diagnosis = self._command_analyzer.analyze(
command=command,
task_id=task.task_id,
session_id=task.session_id,
)
if diagnosis.get("type") == "ok":
return None
return {
"step_id": step.id,
"tool": step.tool,
"command": command,
"diagnosis": diagnosis,
"critic_assessment": {
"classification": "model_planning_error",
"needs_replan": True,
"explanation": "Structured command analysis found a model action error before recovery.",
},
}
def _recover_failed_step(
self,
task: UserTask,
step,
result: dict[str, Any],
step_results: list[dict[str, Any]],
permission_override: PermissionDecision | None = None,
secret_override: str | None = None,
password_override: str | None = None,
) -> dict[str, Any]:
if self._recovery_limit <= 0 or not self._critic:
return {"status": "failed", "result": {"reason": "recovery_unavailable"}}
decision = self._evaluate_recovery(task, step, result, step_results)
action = decision.get("action", "fail")
if action == "continue":
recovered = dict(result)
recovered["status"] = "completed"
recovered["recovery_decision"] = decision
return {"status": "completed", "result": recovered}
if action == "respond":
recovered = dict(result)
recovered["status"] = "completed"
recovered["recovery_decision"] = decision
return {
"status": "completed",
"result": recovered,
"finish": True,
"message": decision.get("message") or decision.get("reason") or "Recovered by responding to user",
}
if action == "retry":
retry_tool = decision.get("tool") or step.tool
retry_args = decision.get("args") or step.args
retry_result = self._execute_tool(
task=task,
directive=ExecutionDirective(
type="tool",
payload={"tool": retry_tool, "args": retry_args},
requires_permission=True,
reason=decision.get("reason", "Recovery retry"),
),
permission_override=permission_override,
secret_override=secret_override,
password_override=password_override,
)
if retry_result.get("status") == "awaiting_permission":
return retry_result
retry_result["recovery_decision"] = decision
if retry_result.get("status") == "completed":
return {"status": "completed", "result": retry_result}
return {"status": "failed", "result": {"decision": decision, "retry_result": retry_result}}
return {"status": "failed", "result": decision}
def _evaluate_recovery(
    self,
    task: UserTask,
    step,
    result: dict[str, Any],
    step_results: list[dict[str, Any]],
) -> dict[str, Any]:
    """Ask the critic for a recovery decision about a failed step.

    Publishes CRITIC_CALLED (mode "recovery") before the call and
    CRITIC_RESULT afterwards, carrying either the parsed decision plus the
    raw critic output, or the error text.

    Returns:
        The parsed decision dict, or ``{"action": "fail", "reason": ...}``
        when generating, parsing or publishing raises.
    """
    recovery_prompt = self._build_recovery_prompt(task, step, result, step_results)
    self._publish(task, CRITIC_CALLED, {"step_id": step.id, "mode": "recovery"})
    try:
        # The critic adapter is async; drive it to completion synchronously.
        raw_output = asyncio.run(self._critic.generate(recovery_prompt, max_tokens=512))
        parsed_decision = self._parse_recovery_decision(raw_output)
        self._publish(
            task,
            CRITIC_RESULT,
            {
                "step_id": step.id,
                "mode": "recovery",
                "decision": parsed_decision,
                "raw": raw_output,
            },
        )
    except Exception as exc:
        # Best effort: any critic problem degrades to a "fail" decision.
        logger.warning(f"Recovery evaluation failed: {exc}")
        failure_text = str(exc)
        self._publish(
            task,
            CRITIC_RESULT,
            {
                "step_id": step.id,
                "mode": "recovery",
                "error": failure_text,
            },
        )
        return {"action": "fail", "reason": failure_text}
    return parsed_decision
def _build_recovery_prompt(
self,
task: UserTask,
step,
result: dict[str, Any],
step_results: list[dict[str, Any]],
) -> str:
return f"""You are a recovery controller for an agent runtime.
Decide what to do after a failed tool step. A non-zero exit code is not always fatal.
Interpret the failure in context.
Allowed actions:
- continue: failure is acceptable information; continue the plan.
- retry: try one alternative tool call. Include "tool" and "args".
- respond: stop and answer the user with available information. Include "message".
- fail: real failure; stop the task.
Return ONLY JSON:
{{"action":"continue|retry|respond|fail","reason":"...","tool":"shell_exec","args":{{...}},"message":"..."}}
Task:
{task.input}
Failed step:
id={step.id}
tool={step.tool}
args={json.dumps(step.args, ensure_ascii=False)}
description={step.description}
Failed result:
{json.dumps(result, ensure_ascii=False, indent=2)}
Previous step results:
{json.dumps(step_results, ensure_ascii=False, indent=2)}
"""
def _parse_recovery_decision(self, output: str) -> dict[str, Any]:
try:
json_start = output.find("{")
json_end = output.rfind("}") + 1
if json_start < 0 or json_end <= 0:
return {"action": "fail", "reason": "Recovery output was not JSON"}
data = json.loads(output[json_start:json_end])
action = data.get("action", "fail")
if action not in {"continue", "retry", "respond", "fail"}:
action = "fail"
data["action"] = action
return data
except (json.JSONDecodeError, TypeError, ValueError) as e:
return {"action": "fail", "reason": f"Recovery JSON parse failed: {e}"}
def _get_ready_steps(
self,
graph: dict[str, Any],
completed: set[str],
) -> list:
if not graph or not graph.get("nodes"):
return []
step_map: dict = graph.get("step_map", {})
ready = []
for node in graph["nodes"]:
node_id = node["id"]
if node_id in completed:
continue
deps = node.get("depends_on", [])
if all(dep in completed for dep in deps):
step = step_map.get(node_id)
if step:
ready.append(step)
return ready
def _evaluate_with_critic(
self,
task: UserTask,
step,
result: dict[str, Any],
) -> CriticScore | None:
if not self._critic:
return None
critic_prompt = self._build_critic_prompt(step, result)
self._publish(task, CRITIC_CALLED, {"step_id": step.id})
try:
critic_output = asyncio.run(self._critic.generate(critic_prompt))
score = self._parse_critic_score(critic_output)
self._publish(task, CRITIC_RESULT, {
"step_id": step.id,
"score": score.model_dump(mode="json") if score else None,
})
if score:
result["critic_score"] = {
"correctness": score.correctness,
"usefulness": score.usefulness,
"safety": score.safety,
"memory_store": score.memory_store,
"weight": score.weight,
"explanation": score.explanation,
}
return score
except Exception as e:
logger.warning(f"Critic evaluation failed: {e}")
self._publish(task, CRITIC_RESULT, {
"step_id": step.id,
"error": str(e),
})
return None
def _save_critique_to_memory(
self,
task: UserTask,
step,
score: CriticScore,
) -> None:
"""Save critic evaluation as critique entry in memory, using MemoryWritePolicy."""
if not self._memory_interface:
return
try:
# Check with policy before saving
if self._memory_policy:
decision = self._memory_policy.decide(
critic_score=score,
memory_type="critique",
session_id=task.session_id,
)
if decision == "skip":
logger.info(f"MemoryWritePolicy skipped critique for {step.tool}")
return
# For "store_with_weight", we could adjust weight, but critic score already has weight
tool_name = step.tool
tool_args = step.args or {}
args_str = ", ".join([f"{k}={v}" for k, v in tool_args.items()])
critique_text = f"Tool: {tool_name}({args_str}) | Task: {task.input[:100]} | Scores: correctness={score.correctness}, usefulness={score.usefulness}, safety={score.safety} | {score.explanation}"
metadata = {
"task_input": task.input,
"tool": tool_name,
"args": tool_args,
"step_id": step.id,
"scores": {
"correctness": score.correctness,
"usefulness": score.usefulness,
"safety": score.safety,
},
}
self._memory_interface.insert(
text=critique_text,
kind="critique",
source="critic",
task_id=task.task_id,
session_id=task.session_id,
weight=score.weight,
metadata=metadata,
)
logger.info(f"Saved critique to memory: {tool_name} task_id={task.task_id}")
except Exception as e:
logger.warning(f"Failed to save critique to memory: {e}")
def _build_critic_prompt(self, step, result: dict[str, Any]) -> str:
base_prompt = self._prompts.get("critic", "")
tool_result = result.get("result", {})
# Truncate long outputs to avoid exceeding context window
# Keep output under ~2000 chars to leave room for prompt + generation
output = tool_result.get("output", "")
if isinstance(output, str) and len(output) > 2000:
output = output[:2000] + "\n... [truncated]"
elif not isinstance(output, str):
output_str = json.dumps(output, ensure_ascii=False)
if len(output_str) > 2000:
output = output_str[:2000] + "\n... [truncated]"
else:
output = output_str
# Build a compact result representation
compact_result = {
"ok": tool_result.get("ok"),
"output": output,
"error": tool_result.get("error"),
"exit_code": tool_result.get("metadata", {}).get("exit_code"),
}
return f"""{base_prompt}
Step: {step.description}
Tool: {step.tool}
Args: {step.args}
Result:
{json.dumps(compact_result, indent=2, ensure_ascii=False)}
Evaluate and respond with JSON:
{{"correctness": 0.0-1.0, "usefulness": 0.0-1.0, "safety": 0.0-1.0, "memory_store": true|false, "weight": 0.0-1.0, "explanation": "..."}}"""
def _parse_critic_score(self, output: str) -> CriticScore | None:
try:
json_start = output.find("{")
json_end = output.rfind("}") + 1
if json_start < 0:
return None
json_str = output[json_start:json_end]
data = json.loads(json_str)
return CriticScore(
correctness=data.get("correctness", 0.5),
usefulness=data.get("usefulness", 0.5),
safety=data.get("safety", 1.0),
memory_store=data.get("memory_store", False),
weight=data.get("weight", 0.5),
explanation=data.get("explanation", ""),
)
except (json.JSONDecodeError, ValueError, TypeError) as e:
logger.warning(f"Critic score parsing failed: {e}")
return None
def _execute_coder(
self,
task: UserTask,
directive: ExecutionDirective,
) -> dict[str, Any]:
if not self._coder:
return {"status": "failed", "result": {"error": "Coder model not available"}}
coder_task = directive.payload.get("task", "")
if not coder_task:
return {"status": "failed", "result": {"error": "Missing task for coder"}}
try:
output = asyncio.run(self._coder.generate(coder_task))
return {
"status": "completed",
"result": {"code": output},
}
except Exception as e:
logger.warning(f"Coder execution failed: {e}")
return {"status": "failed", "result": {"error": str(e)}}
def _execute_tool(
self,
task: UserTask,
directive: ExecutionDirective,
permission_override: PermissionDecision | None = None,
secret_override: str | None = None,
password_override: str | None = None,
) -> dict[str, Any]:
tool_name = str(directive.payload.get("tool", "")).strip()
tool_args = dict(directive.payload.get("args", {}))
if password_override:
tool_args["password"] = password_override
if not tool_name:
return {"status": "failed", "result": {"error": "Missing tool name"}}
# Tool-first: validate tool exists in registry
available_tools = self._tool_registry.list_names()
if tool_name not in available_tools:
return {"status": "failed", "result": {"error": f"Unknown tool: {tool_name}. Available tools: {available_tools}"}}
permission_result = None
# If permission_override is provided, skip permission check
if permission_override is not None:
permission_result = {
"decision": permission_override.decision,
"command": tool_args.get("command", ""),
"cached": True,
}
# Check permission for shell_exec and file_write
elif tool_name == "shell_exec":
permission_result = self._permission_service.check_shell_command(
task_id=task.task_id,
session_id=task.session_id,
command=str(tool_args.get("command", "")),
)
elif tool_name == "file_write":
# Allow writing to runtime data directory without permission check
write_path = str(tool_args.get("path", ""))
if "allowed_commands.json" in write_path or "/data/runtime" in write_path:
# Internal system write - allow without permission
permission_result = {"decision": "allowed", "path": write_path}
else:
permission_result = self._permission_service.check_write_path(
task_id=task.task_id,
session_id=task.session_id,
path=write_path,
)
# Handle permission result
if permission_result:
decision = permission_result.get("decision", "unknown")
# Hard stop - deny execution
if decision == "hard_stop":
self._publish(task, PERMISSION_REQUESTED, permission_result)
return {
"status": "failed",
"result": {
"error": f"Command blocked: {permission_result.get('reason', 'Hard stop command')}",
"command": permission_result.get("command", ""),
},
}
# Cached - already allowed
if decision in ("allowed_always", "allowed") or permission_result.get("cached"):
self._publish(task, PERMISSION_RESOLVED, permission_result)
# Need user confirmation - return immediately, don't continue execution
elif decision == "prompt":
self._publish(task, PERMISSION_REQUESTED, permission_result)
return {
"status": "awaiting_permission",
"result": {
"error": "Permission required before execution.",
"permission_request": permission_result,
},
}
# Hard stop - return immediately
elif decision == "deny":
self._publish(task, PERMISSION_RESOLVED, permission_result)
return {
"status": "failed",
"result": {
"error": "Permission denied",
"command": permission_result.get("command", ""),
},
}
# Deny
elif decision == "deny":
self._publish(task, PERMISSION_RESOLVED, permission_result)
return {
"status": "failed",
"result": {
"error": "Permission denied",
"command": permission_result.get("command", ""),
},
}
if tool_name == "shell_exec":
command = str(tool_args.get("command", ""))
# Determine if sudo password is needed:
# 1. Command explicitly starts with "sudo"
# 2. Command is a known sudo-requiring command (apt, systemctl, etc.) — flagged by permission service
needs_password = command.startswith("sudo ") or (permission_result is not None and permission_result.get("requires_sudo", False))
if needs_password and secret_override is None:
secret_request = SecretRequest(
task_id=task.task_id,
session_id=task.session_id,
kind="sudo_password",
prompt="Sudo password required",
command=command,
)
self._publish(task, SECRET_REQUESTED, secret_request.model_dump(mode="json"))
return {
"status": "awaiting_input",
"result": {
"error": "Secret required",
"secret_request": secret_request.model_dump(mode="json"),
},
}
if needs_password and secret_override is not None:
# Inject sudo -S for explicit sudo commands, or prepend sudo -S for implicit ones
if command.startswith("sudo "):
tool_args["command"] = f"sudo -S -p '' {command[len('sudo '):]}"
else:
tool_args["command"] = f"sudo -S -p '' {command}"
tool_args["stdin_secret"] = f"{secret_override}\n"
tool_call = ToolCall(
tool=tool_name,
args=tool_args,
task_id=task.task_id,
step_id="step-1",
)
self._publish(task, TOOL_CALLED, tool_call.model_dump(mode="json"))
if tool_name == "shell_exec":
tool_args["__output_callback"] = lambda stream, chunk: self._publish(
task,
TOOL_OUTPUT_CHUNK,
{
"tool": tool_name,
"step_id": "step-1",
"stream": stream,
"chunk": chunk,
},
)
tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args)
self._publish(task, TOOL_COMPLETED, tool_result.model_dump(mode="json"))
metadata = tool_result.metadata or {}
needs_sudo = metadata.get("needs_sudo", False)
sudo_auth_failed = metadata.get("sudo_auth_failed", False) or self._looks_like_sudo_auth_failure(tool_result)
if tool_name == "shell_exec" and not tool_result.ok and sudo_auth_failed:
original_command = str(directive.payload.get("args", {}).get("command", tool_args.get("command", "")))
secret_request = SecretRequest(
task_id=task.task_id,
session_id=task.session_id,
kind="sudo_password",
prompt="Sudo password incorrect. Try again",
command=original_command,
)
self._publish(task, SECRET_REQUESTED, secret_request.model_dump(mode="json"))
return {
"status": "awaiting_input",
"result": {
"error": "Sudo password failed",
"secret_request": secret_request.model_dump(mode="json"),
"attempt_failed": True,
"tool_result": tool_result.model_dump(mode="json"),
},
}
if not tool_result.ok and needs_sudo:
return {
"status": "awaiting_password",
"result": {
"task_id": task.task_id,
"needs_sudo": True,
"command": tool_args.get("command", ""),
"error": tool_result.error or "Permission denied",
"tool_result": tool_result.model_dump(mode="json"),
},
}
if tool_name == "shell_exec" and not tool_result.ok and self._command_analyzer:
original_command = str(directive.payload.get("args", {}).get("command", tool_args.get("command", "")))
diagnosis = self._command_analyzer.analyze(
command=original_command,
task_id=task.task_id,
session_id=task.session_id,
)
if diagnosis.get("type") != "ok":
return {
"status": "awaiting_review",
"result": {
"error": "Tool action requires review before replanning",
"review": {
"step_id": "step-1",
"tool": tool_name,
"command": original_command,
"diagnosis": diagnosis,
"critic_assessment": {
"classification": "model_planning_error",
"needs_replan": True,
"explanation": "Structured command analysis found a model action error before recovery.",
},
},
"tool_result": tool_result.model_dump(mode="json"),
},
}
return {
"status": "completed" if tool_result.ok else "failed",
"result": tool_result.model_dump(mode="json"),
}
def _looks_like_sudo_auth_failure(self, tool_result: ToolResult) -> bool:
output = f"{tool_result.output or ''}\n{tool_result.error or ''}".lower()
return any(
marker in output
for marker in (
"incorrect password",
"incorrect password attempt",
"sudo: no password was provided",
"sorry, try again",
"authentication failure",
)
)
def _publish(self, task: UserTask, event_type: str, payload: dict[str, Any]) -> None:
if not self._event_bus:
return
event = RuntimeEvent(
task_id=task.task_id,
session_id=task.session_id,
sequence=self._event_bus.next_sequence(task.task_id),
type=event_type,
payload=payload,
)
self._event_bus.publish(event)