756 lines
27 KiB
Python
756 lines
27 KiB
Python
from __future__ import annotations

import asyncio
import json
import logging
import posixpath
from typing import Any

from app.core.contracts import (
    CriticScore,
    ExecutionDirective,
    PermissionDecision,
    PermissionRequest,
    RuntimeEvent,
    SecretRequest,
    ToolCall,
    UserTask,
)
from app.core.execution_scheduler import ExecutionScheduler
from app.events.event_bus import EventBus
from app.events.event_types import (
    CRITIC_CALLED,
    CRITIC_RESULT,
    PERMISSION_REQUESTED,
    PERMISSION_RESOLVED,
    PLAN_FAILED,
    PLAN_STARTED,
    SECRET_REQUESTED,
    STEP_STARTED,
    STEPPED_COMPLETED,
    TOOL_CALLED,
    TOOL_COMPLETED,
)
from app.memory.interface import MemoryInterface
from app.memory.write_policy import MemoryWritePolicy
from app.models.async_adapters import AsyncCriticAdapter, AsyncCoderAdapter
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ExecutionEngine:
    """Execute scheduler directives synchronously and report plain-dict results.

    The engine dispatches on the directive type (plan, tool, respond, coder,
    fail), gates ``shell_exec`` and ``file_write`` tool calls through the
    permission service, optionally asks a critic model to score or recover
    plan steps, and publishes runtime events on the event bus along the way.
    Execution methods return ``{"status": ..., "result": ...}`` dictionaries.
    """

    # Tool-argument keys whose values must never be echoed onto the event bus.
    _SECRET_ARG_KEYS = frozenset({"password", "stdin_secret"})
    # Actions the recovery controller is allowed to choose.
    _RECOVERY_ACTIONS = frozenset({"continue", "retry", "respond", "fail"})

    def __init__(
        self,
        event_bus: EventBus,
        tool_registry,
        permission_service,
        scheduler: ExecutionScheduler | None = None,
        critic: AsyncCriticAdapter | None = None,
        memory_policy: MemoryWritePolicy | None = None,
        memory_interface: MemoryInterface | None = None,
        prompts: dict[str, str] | None = None,
        recovery_limit: int = 1,
    ) -> None:
        """Store collaborators; a default ``ExecutionScheduler`` is created if none is given.

        Args:
            event_bus: Bus used by ``_publish``; a falsy value disables events.
            tool_registry: Object exposing ``list_names()`` and ``get(name)``.
            permission_service: Object exposing ``check_shell_command`` and
                ``check_write_path``.
            scheduler: Directive scheduler / plan parser.
            critic: Optional critic model used for scoring and recovery.
            memory_policy: Stored for callers; not read inside this class.
            memory_interface: Optional store that receives critique entries.
            prompts: Prompt templates; only the ``"critic"`` key is read here.
            recovery_limit: Recovery is disabled when this is ``<= 0``.
        """
        self._event_bus = event_bus
        self._tool_registry = tool_registry
        self._permission_service = permission_service
        self._scheduler = scheduler or ExecutionScheduler()
        self._critic = critic
        self._coder: AsyncCoderAdapter | None = None
        self._memory_policy = memory_policy
        self._memory_interface = memory_interface
        self._prompts = prompts or {}
        self._recovery_limit = recovery_limit

    def set_critic(self, critic: AsyncCriticAdapter) -> None:
        """Install or replace the critic model adapter."""
        self._critic = critic

    def set_coder(self, coder: AsyncCoderAdapter) -> None:
        """Install or replace the coder model adapter."""
        self._coder = coder

    def set_memory_policy(self, policy: MemoryWritePolicy) -> None:
        """Install or replace the memory write policy."""
        self._memory_policy = policy

    # ------------------------------------------------------------------
    # Pure helpers (no I/O, no events)
    # ------------------------------------------------------------------

    @staticmethod
    def _is_awaiting(status: Any) -> bool:
        """Return True for any ``awaiting_*`` status (permission, input, password)."""
        return isinstance(status, str) and status.startswith("awaiting_")

    @staticmethod
    def _extract_plan_steps(payload: Any) -> list:
        """Pull the raw step list out of a plan payload.

        Accepts either a dict carrying ``"steps"`` or a JSON string in the
        unified format ``{"type": "plan", "payload": {"steps": [...]}}``.
        Returns an empty list when no steps can be found.
        """
        if isinstance(payload, dict):
            return payload.get("steps") or []
        if isinstance(payload, str) and payload.strip().startswith("{"):
            try:
                parsed = json.loads(payload)
                nested = parsed.get("payload")
                # Prefer the nested unified format; tolerate top-level "steps".
                container = nested if isinstance(nested, dict) else parsed
                return container.get("steps") or []
            except (json.JSONDecodeError, AttributeError):
                return []
        return []

    @staticmethod
    def _extract_json_object(output: Any) -> dict[str, Any] | None:
        """Return the first JSON object embedded in model output.

        Every ``{`` is tried as a possible object start, so prose containing
        stray braces before or after the object does not break parsing.

        Returns:
            The decoded object, or ``None`` when *output* is not a string or
            contains no ``{`` at all.

        Raises:
            json.JSONDecodeError: Braces were present but nothing decoded.
        """
        if not isinstance(output, str):
            return None
        decoder = json.JSONDecoder()
        first_failure: json.JSONDecodeError | None = None
        start = output.find("{")
        while start >= 0:
            try:
                candidate, _ = decoder.raw_decode(output, start)
            except json.JSONDecodeError as exc:
                if first_failure is None:
                    first_failure = exc
            else:
                return candidate
            start = output.find("{", start + 1)
        if first_failure is not None:
            raise first_failure
        return None

    @staticmethod
    def _is_internal_write_path(path: str) -> bool:
        """Return True for writes exempt from the permission check.

        Exempt targets are a file named ``allowed_commands.json`` and anything
        under a ``data/runtime`` directory. The path is normalized first so
        ``..`` segments or look-alike names cannot smuggle other targets past
        the permission service.
        """
        # NOTE(review): the exemption is not anchored to a known root directory;
        # confirm whether it should be.
        normalized = posixpath.normpath(path.replace("\\", "/"))
        if posixpath.basename(normalized) == "allowed_commands.json":
            return True
        return "/data/runtime/" in f"/{normalized.lstrip('/')}/"

    @staticmethod
    def _classify_permission(permission_result: dict[str, Any]) -> str:
        """Reduce a permission-service answer to one verdict.

        Returns one of ``"hard_stop"``, ``"deny"``, ``"allowed"``, ``"prompt"``
        or ``"unknown"``. Refusals are checked before the ``cached`` flag so a
        cached denial can never be mistaken for an approval.
        """
        decision = permission_result.get("decision", "unknown")
        if decision in ("hard_stop", "deny"):
            return decision
        if decision in ("allowed_always", "allowed") or permission_result.get("cached"):
            return "allowed"
        if decision == "prompt":
            return "prompt"
        return "unknown"

    @classmethod
    def _redact_secret_args(cls, tool_args: dict[str, Any]) -> dict[str, Any]:
        """Return a copy of *tool_args* that is safe to publish in events."""
        return {
            key: ("***" if key in cls._SECRET_ARG_KEYS else value)
            for key, value in tool_args.items()
        }

    # ------------------------------------------------------------------
    # Directive dispatch
    # ------------------------------------------------------------------

    def execute(
        self,
        task: UserTask,
        directive: ExecutionDirective,
        permission_override: PermissionDecision | None = None,
        secret_override: str | None = None,
        password_override: str | None = None,
    ) -> dict[str, Any]:
        """Run one directive and return its status/result dictionary.

        The directive is first passed through the scheduler, a ``STEP_STARTED``
        event is published, and the call is dispatched on the scheduled type.
        Unrecognised types are acknowledged with a ``completed`` status.
        """
        scheduled = self._scheduler.next_directive(directive)
        self._publish(task, STEP_STARTED, {"directive_type": scheduled.type})

        if scheduled.type == "plan":
            return self._execute_plan(
                task=task,
                directive=scheduled,
                permission_override=permission_override,
                secret_override=secret_override,
                password_override=password_override,
            )

        if scheduled.type == "tool":
            return self._execute_tool(
                task=task,
                directive=scheduled,
                permission_override=permission_override,
                secret_override=secret_override,
                password_override=password_override,
            )

        if scheduled.type == "respond":
            return {
                "status": "completed",
                "result": {
                    "message": f"Runtime accepted task: {task.input}",
                    "mode": scheduled.payload.get("mode", "direct_response"),
                },
            }

        if scheduled.type == "coder":
            return self._execute_coder(task=task, directive=scheduled)

        if scheduled.type == "fail":
            return {
                "status": "failed",
                "result": {"error": scheduled.reason or "Execution failed."},
            }

        return {
            "status": "completed",
            "result": {
                "message": "Directive accepted.",
                "directive_type": scheduled.type,
            },
        }

    # ------------------------------------------------------------------
    # Plan execution
    # ------------------------------------------------------------------

    def _execute_plan(
        self,
        task: UserTask,
        directive: ExecutionDirective,
        permission_override: PermissionDecision | None = None,
        secret_override: str | None = None,
        password_override: str | None = None,
    ) -> dict[str, Any]:
        """Run every step of a plan directive in dependency order.

        Steps are executed one at a time; after each step the ready set is
        recomputed from the task graph. Execution stops early when a step is
        waiting on the user (any ``awaiting_*`` status) or fails without a
        successful recovery.

        Returns:
            ``completed`` with ``step_results`` when the ready set runs dry,
            ``failed`` on parse errors, cycles or unrecovered step failures,
            or the ``awaiting_*`` status of the step that paused the plan.
        """
        payload = directive.payload
        steps_data = self._extract_plan_steps(payload)

        # Re-wrap into the unified format the scheduler parses.
        if steps_data:
            plan_json = json.dumps({"type": "plan", "payload": {"steps": steps_data}})
        else:
            plan_json = json.dumps(payload)

        plan_steps = self._scheduler.parse_plan_steps(plan_json, task.task_id)
        if not plan_steps:
            return {
                "status": "failed",
                "result": {"error": "Failed to parse plan steps from directive"},
            }

        if not self._scheduler.validate_no_cycles(plan_steps):
            self._publish(task, PLAN_FAILED, {"error": "Cycle detected in plan"})
            return {
                "status": "failed",
                "result": {"error": "Cycle detected in plan"},
            }

        graph = self._scheduler.build_task_graph(plan_steps)
        self._publish(task, PLAN_STARTED, {"steps": len(plan_steps)})

        # Plan-level options only exist on dict payloads; a string payload
        # falls back to the default (critic enabled).
        plan_options = payload if isinstance(payload, dict) else {}
        requires_execution = plan_options.get("requires_execution", True)

        completed_steps: set[str] = set()
        step_results: list[dict[str, Any]] = []
        ready_steps = self._get_ready_steps(graph, completed_steps)

        while ready_steps:
            step = ready_steps[0]

            if step.kind == "respond":
                # Respond steps carry their own text; no tool is involved.
                result = {
                    "status": "completed",
                    "result": {
                        "message": step.args.get("text", step.description),
                    },
                }
            else:
                step_directive = ExecutionDirective(
                    type=step.kind,
                    payload={
                        "tool": step.tool,
                        "args": step.args,
                    },
                    requires_permission=step.requires_confirmation,
                    reason=step.description,
                )
                result = self._execute_tool(
                    task=task,
                    directive=step_directive,
                    permission_override=permission_override,
                    secret_override=secret_override,
                    password_override=password_override,
                )

            status = result.get("status")

            # The step is waiting on the user (permission, secret or password):
            # hand control back instead of recording it as completed.
            if self._is_awaiting(status):
                return {
                    "status": status,
                    "result": result.get("result", {}),
                    "step_results": step_results,
                }

            step_results.append({"step_id": step.id, "result": result})
            completed_steps.add(step.id)
            self._publish(task, STEPPED_COMPLETED, {
                "step_id": step.id,
                "status": status,
            })

            if status == "failed":
                recovery = self._recover_failed_step(
                    task=task,
                    step=step,
                    result=result,
                    step_results=step_results,
                    permission_override=permission_override,
                    secret_override=secret_override,
                    password_override=password_override,
                )
                recovery_status = recovery.get("status")

                if self._is_awaiting(recovery_status):
                    return recovery

                if recovery_status != "completed":
                    return {
                        "status": "failed",
                        "result": {
                            "error": f"Step {step.id} failed",
                            "failed_step": step.id,
                            "step_results": step_results,
                            "recovery": recovery.get("result"),
                        },
                    }

                recovered_result = recovery.get("result")
                if recovered_result:
                    step_results[-1]["result"] = recovered_result
                    # Keep the local in sync so the critic scores (and annotates)
                    # the result that is actually reported.
                    result = recovered_result

                if recovery.get("finish"):
                    return {
                        "status": "completed",
                        "result": {
                            "message": recovery.get("message", "Recovered from failed step"),
                            "step_results": step_results,
                        },
                    }

            if requires_execution and self._critic:
                critic_result = self._evaluate_with_critic(task, step, result)
                if critic_result:
                    # Convert to a plain dict so the result stays JSON-serializable.
                    result["critic_score"] = (
                        critic_result.model_dump(mode="json")
                        if hasattr(critic_result, "model_dump")
                        else dict(critic_result)
                    )
                    self._save_critique_to_memory(task, step, critic_result)

            ready_steps = self._get_ready_steps(graph, completed_steps)

        return {
            "status": "completed",
            "result": {
                "message": f"Plan executed: {len(completed_steps)} steps completed",
                "step_results": step_results,
            },
        }

    def _recover_failed_step(
        self,
        task: UserTask,
        step,
        result: dict[str, Any],
        step_results: list[dict[str, Any]],
        permission_override: PermissionDecision | None = None,
        secret_override: str | None = None,
        password_override: str | None = None,
    ) -> dict[str, Any]:
        """Ask the critic how to proceed after a failed step and act on it.

        Returns a dict whose ``status`` is ``completed`` (optionally with
        ``finish`` and ``message`` when the plan should stop and answer the
        user), ``failed``, or an ``awaiting_*`` status passed through from a
        retry that needs user input.
        """
        if self._recovery_limit <= 0 or not self._critic:
            return {"status": "failed", "result": {"reason": "recovery_unavailable"}}

        decision = self._evaluate_recovery(task, step, result, step_results)
        action = decision.get("action", "fail")

        if action in ("continue", "respond"):
            recovered = dict(result)
            recovered["status"] = "completed"
            recovered["recovery_decision"] = decision
            outcome: dict[str, Any] = {"status": "completed", "result": recovered}
            if action == "respond":
                outcome["finish"] = True
                outcome["message"] = (
                    decision.get("message")
                    or decision.get("reason")
                    or "Recovered by responding to user"
                )
            return outcome

        if action == "retry":
            retry_tool = decision.get("tool") or step.tool
            retry_args = decision.get("args")
            # The model may return malformed args; fall back to the step's own.
            if not isinstance(retry_args, dict) or not retry_args:
                retry_args = step.args

            retry_result = self._execute_tool(
                task=task,
                directive=ExecutionDirective(
                    type="tool",
                    payload={"tool": retry_tool, "args": retry_args},
                    requires_permission=True,
                    reason=decision.get("reason", "Recovery retry"),
                ),
                permission_override=permission_override,
                secret_override=secret_override,
                password_override=password_override,
            )
            if self._is_awaiting(retry_result.get("status")):
                return retry_result

            retry_result["recovery_decision"] = decision
            if retry_result.get("status") == "completed":
                return {"status": "completed", "result": retry_result}
            return {
                "status": "failed",
                "result": {"decision": decision, "retry_result": retry_result},
            }

        return {"status": "failed", "result": decision}

    def _evaluate_recovery(
        self,
        task: UserTask,
        step,
        result: dict[str, Any],
        step_results: list[dict[str, Any]],
    ) -> dict[str, Any]:
        """Query the critic for a recovery decision; never raises.

        Publishes ``CRITIC_CALLED`` and ``CRITIC_RESULT`` (mode ``recovery``).
        Any error while prompting or parsing yields ``{"action": "fail"}``.
        """
        self._publish(task, CRITIC_CALLED, {"step_id": step.id, "mode": "recovery"})

        try:
            prompt = self._build_recovery_prompt(task, step, result, step_results)
            # asyncio.run raises RuntimeError when an event loop is already
            # running in this thread; that is reported as a failed recovery.
            output = asyncio.run(self._critic.generate(prompt, max_tokens=512))
            decision = self._parse_recovery_decision(output)
            self._publish(task, CRITIC_RESULT, {
                "step_id": step.id,
                "mode": "recovery",
                "decision": decision,
                "raw": output,
            })
            return decision
        except Exception as e:  # Boundary: a critic failure must not abort the plan.
            logger.warning("Recovery evaluation failed: %s", e)
            self._publish(task, CRITIC_RESULT, {
                "step_id": step.id,
                "mode": "recovery",
                "error": str(e),
            })
            return {"action": "fail", "reason": str(e)}

    def _build_recovery_prompt(
        self,
        task: UserTask,
        step,
        result: dict[str, Any],
        step_results: list[dict[str, Any]],
    ) -> str:
        """Render the recovery-controller prompt for a failed step.

        Values that are not JSON-serializable are rendered with ``str`` so the
        prompt can always be built; the text is only read by the model.
        """
        return f"""You are a recovery controller for an agent runtime.

Decide what to do after a failed tool step. A non-zero exit code is not always fatal.
Interpret the failure in context.

Allowed actions:
- continue: failure is acceptable information; continue the plan.
- retry: try one alternative tool call. Include "tool" and "args".
- respond: stop and answer the user with available information. Include "message".
- fail: real failure; stop the task.

Return ONLY JSON:
{{"action":"continue|retry|respond|fail","reason":"...","tool":"shell_exec","args":{{...}},"message":"..."}}

Task:
{task.input}

Failed step:
id={step.id}
tool={step.tool}
args={json.dumps(step.args, ensure_ascii=False, default=str)}
description={step.description}

Failed result:
{json.dumps(result, ensure_ascii=False, indent=2, default=str)}

Previous step results:
{json.dumps(step_results, ensure_ascii=False, indent=2, default=str)}
"""

    def _parse_recovery_decision(self, output: str) -> dict[str, Any]:
        """Parse the critic's recovery answer into a decision dict.

        The returned dict always has an ``action`` drawn from the allowed set;
        missing, unknown or non-string actions are coerced to ``"fail"``.
        """
        try:
            data = self._extract_json_object(output)
        except ValueError as e:  # json.JSONDecodeError is a ValueError
            return {"action": "fail", "reason": f"Recovery JSON parse failed: {e}"}

        if data is None:
            return {"action": "fail", "reason": "Recovery output was not JSON"}

        action = data.get("action", "fail")
        if not isinstance(action, str) or action not in self._RECOVERY_ACTIONS:
            action = "fail"
        data["action"] = action
        return data

    def _get_ready_steps(
        self,
        graph: dict[str, Any],
        completed: set[str],
    ) -> list:
        """Return steps that are not completed and whose dependencies all are.

        Nodes are visited in graph order; nodes without an entry in the
        graph's ``step_map`` are skipped.
        """
        if not graph or not graph.get("nodes"):
            return []

        step_map: dict = graph.get("step_map", {})
        ready = []
        for node in graph["nodes"]:
            node_id = node["id"]
            if node_id in completed:
                continue
            if not all(dep in completed for dep in node.get("depends_on", [])):
                continue
            step = step_map.get(node_id)
            if step:
                ready.append(step)
        return ready

    # ------------------------------------------------------------------
    # Critic scoring
    # ------------------------------------------------------------------

    def _evaluate_with_critic(
        self,
        task: UserTask,
        step,
        result: dict[str, Any],
    ) -> CriticScore | None:
        """Score a step result with the critic; never raises.

        On success the score fields are also written to
        ``result["critic_score"]``. Returns ``None`` when no critic is set,
        the output cannot be parsed, or the critic call fails.
        """
        if not self._critic:
            return None

        self._publish(task, CRITIC_CALLED, {"step_id": step.id})

        try:
            critic_prompt = self._build_critic_prompt(step, result)
            # See _evaluate_recovery for the asyncio.run caveat.
            critic_output = asyncio.run(self._critic.generate(critic_prompt))
            score = self._parse_critic_score(critic_output)

            self._publish(task, CRITIC_RESULT, {
                "step_id": step.id,
                "score": score.model_dump(mode="json") if score else None,
            })

            if score:
                result["critic_score"] = {
                    "correctness": score.correctness,
                    "usefulness": score.usefulness,
                    "safety": score.safety,
                    "memory_store": score.memory_store,
                    "weight": score.weight,
                    "explanation": score.explanation,
                }

            return score

        except Exception as e:  # Boundary: scoring is best-effort.
            logger.warning("Critic evaluation failed: %s", e)
            self._publish(task, CRITIC_RESULT, {
                "step_id": step.id,
                "error": str(e),
            })
            return None

    def _save_critique_to_memory(
        self,
        task: UserTask,
        step,
        score: CriticScore,
    ) -> None:
        """Save critic evaluation as critique entry in memory (best-effort)."""
        if not self._memory_interface:
            return

        try:
            tool_name = step.tool
            tool_args = step.args or {}
            args_str = ", ".join(f"{k}={v}" for k, v in tool_args.items())

            critique_text = f"Tool: {tool_name}({args_str}) | Task: {task.input[:100]} | Scores: correctness={score.correctness}, usefulness={score.usefulness}, safety={score.safety} | {score.explanation}"

            metadata = {
                "task_input": task.input,
                "tool": tool_name,
                "args": tool_args,
                "step_id": step.id,
                "scores": {
                    "correctness": score.correctness,
                    "usefulness": score.usefulness,
                    "safety": score.safety,
                },
            }

            self._memory_interface.insert(
                text=critique_text,
                kind="critique",
                source="critic",
                task_id=task.task_id,
                session_id=task.session_id,
                weight=score.weight,
                metadata=metadata,
            )
            logger.info("Saved critique to memory: %s task_id=%s", tool_name, task.task_id)

        except Exception as e:  # Memory writes must not break plan execution.
            logger.warning("Failed to save critique to memory: %s", e)

    def _build_critic_prompt(self, step, result: dict[str, Any]) -> str:
        """Render the scoring prompt: base ``critic`` prompt, step and tool result."""
        base_prompt = self._prompts.get("critic", "")
        tool_result = result.get("result", {})

        return f"""{base_prompt}

Step: {step.description}
Tool: {step.tool}
Args: {step.args}

Result:
{json.dumps(tool_result, ensure_ascii=False, indent=2, default=str)}

Evaluate and respond with JSON:
{{"correctness": 0.0-1.0, "usefulness": 0.0-1.0, "safety": 0.0-1.0, "memory_store": true|false, "weight": 0.0-1.0, "explanation": "..."}}"""

    def _parse_critic_score(self, output: str) -> CriticScore | None:
        """Parse critic output into a ``CriticScore``; ``None`` if not parseable.

        Missing fields fall back to neutral defaults (0.5 for correctness,
        usefulness and weight; 1.0 for safety; no memory store).
        """
        try:
            data = self._extract_json_object(output)
            if data is None:
                return None

            return CriticScore(
                correctness=data.get("correctness", 0.5),
                usefulness=data.get("usefulness", 0.5),
                safety=data.get("safety", 1.0),
                memory_store=data.get("memory_store", False),
                weight=data.get("weight", 0.5),
                explanation=data.get("explanation", ""),
            )

        except (ValueError, TypeError) as e:  # JSONDecodeError is a ValueError
            logger.warning("Critic score parsing failed: %s", e)
            return None

    # ------------------------------------------------------------------
    # Coder and tool execution
    # ------------------------------------------------------------------

    def _execute_coder(
        self,
        task: UserTask,
        directive: ExecutionDirective,
    ) -> dict[str, Any]:
        """Send the directive's ``task`` text to the coder model.

        Returns ``completed`` with the generated text under ``code``, or
        ``failed`` when no coder is set, the task text is empty, or
        generation raises.
        """
        if not self._coder:
            return {"status": "failed", "result": {"error": "Coder model not available"}}

        coder_task = directive.payload.get("task", "")
        if not coder_task:
            return {"status": "failed", "result": {"error": "Missing task for coder"}}

        try:
            output = asyncio.run(self._coder.generate(coder_task))
        except Exception as e:  # Boundary: report model errors as a failed result.
            logger.warning("Coder execution failed: %s", e)
            return {"status": "failed", "result": {"error": str(e)}}

        return {
            "status": "completed",
            "result": {"code": output},
        }

    def _check_tool_permission(
        self,
        task: UserTask,
        tool_name: str,
        tool_args: dict[str, Any],
    ) -> dict[str, Any] | None:
        """Ask the permission service about a tool call; ``None`` if ungated."""
        if tool_name == "shell_exec":
            return self._permission_service.check_shell_command(
                task_id=task.task_id,
                session_id=task.session_id,
                command=str(tool_args.get("command", "")),
            )

        if tool_name == "file_write":
            write_path = str(tool_args.get("path", ""))
            if self._is_internal_write_path(write_path):
                # Internal system write - allowed without asking the service.
                return {"decision": "allowed", "path": write_path}
            return self._permission_service.check_write_path(
                task_id=task.task_id,
                session_id=task.session_id,
                path=write_path,
            )

        return None

    def _execute_tool(
        self,
        task: UserTask,
        directive: ExecutionDirective,
        permission_override: PermissionDecision | None = None,
        secret_override: str | None = None,
        password_override: str | None = None,
    ) -> dict[str, Any]:
        """Validate, permission-check and run a single tool call.

        Returns:
            ``completed`` / ``failed`` with the dumped tool result,
            ``awaiting_permission`` when the user must confirm,
            ``awaiting_input`` when a sudo secret is required, or
            ``awaiting_password`` when the tool reports ``needs_sudo``.
        """
        # NOTE(review): permission_override is accepted for interface
        # compatibility but is not consulted here - confirm that approvals
        # reach the permission service before the call is retried.
        payload = directive.payload
        tool_name = str(payload.get("tool") or "").strip()
        tool_args = dict(payload.get("args") or {})

        if password_override:
            tool_args["password"] = password_override

        if not tool_name:
            return {"status": "failed", "result": {"error": "Missing tool name"}}

        # Tool-first: validate tool exists in registry
        available_tools = self._tool_registry.list_names()
        if tool_name not in available_tools:
            return {"status": "failed", "result": {"error": f"Unknown tool: {tool_name}. Available tools: {available_tools}"}}

        permission_result = self._check_tool_permission(task, tool_name, tool_args)

        if permission_result:
            verdict = self._classify_permission(permission_result)

            if verdict == "hard_stop":
                self._publish(task, PERMISSION_REQUESTED, permission_result)
                return {
                    "status": "failed",
                    "result": {
                        "error": f"Command blocked: {permission_result.get('reason', 'Hard stop command')}",
                        "command": permission_result.get("command", ""),
                    },
                }

            if verdict == "deny":
                self._publish(task, PERMISSION_RESOLVED, permission_result)
                return {
                    "status": "failed",
                    "result": {
                        "error": "Permission denied",
                        "command": permission_result.get("command", ""),
                    },
                }

            if verdict == "prompt":
                # Need user confirmation - return immediately, don't execute.
                self._publish(task, PERMISSION_REQUESTED, permission_result)
                return {
                    "status": "awaiting_permission",
                    "result": {
                        "error": "Permission required before execution.",
                        "permission_request": permission_result,
                    },
                }

            if verdict == "allowed":
                self._publish(task, PERMISSION_RESOLVED, permission_result)
            # NOTE(review): an unrecognised decision falls through and the tool
            # runs; confirm whether unknown decisions should fail closed.

        if tool_name == "shell_exec":
            command = str(tool_args.get("command", ""))
            if command.startswith("sudo "):
                if secret_override is None:
                    secret_request = SecretRequest(
                        task_id=task.task_id,
                        session_id=task.session_id,
                        kind="sudo_password",
                        prompt="Sudo password required",
                        command=command,
                    )
                    request_payload = secret_request.model_dump(mode="json")
                    self._publish(task, SECRET_REQUESTED, request_payload)
                    return {
                        "status": "awaiting_input",
                        "result": {
                            "error": "Secret required",
                            "secret_request": request_payload,
                        },
                    }
                # Feed the secret on stdin (-S) and silence sudo's prompt (-p '').
                tool_args["command"] = f"sudo -S -p '' {command[len('sudo '):]}"
                tool_args["stdin_secret"] = f"{secret_override}\n"

        # The ToolCall only feeds the event, so it carries redacted arguments;
        # the tool itself receives the real ones.
        # NOTE(review): step_id is fixed to "step-1" even for plan steps.
        tool_call = ToolCall(
            tool=tool_name,
            args=self._redact_secret_args(tool_args),
            task_id=task.task_id,
            step_id="step-1",
        )
        self._publish(task, TOOL_CALLED, tool_call.model_dump(mode="json"))
        tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args)
        self._publish(task, TOOL_COMPLETED, tool_result.model_dump(mode="json"))

        needs_sudo = tool_result.metadata.get("needs_sudo", False) if tool_result.metadata else False

        if not tool_result.ok and needs_sudo:
            return {
                "status": "awaiting_password",
                "result": {
                    "task_id": task.task_id,
                    "needs_sudo": True,
                    "command": tool_args.get("command", ""),
                    "error": tool_result.error or "Permission denied",
                    "tool_result": tool_result.model_dump(mode="json"),
                },
            }

        return {
            "status": "completed" if tool_result.ok else "failed",
            "result": tool_result.model_dump(mode="json"),
        }

    def _publish(self, task: UserTask, event_type: str, payload: dict[str, Any]) -> None:
        """Publish a ``RuntimeEvent`` for *task*; a no-op when no event bus is set."""
        if not self._event_bus:
            return
        event = RuntimeEvent(
            task_id=task.task_id,
            session_id=task.session_id,
            sequence=self._event_bus.next_sequence(task.task_id),
            type=event_type,
            payload=payload,
        )
        self._event_bus.publish(event)
|