# ducklm/app/core/execution_engine.py

from __future__ import annotations

import asyncio
import json
import logging
from typing import Any

from app.core.contracts import (
    CriticScore,
    ExecutionDirective,
    PermissionDecision,
    PermissionRequest,
    RuntimeEvent,
    SecretRequest,
    ToolCall,
    ToolResult,
    UserTask,
)
from app.core.command_analyzer import CommandAnalyzer
from app.core.execution_scheduler import ExecutionScheduler
from app.events.event_bus import EventBus
from app.events.event_types import (
    CRITIC_CALLED,
    CRITIC_RESULT,
    PERMISSION_REQUESTED,
    PERMISSION_RESOLVED,
    PLAN_FAILED,
    PLAN_STARTED,
    SECRET_REQUESTED,
    STEP_STARTED,
    STEPPED_COMPLETED,
    TOOL_CALLED,
    TOOL_COMPLETED,
    TOOL_OUTPUT_CHUNK,
)
from app.models.async_adapters import AsyncCriticAdapter, AsyncCoderAdapter
from app.memory.write_policy import MemoryWritePolicy
from app.memory.interface import MemoryInterface

logger = logging.getLogger(__name__)


class ExecutionEngine:
    def __init__(
        self,
        event_bus: EventBus,
        tool_registry,
        permission_service,
        scheduler: ExecutionScheduler | None = None,
        critic: AsyncCriticAdapter | None = None,
        memory_policy: MemoryWritePolicy | None = None,
        memory_interface: MemoryInterface | None = None,
        prompts: dict[str, str] | None = None,
        recovery_limit: int = 1,
        critic_retry_limit: int = 2,
        command_analyzer: CommandAnalyzer | None = None,
    ) -> None:
        self._event_bus = event_bus
        self._tool_registry = tool_registry
        self._permission_service = permission_service
        self._scheduler = scheduler or ExecutionScheduler()
        self._critic = critic
        self._coder: AsyncCoderAdapter | None = None
        self._memory_policy = memory_policy
        self._memory_interface = memory_interface
        self._prompts = prompts or {}
        self._recovery_limit = recovery_limit
        self._critic_retry_limit = critic_retry_limit
        self._command_analyzer = command_analyzer

    def set_critic(self, critic: AsyncCriticAdapter) -> None:
        self._critic = critic

    def set_coder(self, coder: AsyncCoderAdapter) -> None:
        self._coder = coder

    def set_memory_policy(self, policy: MemoryWritePolicy) -> None:
        self._memory_policy = policy

    def execute(
        self,
        task: UserTask,
        directive: ExecutionDirective,
        permission_override: PermissionDecision | None = None,
        secret_override: str | None = None,
        password_override: str | None = None,
    ) -> dict[str, Any]:
        scheduled = self._scheduler.next_directive(directive)
        self._publish(task, STEP_STARTED, {"directive_type": scheduled.type})

        if scheduled.type == "plan":
            return self._execute_plan(
                task=task,
                directive=scheduled,
                permission_override=permission_override,
                secret_override=secret_override,
                password_override=password_override,
            )

        if scheduled.type == "tool":
            return self._execute_tool(
                task=task,
                directive=scheduled,
                permission_override=permission_override,
                secret_override=secret_override,
                password_override=password_override,
            )

        if scheduled.type == "respond":
            return {
                "status": "completed",
                "result": {
                    "message": scheduled.payload.get("text", f"Runtime accepted task: {task.input}"),
                    "mode": scheduled.payload.get("mode", "direct_response"),
                },
                "directive": scheduled.model_dump(mode="json"),
            }

        if scheduled.type == "coder":
            return self._execute_coder(
                task=task,
                directive=scheduled,
            )

        if scheduled.type == "fail":
            return {
                "status": "failed",
                "result": {"error": scheduled.reason or "Execution failed."},
            }

        return {
            "status": "completed",
            "result": {
                "message": "Directive accepted.",
                "directive_type": scheduled.type,
            },
        }

    def _execute_plan(
        self,
        task: UserTask,
        directive: ExecutionDirective,
        permission_override: PermissionDecision | None = None,
        secret_override: str | None = None,
        password_override: str | None = None,
    ) -> dict[str, Any]:
        # Unified format: {"type": "plan", "payload": {"steps": [...]}}
        # Need to extract steps from nested payload
        import json

        payload = directive.payload
        steps_data = []

        # If payload has "steps" directly, use them
        if "steps" in payload:
            steps_data = payload.get("steps", [])
        # If payload is a string (JSON), parse it
        elif isinstance(payload, str) and payload.strip().startswith("{"):
            try:
                parsed = json.loads(payload)
                steps_data = parsed.get("payload", {}).get("steps", [])
            except:
                steps_data = []

        if steps_data:
            plan_json = json.dumps({"type": "plan", "payload": {"steps": steps_data}})
        else:
            plan_json = json.dumps(payload)

        plan_steps = self._scheduler.parse_plan_steps(plan_json, task.task_id)

        if not plan_steps:
            return {
                "status": "failed",
                "result": {"error": "Failed to parse plan steps from directive"},
            }

        if not self._scheduler.validate_no_cycles(plan_steps):
            self._publish(task, PLAN_FAILED, {"error": "Cycle detected in plan"})
            return {
                "status": "failed",
                "result": {"error": "Cycle detected in plan"},
            }

        graph = self._scheduler.build_task_graph(plan_steps)
        self._publish(task, PLAN_STARTED, {"steps": len(plan_steps)})

        completed_steps: set[str] = set()
        step_results: list[dict[str, Any]] = []
        critic_retries_used = 0  # Track critic→replan cycles

        ready_steps = self._get_ready_steps(graph, completed_steps)

        while ready_steps:
            step = ready_steps.pop(0)

            # Handle respond kind directly without tool execution
            if step.kind == "respond":
                result = {
                    "status": "completed",
                    "result": {
                        "message": step.args.get("text", step.description),
                    },
                }
            else:
                step_directive = ExecutionDirective(
                    type=step.kind,
                    payload={
                        "tool": step.tool,
                        "args": step.args,
                    },
                    requires_permission=step.requires_confirmation,
                    reason=step.description,
                )

                result = self._execute_tool(
                    task=task,
                    directive=step_directive,
                    permission_override=permission_override,
                    secret_override=secret_override,
                    password_override=password_override,
                )

            # If tool needs human input/review - return immediately.
            if result.get("status") in (
                "awaiting_permission",
                "awaiting_input",
                "awaiting_password",
                "awaiting_review",
            ):
                return {
                    "status": result.get("status"),
                    "result": result.get("result", {}),
                    "step_results": step_results,
                }

            step_results.append({
                "step_id": step.id,
                "result": result,
            })

            completed_steps.add(step.id)
            self._publish(task, STEPPED_COMPLETED, {
                "step_id": step.id,
                "status": result.get("status"),
            })

            # === Critic evaluation ===
            if self._critic and result.get("status") == "completed":
                critic_score = self._evaluate_with_critic(task, step, result)
                if critic_score:
                    result["critic_score"] = {
                        "correctness": critic_score.correctness,
                        "usefulness": critic_score.usefulness,
                        "safety": critic_score.safety,
                        "memory_store": critic_score.memory_store,
                        "weight": critic_score.weight,
                        "explanation": critic_score.explanation,
                    }
                    self._save_critique_to_memory(task, step, critic_score)

                    # Check if step result is satisfactory
                    min_correctness = 0.5
                    if critic_score.correctness < min_correctness:
                        # Step failed critic check — try to recover
                        if critic_retries_used < self._critic_retry_limit and step.kind != "respond":
                            critic_retries_used += 1
                            self._publish(task, CRITIC_RESULT, {
                                "step_id": step.id,
                                "score": critic_score.model_dump(mode="json"),
                                "action": "retry",
                                "retry": critic_retries_used,
                            })
                            # Retry the same step — rebuild directive
                            retry_directive = ExecutionDirective(
                                type=step.kind,
                                payload={"tool": step.tool, "args": step.args},
                                requires_permission=step.requires_confirmation,
                                reason=step.description,
                            )
                            retry_result = self._execute_tool(
                                task=task,
                                directive=retry_directive,
                                permission_override=permission_override,
                                secret_override=secret_override,
                                password_override=password_override,
                            )
                            if retry_result.get("status") == "completed":
                                result = retry_result
                                step_results[-1]["result"] = result
                                # Re-evaluate after retry
                                critic_score2 = self._evaluate_with_critic(task, step, result)
                                if critic_score2 and critic_score2.correctness >= min_correctness:
                                    # Retry succeeded
                                    continue
                            # If retry also failed, continue to next step
                        else:
                            self._publish(task, CRITIC_RESULT, {
                                "step_id": step.id,
                                "score": critic_score.model_dump(mode="json"),
                                "action": "give_up",
                                "reason": f"Critic retry limit ({self._critic_retry_limit}) reached",
                            })

            # Handle failed step
            if result.get("status") == "failed":
                review = self._build_failed_step_review(task, step, result)
                if review:
                    return {
                        "status": "awaiting_review",
                        "result": {
                            "error": f"Step {step.id} requires review before replanning",
                            "failed_step": step.id,
                            "step_results": step_results,
                            "review": review,
                        },
                    }
                recovery = self._recover_failed_step(
                    task=task,
                    step=step,
                    result=result,
                    step_results=step_results,
                    permission_override=permission_override,
                    secret_override=secret_override,
                    password_override=password_override,
                )
                if recovery.get("status") == "awaiting_permission":
                    return recovery
                if recovery.get("status") == "completed":
                    recovered_result = recovery.get("result")
                    if recovered_result:
                        step_results[-1]["result"] = recovered_result
                    if recovery.get("finish"):
                        return {
                            "status": "completed",
                            "result": {
                                "message": recovery.get("message", "Recovered from failed step"),
                                "step_results": step_results,
                            },
                        }
                else:
                    return {
                        "status": "failed",
                        "result": {
                            "error": f"Step {step.id} failed",
                            "failed_step": step.id,
                            "step_results": step_results,
                            "recovery": recovery.get("result"),
                        },
                    }

            ready_steps = self._get_ready_steps(graph, completed_steps)

        return {
            "status": "completed",
            "result": {
                "message": f"Plan executed: {len(completed_steps)} steps completed",
                "step_results": step_results,
            },
        }

    def _build_failed_step_review(self, task: UserTask, step, result: dict[str, Any]) -> dict[str, Any] | None:
        if step.tool != "shell_exec" or not self._command_analyzer:
            return None
        command = str((step.args or {}).get("command", ""))
        if not command:
            return None
        diagnosis = self._command_analyzer.analyze(
            command=command,
            task_id=task.task_id,
            session_id=task.session_id,
        )
        if diagnosis.get("type") == "ok":
            return None
        return {
            "step_id": step.id,
            "tool": step.tool,
            "command": command,
            "diagnosis": diagnosis,
            "critic_assessment": {
                "classification": "model_planning_error",
                "needs_replan": True,
                "explanation": "Structured command analysis found a model action error before recovery.",
            },
        }

    def _recover_failed_step(
        self,
        task: UserTask,
        step,
        result: dict[str, Any],
        step_results: list[dict[str, Any]],
        permission_override: PermissionDecision | None = None,
        secret_override: str | None = None,
        password_override: str | None = None,
    ) -> dict[str, Any]:
        if self._recovery_limit <= 0 or not self._critic:
            return {"status": "failed", "result": {"reason": "recovery_unavailable"}}

        decision = self._evaluate_recovery(task, step, result, step_results)
        action = decision.get("action", "fail")

        if action == "continue":
            recovered = dict(result)
            recovered["status"] = "completed"
            recovered["recovery_decision"] = decision
            return {"status": "completed", "result": recovered}

        if action == "respond":
            recovered = dict(result)
            recovered["status"] = "completed"
            recovered["recovery_decision"] = decision
            return {
                "status": "completed",
                "result": recovered,
                "finish": True,
                "message": decision.get("message") or decision.get("reason") or "Recovered by responding to user",
            }

        if action == "retry":
            retry_tool = decision.get("tool") or step.tool
            retry_args = decision.get("args") or step.args
            retry_result = self._execute_tool(
                task=task,
                directive=ExecutionDirective(
                    type="tool",
                    payload={"tool": retry_tool, "args": retry_args},
                    requires_permission=True,
                    reason=decision.get("reason", "Recovery retry"),
                ),
                permission_override=permission_override,
                secret_override=secret_override,
                password_override=password_override,
            )
            if retry_result.get("status") == "awaiting_permission":
                return retry_result
            retry_result["recovery_decision"] = decision
            if retry_result.get("status") == "completed":
                return {"status": "completed", "result": retry_result}
            return {"status": "failed", "result": {"decision": decision, "retry_result": retry_result}}

        return {"status": "failed", "result": decision}

    def _evaluate_recovery(
        self,
        task: UserTask,
        step,
        result: dict[str, Any],
        step_results: list[dict[str, Any]],
    ) -> dict[str, Any]:
        prompt = self._build_recovery_prompt(task, step, result, step_results)
        self._publish(task, CRITIC_CALLED, {"step_id": step.id, "mode": "recovery"})

        try:
            output = asyncio.run(self._critic.generate(prompt, max_tokens=512))
            decision = self._parse_recovery_decision(output)
            self._publish(task, CRITIC_RESULT, {
                "step_id": step.id,
                "mode": "recovery",
                "decision": decision,
                "raw": output,
            })
            return decision
        except Exception as e:
            logger.warning(f"Recovery evaluation failed: {e}")
            self._publish(task, CRITIC_RESULT, {
                "step_id": step.id,
                "mode": "recovery",
                "error": str(e),
            })
            return {"action": "fail", "reason": str(e)}

    def _build_recovery_prompt(
        self,
        task: UserTask,
        step,
        result: dict[str, Any],
        step_results: list[dict[str, Any]],
    ) -> str:
        return f"""You are a recovery controller for an agent runtime.

Decide what to do after a failed tool step. A non-zero exit code is not always fatal.
Interpret the failure in context.

Allowed actions:
- continue: failure is acceptable information; continue the plan.
- retry: try one alternative tool call. Include "tool" and "args".
- respond: stop and answer the user with available information. Include "message".
- fail: real failure; stop the task.

Return ONLY JSON:
{{"action":"continue|retry|respond|fail","reason":"...","tool":"shell_exec","args":{{...}},"message":"..."}}

Task:
{task.input}

Failed step:
id={step.id}
tool={step.tool}
args={json.dumps(step.args, ensure_ascii=False)}
description={step.description}

Failed result:
{json.dumps(result, ensure_ascii=False, indent=2)}

Previous step results:
{json.dumps(step_results, ensure_ascii=False, indent=2)}
"""

    def _parse_recovery_decision(self, output: str) -> dict[str, Any]:
        try:
            json_start = output.find("{")
            json_end = output.rfind("}") + 1
            if json_start < 0 or json_end <= 0:
                return {"action": "fail", "reason": "Recovery output was not JSON"}
            data = json.loads(output[json_start:json_end])
            action = data.get("action", "fail")
            if action not in {"continue", "retry", "respond", "fail"}:
                action = "fail"
            data["action"] = action
            return data
        except (json.JSONDecodeError, TypeError, ValueError) as e:
            return {"action": "fail", "reason": f"Recovery JSON parse failed: {e}"}

    def _get_ready_steps(
        self,
        graph: dict[str, Any],
        completed: set[str],
    ) -> list:
        if not graph or not graph.get("nodes"):
            return []

        step_map: dict = graph.get("step_map", {})
        ready = []

        for node in graph["nodes"]:
            node_id = node["id"]
            if node_id in completed:
                continue

            deps = node.get("depends_on", [])
            if all(dep in completed for dep in deps):
                step = step_map.get(node_id)
                if step:
                    ready.append(step)

        return ready

    def _evaluate_with_critic(
        self,
        task: UserTask,
        step,
        result: dict[str, Any],
    ) -> CriticScore | None:
        if not self._critic:
            return None

        critic_prompt = self._build_critic_prompt(step, result)

        self._publish(task, CRITIC_CALLED, {"step_id": step.id})

        try:
            critic_output = asyncio.run(self._critic.generate(critic_prompt))
            score = self._parse_critic_score(critic_output)

            self._publish(task, CRITIC_RESULT, {
                "step_id": step.id,
                "score": score.model_dump(mode="json") if score else None,
            })

            if score:
                result["critic_score"] = {
                    "correctness": score.correctness,
                    "usefulness": score.usefulness,
                    "safety": score.safety,
                    "memory_store": score.memory_store,
                    "weight": score.weight,
                    "explanation": score.explanation,
                }

            return score

        except Exception as e:
            logger.warning(f"Critic evaluation failed: {e}")
            self._publish(task, CRITIC_RESULT, {
                "step_id": step.id,
                "error": str(e),
            })
            return None

    def _save_critique_to_memory(
        self,
        task: UserTask,
        step,
        score: CriticScore,
    ) -> None:
        """Save critic evaluation as critique entry in memory, using MemoryWritePolicy."""
        if not self._memory_interface:
            return

        try:
            # Check with policy before saving
            if self._memory_policy:
                decision = self._memory_policy.decide(
                    critic_score=score,
                    memory_type="critique",
                    session_id=task.session_id,
                )
                if decision == "skip":
                    logger.info(f"MemoryWritePolicy skipped critique for {step.tool}")
                    return
                # For "store_with_weight", we could adjust weight, but critic score already has weight

            tool_name = step.tool
            tool_args = step.args or {}
            args_str = ", ".join([f"{k}={v}" for k, v in tool_args.items()])

            critique_text = f"Tool: {tool_name}({args_str}) | Task: {task.input[:100]} | Scores: correctness={score.correctness}, usefulness={score.usefulness}, safety={score.safety} | {score.explanation}"

            metadata = {
                "task_input": task.input,
                "tool": tool_name,
                "args": tool_args,
                "step_id": step.id,
                "scores": {
                    "correctness": score.correctness,
                    "usefulness": score.usefulness,
                    "safety": score.safety,
                },
            }

            self._memory_interface.insert(
                text=critique_text,
                kind="critique",
                source="critic",
                task_id=task.task_id,
                session_id=task.session_id,
                weight=score.weight,
                metadata=metadata,
            )
            logger.info(f"Saved critique to memory: {tool_name} task_id={task.task_id}")

        except Exception as e:
            logger.warning(f"Failed to save critique to memory: {e}")

    def _build_critic_prompt(self, step, result: dict[str, Any]) -> str:
        base_prompt = self._prompts.get("critic", "")
        tool_result = result.get("result", {})

        # Truncate long outputs to avoid exceeding context window
        # Keep output under ~2000 chars to leave room for prompt + generation
        output = tool_result.get("output", "")
        if isinstance(output, str) and len(output) > 2000:
            output = output[:2000] + "\n... [truncated]"
        elif not isinstance(output, str):
            output_str = json.dumps(output, ensure_ascii=False)
            if len(output_str) > 2000:
                output = output_str[:2000] + "\n... [truncated]"
            else:
                output = output_str

        # Build a compact result representation
        compact_result = {
            "ok": tool_result.get("ok"),
            "output": output,
            "error": tool_result.get("error"),
            "exit_code": tool_result.get("metadata", {}).get("exit_code"),
        }

        return f"""{base_prompt}

Step: {step.description}
Tool: {step.tool}
Args: {step.args}

Result:
{json.dumps(compact_result, indent=2, ensure_ascii=False)}

Evaluate and respond with JSON:
{{"correctness": 0.0-1.0, "usefulness": 0.0-1.0, "safety": 0.0-1.0, "memory_store": true|false, "weight": 0.0-1.0, "explanation": "..."}}"""

    def _parse_critic_score(self, output: str) -> CriticScore | None:
        try:
            json_start = output.find("{")
            json_end = output.rfind("}") + 1
            if json_start < 0:
                return None

            json_str = output[json_start:json_end]
            data = json.loads(json_str)

            return CriticScore(
                correctness=data.get("correctness", 0.5),
                usefulness=data.get("usefulness", 0.5),
                safety=data.get("safety", 1.0),
                memory_store=data.get("memory_store", False),
                weight=data.get("weight", 0.5),
                explanation=data.get("explanation", ""),
            )

        except (json.JSONDecodeError, ValueError, TypeError) as e:
            logger.warning(f"Critic score parsing failed: {e}")
            return None

    def _execute_coder(
        self,
        task: UserTask,
        directive: ExecutionDirective,
    ) -> dict[str, Any]:
        if not self._coder:
            return {"status": "failed", "result": {"error": "Coder model not available"}}

        coder_task = directive.payload.get("task", "")
        if not coder_task:
            return {"status": "failed", "result": {"error": "Missing task for coder"}}

        try:
            output = asyncio.run(self._coder.generate(coder_task))

            return {
                "status": "completed",
                "result": {"code": output},
            }
        except Exception as e:
            logger.warning(f"Coder execution failed: {e}")
            return {"status": "failed", "result": {"error": str(e)}}

    def _execute_tool(
        self,
        task: UserTask,
        directive: ExecutionDirective,
        permission_override: PermissionDecision | None = None,
        secret_override: str | None = None,
        password_override: str | None = None,
    ) -> dict[str, Any]:
        tool_name = str(directive.payload.get("tool", "")).strip()
        tool_args = dict(directive.payload.get("args", {}))

        if password_override:
            tool_args["password"] = password_override

        if not tool_name:
            return {"status": "failed", "result": {"error": "Missing tool name"}}

        # Tool-first: validate tool exists in registry
        available_tools = self._tool_registry.list_names()
        if tool_name not in available_tools:
            return {"status": "failed", "result": {"error": f"Unknown tool: {tool_name}. Available tools: {available_tools}"}}

        permission_result = None

        # If permission_override is provided, skip permission check
        if permission_override is not None:
            permission_result = {
                "decision": permission_override.decision,
                "command": tool_args.get("command", ""),
                "cached": True,
            }
        # Check permission for shell_exec and file_write
        elif tool_name == "shell_exec":
            permission_result = self._permission_service.check_shell_command(
                task_id=task.task_id,
                session_id=task.session_id,
                command=str(tool_args.get("command", "")),
            )
        elif tool_name == "file_write":
            # Allow writing to runtime data directory without permission check
            write_path = str(tool_args.get("path", ""))
            if "allowed_commands.json" in write_path or "/data/runtime" in write_path:
                # Internal system write - allow without permission
                permission_result = {"decision": "allowed", "path": write_path}
            else:
                permission_result = self._permission_service.check_write_path(
                    task_id=task.task_id,
                    session_id=task.session_id,
                    path=write_path,
                )

        # Handle permission result
        if permission_result:
            decision = permission_result.get("decision", "unknown")

            # Hard stop - deny execution
            if decision == "hard_stop":
                self._publish(task, PERMISSION_REQUESTED, permission_result)
                return {
                    "status": "failed",
                    "result": {
                        "error": f"Command blocked: {permission_result.get('reason', 'Hard stop command')}",
                        "command": permission_result.get("command", ""),
                    },
                }

            # Cached - already allowed
            if decision in ("allowed_always", "allowed") or permission_result.get("cached"):
                self._publish(task, PERMISSION_RESOLVED, permission_result)

            # Need user confirmation - return immediately, don't continue execution
            elif decision == "prompt":
                self._publish(task, PERMISSION_REQUESTED, permission_result)
                return {
                    "status": "awaiting_permission",
                    "result": {
                        "error": "Permission required before execution.",
                        "permission_request": permission_result,
                    },
                }

            # Hard stop - return immediately
            elif decision == "deny":
                self._publish(task, PERMISSION_RESOLVED, permission_result)
                return {
                    "status": "failed",
                    "result": {
                        "error": "Permission denied",
                        "command": permission_result.get("command", ""),
                    },
                }

            # Deny
            elif decision == "deny":
                self._publish(task, PERMISSION_RESOLVED, permission_result)
                return {
                    "status": "failed",
                    "result": {
                        "error": "Permission denied",
                        "command": permission_result.get("command", ""),
                    },
                }

        if tool_name == "shell_exec":
            command = str(tool_args.get("command", ""))

            # Determine if sudo password is needed:
            # 1. Command explicitly starts with "sudo"
            # 2. Command is a known sudo-requiring command (apt, systemctl, etc.) — flagged by permission service
            needs_password = command.startswith("sudo ") or (permission_result is not None and permission_result.get("requires_sudo", False))

            if needs_password and secret_override is None:
                secret_request = SecretRequest(
                    task_id=task.task_id,
                    session_id=task.session_id,
                    kind="sudo_password",
                    prompt="Sudo password required",
                    command=command,
                )
                self._publish(task, SECRET_REQUESTED, secret_request.model_dump(mode="json"))
                return {
                    "status": "awaiting_input",
                    "result": {
                        "error": "Secret required",
                        "secret_request": secret_request.model_dump(mode="json"),
                    },
                }
            if needs_password and secret_override is not None:
                # Inject sudo -S for explicit sudo commands, or prepend sudo -S for implicit ones
                if command.startswith("sudo "):
                    tool_args["command"] = f"sudo -S -p '' {command[len('sudo '):]}"
                else:
                    tool_args["command"] = f"sudo -S -p '' {command}"
                tool_args["stdin_secret"] = f"{secret_override}\n"

        tool_call = ToolCall(
            tool=tool_name,
            args=tool_args,
            task_id=task.task_id,
            step_id="step-1",
        )
        self._publish(task, TOOL_CALLED, tool_call.model_dump(mode="json"))
        if tool_name == "shell_exec":
            tool_args["__output_callback"] = lambda stream, chunk: self._publish(
                task,
                TOOL_OUTPUT_CHUNK,
                {
                    "tool": tool_name,
                    "step_id": "step-1",
                    "stream": stream,
                    "chunk": chunk,
                },
            )
        tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args)
        self._publish(task, TOOL_COMPLETED, tool_result.model_dump(mode="json"))

        metadata = tool_result.metadata or {}
        needs_sudo = metadata.get("needs_sudo", False)
        sudo_auth_failed = metadata.get("sudo_auth_failed", False) or self._looks_like_sudo_auth_failure(tool_result)

        if tool_name == "shell_exec" and not tool_result.ok and sudo_auth_failed:
            original_command = str(directive.payload.get("args", {}).get("command", tool_args.get("command", "")))
            secret_request = SecretRequest(
                task_id=task.task_id,
                session_id=task.session_id,
                kind="sudo_password",
                prompt="Sudo password incorrect. Try again",
                command=original_command,
            )
            self._publish(task, SECRET_REQUESTED, secret_request.model_dump(mode="json"))
            return {
                "status": "awaiting_input",
                "result": {
                    "error": "Sudo password failed",
                    "secret_request": secret_request.model_dump(mode="json"),
                    "attempt_failed": True,
                    "tool_result": tool_result.model_dump(mode="json"),
                },
            }

        if not tool_result.ok and needs_sudo:
            return {
                "status": "awaiting_password",
                "result": {
                    "task_id": task.task_id,
                    "needs_sudo": True,
                    "command": tool_args.get("command", ""),
                    "error": tool_result.error or "Permission denied",
                    "tool_result": tool_result.model_dump(mode="json"),
                },
            }

        if tool_name == "shell_exec" and not tool_result.ok and self._command_analyzer:
            original_command = str(directive.payload.get("args", {}).get("command", tool_args.get("command", "")))
            diagnosis = self._command_analyzer.analyze(
                command=original_command,
                task_id=task.task_id,
                session_id=task.session_id,
            )
            if diagnosis.get("type") != "ok":
                return {
                    "status": "awaiting_review",
                    "result": {
                        "error": "Tool action requires review before replanning",
                        "review": {
                            "step_id": "step-1",
                            "tool": tool_name,
                            "command": original_command,
                            "diagnosis": diagnosis,
                            "critic_assessment": {
                                "classification": "model_planning_error",
                                "needs_replan": True,
                                "explanation": "Structured command analysis found a model action error before recovery.",
                            },
                        },
                        "tool_result": tool_result.model_dump(mode="json"),
                    },
                }

        return {
            "status": "completed" if tool_result.ok else "failed",
            "result": tool_result.model_dump(mode="json"),
        }

    def _looks_like_sudo_auth_failure(self, tool_result: ToolResult) -> bool:
        """Report whether a tool result's text contains a sudo auth-failure phrase.

        The result's ``output`` and ``error`` fields (each treated as empty
        when falsy) are joined with a newline, lower-cased, and scanned for a
        fixed set of phrases. Matching is therefore case-insensitive.

        Args:
            tool_result: Result object exposing ``output`` and ``error``.

        Returns:
            True if any known failure phrase occurs in the combined text,
            False otherwise.
        """
        failure_markers = (
            "incorrect password",
            "incorrect password attempt",
            "sudo: no password was provided",
            "sorry, try again",
            "authentication failure",
        )
        stdout_text = tool_result.output or ''
        stderr_text = tool_result.error or ''
        haystack = f"{stdout_text}\n{stderr_text}".lower()
        for phrase in failure_markers:
            if phrase in haystack:
                return True
        return False

    def _publish(self, task: UserTask, event_type: str, payload: dict[str, Any]) -> None:
        """Wrap *payload* in a ``RuntimeEvent`` for *task* and publish it.

        Does nothing when the engine's event bus is falsy. Otherwise the
        event carries the task's ``task_id`` and ``session_id``, the sequence
        number returned by the bus's ``next_sequence`` for that task id, the
        given event type, and the payload.

        Args:
            task: Task whose ``task_id`` and ``session_id`` tag the event.
            event_type: Value stored in the event's ``type`` field.
            payload: Data stored in the event's ``payload`` field.
        """
        bus = self._event_bus
        if not bus:
            return
        event_fields = {
            "task_id": task.task_id,
            "session_id": task.session_id,
            "sequence": bus.next_sequence(task.task_id),
            "type": event_type,
            "payload": payload,
        }
        event = RuntimeEvent(**event_fields)
        bus.publish(event)