Honor tool plans despite weak classification

2026-05-11 00:14:49 +08:00 · 2026-05-11 00:14:49 +08:00 · 28a2f63713
parent 7215033028
commit 28a2f63713
2 changed files with 55 additions and 18 deletions
--- a/app/core/async_router.py
+++ b/app/core/async_router.py
@ -169,24 +169,14 @@ class AsyncRouter:
                session_id,
            )
-            # If mode_hint is conversation, only allow respond type
+            if mode_hint == "conversation" and self._looks_like_tool_plan(thinker_result):
-            if mode_hint == "conversation" and not self._is_simple_response(thinker_result):
+                mode_hint = "execution"
-                # Check if Thinker is trying to create an execution plan instead
+                self._emit_event(
-                if any(word in thinker_result.lower() for word in ["шаг", "step", "выполнить", "execute", "shell", "команда"]):
+                    ORCHESTRATOR_FALLBACK_USED,
-                    # Override to conversation-only response
+                    {"reason": "thinker_proposed_tool_plan_despite_conversation_hint"},
-                    respond_text = self._extract_conversation_response(thinker_result)
+                    task_id,
-                    self._emit_event(
+                    session_id,
-                        ORCHESTRATOR_RESULT,
+                )
                        {"directive": {"type": "respond", "payload": {"text": respond_text}}, "mode_violation": True},
                        task_id,
                        session_id,
                    )
                    return ExecutionDirective(
                        type="respond",
                        payload={"text": respond_text},
                        requires_permission=False,
                        reason="Mode violation: conversation only",
                    )
            if self._is_simple_response(thinker_result):
                json_compiler_prompt = self._build_json_compiler_prompt(thinker_result)
@ -295,6 +285,15 @@ class AsyncRouter:
        sentences = thinker_result.split('.')[:3]
        return '. '.join(sentences).strip()
    def _looks_like_tool_plan(self, thinker_result: str) -> bool:
        """Heuristically decide whether the thinker output describes a tool plan.

        The text counts as a tool plan only when it mentions at least one tool
        marker (a built-in marker or a name reported by the tool registry) AND
        at least one plan marker. Matching is a case-insensitive substring test.

        Args:
            thinker_result: Raw text produced by the thinker.

        Returns:
            True when both a tool marker and a plan marker occur in the text.
        """
        text = thinker_result.lower()
        registered = ()
        if self._tool_registry:
            # assumes list_names() yields tool-name strings — TODO confirm
            registered = self._tool_registry.list_names()
        tool_markers = {"shell_exec", "file_read", "file_write", "memory"}
        # Normalise registry names the same way as the text; otherwise a
        # mixed-case name could never match. Blank names are dropped because
        # an empty string is a substring of everything and would make the
        # tool check trivially true.
        normalised = (name.strip().lower() for name in registered)
        tool_markers.update(name for name in normalised if name)
        plan_markers = ("план:", "шаг", "step", "tool", "инструмент")
        mentions_tool = any(marker in text for marker in tool_markers)
        return mentions_tool and any(marker in text for marker in plan_markers)
    def _build_thinker_prompt(
        self, task_summary: str, context: dict[str, Any], mode_hint: str
    ) -> str:
@ -393,6 +392,14 @@ class AsyncRouter:
            return result
        result = re.sub(r"<think>.*?</think>", " ", result, flags=re.DOTALL)
        if (
            "shell_exec" in result
            or "execute command" in result
            or "command execution" in result
            or "use the tool" in result
            or "use a tool" in result
        ):
            return "execution"
        tokens = re.findall(r"\b(execution|conversation|clarification_needed)\b", result)
        if tokens:
            return tokens[-1]
--- a/tests/test_contracts.py
+++ b/tests/test_contracts.py
@ -1,6 +1,17 @@
 import asyncio
 from app.core.async_router import AsyncRouter
 from app.core.contracts import CriticScore, ExecutionDirective, PlanStep, UserTask
 class _FakeAdapter:
    def __init__(self, responses: list[str]) -> None:
        self._responses = responses
    async def generate(self, prompt: str, max_tokens: int | None = None) -> str:
        return self._responses.pop(0)
 def test_user_task_defaults() -> None:
    task = UserTask(input="hello")
    assert task.task_id
@ -35,3 +46,22 @@ def test_execution_directive_defaults() -> None:
    assert directive.payload == {}
    assert directive.confidence == 0.0
 def test_router_compiles_tool_plan_even_when_classifier_says_conversation() -> None:
    """A thinker reply of "conversation" followed by a tool plan must still yield a plan directive."""
    # Scripted thinker replies, consumed in order: first the literal
    # "conversation", then a plan that names the shell_exec tool.
    thinker_script = [
        "conversation",
        "ПЛАН:\nШаг 1: [shell_exec] выполнить `uptime`",
    ]
    # Single scripted reply for the JSON compiler: a one-step plan directive.
    compiler_script = [
        '{"type":"plan","payload":{"steps":[{"id":"1","tool":"shell_exec","args":{"command":"uptime"},"depends_on":[]}]}}'
    ]
    thinker_fake = _FakeAdapter(thinker_script)
    compiler_fake = _FakeAdapter(compiler_script)
    router = AsyncRouter(thinker=thinker_fake, json_compiler=compiler_fake)

    request_context = {"task_summary": "Проверь аптайм ПК", "task_context": {}}
    pending = router.decide(state={}, context=request_context)
    directive = asyncio.run(pending)

    assert directive.type == "plan"
    first_step = directive.payload["steps"][0]
    assert first_step["tool"] == "shell_exec"