Honor tool plans despite weak classification

2026-05-11 00:14:49 +08:00 · 2026-05-11 00:14:49 +08:00 · 28a2f63713
parent 7215033028
commit 28a2f63713
2 changed files with 55 additions and 18 deletions
--- a/app/core/async_router.py
+++ b/app/core/async_router.py
@ -169,24 +169,14 @@ class AsyncRouter:
                session_id,
            )

-            # If mode_hint is conversation, only allow respond type
-            if mode_hint == "conversation" and not self._is_simple_response(thinker_result):
-                # Check if Thinker is trying to create an execution plan instead
-                if any(word in thinker_result.lower() for word in ["шаг", "step", "выполнить", "execute", "shell", "команда"]):
-                    # Override to conversation-only response
-                    respond_text = self._extract_conversation_response(thinker_result)
+            if mode_hint == "conversation" and self._looks_like_tool_plan(thinker_result):
+                mode_hint = "execution"
                self._emit_event(
-                        ORCHESTRATOR_RESULT,
-                        {"directive": {"type": "respond", "payload": {"text": respond_text}}, "mode_violation": True},
+                    ORCHESTRATOR_FALLBACK_USED,
+                    {"reason": "thinker_proposed_tool_plan_despite_conversation_hint"},
                    task_id,
                    session_id,
                )
-                    return ExecutionDirective(
-                        type="respond",
-                        payload={"text": respond_text},
-                        requires_permission=False,
-                        reason="Mode violation: conversation only",
-                    )

            if self._is_simple_response(thinker_result):
                json_compiler_prompt = self._build_json_compiler_prompt(thinker_result)
@ -295,6 +285,15 @@ class AsyncRouter:
        sentences = thinker_result.split('.')[:3]
        return '. '.join(sentences).strip()

+    def _looks_like_tool_plan(self, thinker_result: str) -> bool:
+        """Return True when the text mentions a known tool name and a plan marker (case-insensitive)."""
+        result = thinker_result.lower()
+        registered = self._tool_registry.list_names() if self._tool_registry else ()
+        # The haystack is lower-cased, so lower-case registry names too; skip "" (a substring of everything).
+        tool_markers = {"shell_exec", "file_read", "file_write", "memory", *(name.lower() for name in registered if name)}
+        plan_markers = ("план:", "шаг", "step", "tool", "инструмент")
+        return any(marker in result for marker in tool_markers) and any(marker in result for marker in plan_markers)
+
    def _build_thinker_prompt(
        self, task_summary: str, context: dict[str, Any], mode_hint: str
    ) -> str:
@ -393,6 +392,14 @@ class AsyncRouter:
            return result

        result = re.sub(r"<think>.*?</think>", " ", result, flags=re.DOTALL)
+        if (
+            "shell_exec" in result
+            or "execute command" in result
+            or "command execution" in result
+            or "use the tool" in result
+            or "use a tool" in result
+        ):
+            return "execution"
        tokens = re.findall(r"\b(execution|conversation|clarification_needed)\b", result)
        if tokens:
            return tokens[-1]
--- a/tests/test_contracts.py
+++ b/tests/test_contracts.py
@ -1,6 +1,17 @@
+import asyncio
+
+from app.core.async_router import AsyncRouter
 from app.core.contracts import CriticScore, ExecutionDirective, PlanStep, UserTask


+class _FakeAdapter:
+    def __init__(self, responses: list[str]) -> None:
+        self._responses = list(responses)  # private copy: generate() drains it, the caller's list stays intact
+
+    async def generate(self, prompt: str, max_tokens: int | None = None) -> str:
+        return self._responses.pop(0)  # scripted replies are served in order; prompt and max_tokens are ignored
+
+
 def test_user_task_defaults() -> None:
    task = UserTask(input="hello")
    assert task.task_id
@ -35,3 +46,22 @@ def test_execution_directive_defaults() -> None:
    assert directive.payload == {}
    assert directive.confidence == 0.0

+
+def test_router_compiles_tool_plan_even_when_classifier_says_conversation() -> None:
+    """The router must return a plan directive even though the thinker's first reply is "conversation"."""
+    # Scripted thinker replies, handed out in order: the bare word "conversation", then a shell_exec plan.
+    thinker_replies = ["conversation", "ПЛАН:\nШаг 1: [shell_exec] выполнить `uptime`"]
+    # Canned JSON-compiler reply: a one-step plan that runs `uptime` through shell_exec.
+    compiled_plan = '{"type":"plan","payload":{"steps":[{"id":"1","tool":"shell_exec","args":{"command":"uptime"},"depends_on":[]}]}}'
+    router = AsyncRouter(
+        thinker=_FakeAdapter(thinker_replies),
+        json_compiler=_FakeAdapter([compiled_plan]),
+    )
+    request_context = {"task_summary": "Проверь аптайм ПК", "task_context": {}}
+
+    directive = asyncio.run(router.decide(state={}, context=request_context))
+
+    # The "conversation" reply must not suppress the tool plan that follows it.
+    assert directive.type == "plan"
+    first_step = directive.payload["steps"][0]
+    assert first_step["tool"] == "shell_exec"