Honor tool plans despite weak classification
This commit is contained in:
parent
7215033028
commit
28a2f63713
|
|
@ -169,24 +169,14 @@ class AsyncRouter:
|
||||||
session_id,
|
session_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
# If mode_hint is conversation, only allow respond type
|
if mode_hint == "conversation" and self._looks_like_tool_plan(thinker_result):
|
||||||
if mode_hint == "conversation" and not self._is_simple_response(thinker_result):
|
mode_hint = "execution"
|
||||||
# Check if Thinker is trying to create an execution plan instead
|
self._emit_event(
|
||||||
if any(word in thinker_result.lower() for word in ["шаг", "step", "выполнить", "execute", "shell", "команда"]):
|
ORCHESTRATOR_FALLBACK_USED,
|
||||||
# Override to conversation-only response
|
{"reason": "thinker_proposed_tool_plan_despite_conversation_hint"},
|
||||||
respond_text = self._extract_conversation_response(thinker_result)
|
task_id,
|
||||||
self._emit_event(
|
session_id,
|
||||||
ORCHESTRATOR_RESULT,
|
)
|
||||||
{"directive": {"type": "respond", "payload": {"text": respond_text}}, "mode_violation": True},
|
|
||||||
task_id,
|
|
||||||
session_id,
|
|
||||||
)
|
|
||||||
return ExecutionDirective(
|
|
||||||
type="respond",
|
|
||||||
payload={"text": respond_text},
|
|
||||||
requires_permission=False,
|
|
||||||
reason="Mode violation: conversation only",
|
|
||||||
)
|
|
||||||
|
|
||||||
if self._is_simple_response(thinker_result):
|
if self._is_simple_response(thinker_result):
|
||||||
json_compiler_prompt = self._build_json_compiler_prompt(thinker_result)
|
json_compiler_prompt = self._build_json_compiler_prompt(thinker_result)
|
||||||
|
|
@ -295,6 +285,15 @@ class AsyncRouter:
|
||||||
sentences = thinker_result.split('.')[:3]
|
sentences = thinker_result.split('.')[:3]
|
||||||
return '. '.join(sentences).strip()
|
return '. '.join(sentences).strip()
|
||||||
|
|
||||||
|
def _looks_like_tool_plan(self, thinker_result: str) -> bool:
|
||||||
|
result = thinker_result.lower()
|
||||||
|
tool_names = set()
|
||||||
|
if self._tool_registry:
|
||||||
|
tool_names = set(self._tool_registry.list_names())
|
||||||
|
tool_markers = {"shell_exec", "file_read", "file_write", "memory", *tool_names}
|
||||||
|
plan_markers = ("план:", "шаг", "step", "tool", "инструмент")
|
||||||
|
return any(marker in result for marker in tool_markers) and any(marker in result for marker in plan_markers)
|
||||||
|
|
||||||
def _build_thinker_prompt(
|
def _build_thinker_prompt(
|
||||||
self, task_summary: str, context: dict[str, Any], mode_hint: str
|
self, task_summary: str, context: dict[str, Any], mode_hint: str
|
||||||
) -> str:
|
) -> str:
|
||||||
|
|
@ -393,6 +392,14 @@ class AsyncRouter:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
result = re.sub(r"<think>.*?</think>", " ", result, flags=re.DOTALL)
|
result = re.sub(r"<think>.*?</think>", " ", result, flags=re.DOTALL)
|
||||||
|
if (
|
||||||
|
"shell_exec" in result
|
||||||
|
or "execute command" in result
|
||||||
|
or "command execution" in result
|
||||||
|
or "use the tool" in result
|
||||||
|
or "use a tool" in result
|
||||||
|
):
|
||||||
|
return "execution"
|
||||||
tokens = re.findall(r"\b(execution|conversation|clarification_needed)\b", result)
|
tokens = re.findall(r"\b(execution|conversation|clarification_needed)\b", result)
|
||||||
if tokens:
|
if tokens:
|
||||||
return tokens[-1]
|
return tokens[-1]
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,17 @@
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
from app.core.async_router import AsyncRouter
|
||||||
from app.core.contracts import CriticScore, ExecutionDirective, PlanStep, UserTask
|
from app.core.contracts import CriticScore, ExecutionDirective, PlanStep, UserTask
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeAdapter:
|
||||||
|
def __init__(self, responses: list[str]) -> None:
|
||||||
|
self._responses = responses
|
||||||
|
|
||||||
|
async def generate(self, prompt: str, max_tokens: int | None = None) -> str:
|
||||||
|
return self._responses.pop(0)
|
||||||
|
|
||||||
|
|
||||||
def test_user_task_defaults() -> None:
|
def test_user_task_defaults() -> None:
|
||||||
task = UserTask(input="hello")
|
task = UserTask(input="hello")
|
||||||
assert task.task_id
|
assert task.task_id
|
||||||
|
|
@ -35,3 +46,22 @@ def test_execution_directive_defaults() -> None:
|
||||||
assert directive.payload == {}
|
assert directive.payload == {}
|
||||||
assert directive.confidence == 0.0
|
assert directive.confidence == 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def test_router_compiles_tool_plan_even_when_classifier_says_conversation() -> None:
    """A tool plan from the Thinker is compiled even if the mode reply is "conversation"."""
    # First Thinker reply is the mode answer, second is the plan text itself.
    thinker_replies = [
        "conversation",
        "ПЛАН:\nШаг 1: [shell_exec] выполнить `uptime`",
    ]
    compiled_plan = '{"type":"plan","payload":{"steps":[{"id":"1","tool":"shell_exec","args":{"command":"uptime"},"depends_on":[]}]}}'

    thinker = _FakeAdapter(thinker_replies)
    json_compiler = _FakeAdapter([compiled_plan])
    router = AsyncRouter(thinker=thinker, json_compiler=json_compiler)

    request_context = {"task_summary": "Проверь аптайм ПК", "task_context": {}}
    directive = asyncio.run(router.decide(state={}, context=request_context))

    assert directive.type == "plan"
    first_step = directive.payload["steps"][0]
    assert first_step["tool"] == "shell_exec"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue