# ducklm/tests/smoke/test_context_builder.py
#
# 224 lines
# 7.4 KiB
# Python

import pytest
from unittest.mock import AsyncMock
from duck_core.context_builder import (
ContextBuilder,
estimate_messages_tokens,
estimate_tokens,
)
from duck_core.model_client import ModelResponse
from duck_core.tasks.state import TaskState
def _make_task(message: str = "test") -> TaskState:
    """Return a ``TaskState`` fixture carrying *message* as its user message.

    Every other field is a fixed placeholder, so the tests only vary the
    user message.

    Args:
        message: Text stored in ``user_message``.

    Returns:
        A ``TaskState`` with status ``"running"`` and debug disabled.
    """
    placeholder_time = "now"
    fields = dict(
        task_id="task_1",
        status="running",
        user_message=message,
        workspace="/tmp/test",
        debug=False,
        created_at=placeholder_time,
        updated_at=placeholder_time,
    )
    return TaskState(**fields)
def test_estimate_tokens_approximate():
    """Check ``estimate_tokens`` against three hand-computed estimates."""
    expectations = (
        ("hello world", 2),  # 11 chars / 4 = 2
        ("", 1),  # minimum 1
        ("a" * 400, 100),
    )
    for text, expected in expectations:
        assert estimate_tokens(text) == expected
def test_estimate_messages_tokens():
    """Check the token estimate for a two-message conversation.

    Expected arithmetic: each message costs its content tokens plus 4
    overhead; "hello" and "world" are 5 chars / 4 = 1 token each, so each
    message is 5 and the total is 10.
    """
    turns = (("user", "hello"), ("assistant", "world"))
    conversation = [{"role": role, "content": text} for role, text in turns]

    total = estimate_messages_tokens(conversation)

    assert total > 0
    assert total == 10
def test_context_builder_basic_messages():
    """With no extras, the builder yields exactly one user message."""
    question = "What is DuckLM?"
    built = ContextBuilder().build_basic_messages(_make_task(question))

    assert len(built) == 1
    only_message = built[0]
    assert only_message["role"] == "user"
    assert only_message["content"] == "What is DuckLM?"
def test_context_builder_injects_memory():
    """Memory records appear in a leading system message; the user message stays last."""
    builder = ContextBuilder()
    task = _make_task("Что помнить?")
    remembered = [
        {"scope": "global", "text": "Use Russian."},
        {"scope": "workspace", "text": "DuckLM uses Vulkan."},
    ]

    built = builder.build_basic_messages(task, memory_records=remembered)

    head = built[0]
    assert head["role"] == "system"
    system_text = head["content"]
    assert "Relevant memory" in system_text
    assert "global: Use Russian." in system_text
    # The user's own message must remain the final entry.
    assert built[-1]["content"] == "Что помнить?"
def test_context_builder_injects_skill_summary():
    """A supplied skill summary produces a message mentioning "Active skill"."""
    builder = ContextBuilder()
    task = _make_task("Analyze this project")
    summary = "analyze_project: Inspect repository structure."

    built = builder.build_basic_messages(task, skill_summary=summary)

    # Stop at the first match, like a lazy any().
    mentions_skill = False
    for entry in built:
        if "Active skill" in entry.get("content", ""):
            mentions_skill = True
            break
    assert mentions_skill
def test_context_builder_injects_tool_observations():
    """Tool observations end up in exactly one message that names the tool."""
    builder = ContextBuilder()
    task = _make_task("List files")
    listing_result = {"ok": True, "output": "file1.txt\nfile2.txt"}
    observations = [{"tool": "list_dir", "result": listing_result}]

    built = builder.build_basic_messages(task, tool_observations=observations)

    observation_messages = []
    for entry in built:
        if "Tool observations" in entry.get("content", ""):
            observation_messages.append(entry)

    assert len(observation_messages) == 1
    assert "list_dir" in observation_messages[0]["content"]
def test_context_builder_includes_history():
    """Prior history turns and the new user message all appear in the output."""
    builder = ContextBuilder()
    task = _make_task("Follow-up question")
    earlier_turns = [
        {"role": "user", "content": "first question"},
        {"role": "assistant", "content": "first answer"},
    ]

    built = builder.build_basic_messages(task, history_messages=earlier_turns)

    contents = []
    for entry in built:
        contents.append(entry["content"])

    for expected in ("first question", "first answer", "Follow-up question"):
        assert expected in contents
def test_context_builder_user_message_always_last():
    """With memory, history and tool observations supplied, the user message is still last."""
    builder = ContextBuilder()
    task = _make_task("Final message")
    extras = {
        "memory_records": [{"scope": "global", "text": "Remember this."}],
        "history_messages": [{"role": "user", "content": "old"}],
        "tool_observations": [{"tool": "test", "result": {"ok": True}}],
    }

    built = builder.build_basic_messages(task, **extras)

    assert built[-1]["role"] == "user"
    assert built[-1]["content"] == "Final message"
def test_context_builder_truncates_long_memory():
    """An oversized memory record with a tiny memory budget still yields valid messages.

    NOTE(review): this only checks that building succeeds and the user
    message is last; it does not assert that the memory text was actually
    shortened.
    """
    tiny_budget_builder = ContextBuilder(max_memory_tokens=10)  # Very small budget
    task = _make_task("test")
    oversized_memory = [{"scope": "workspace", "text": "x" * 200}]

    built = tiny_budget_builder.build_basic_messages(
        task, memory_records=oversized_memory
    )

    # Should still produce valid messages without error
    assert len(built) >= 1
    assert built[-1]["content"] == "test"
def test_context_builder_respects_token_budget():
    """Long history must not push the built context far past the input budget."""
    tight_builder = ContextBuilder(max_input_tokens=100)  # Very tight budget
    task = _make_task("Short question")
    oversized_history = [
        {"role": role, "content": filler * 500}
        for role, filler in (("user", "a"), ("assistant", "b"))
    ]

    built = tight_builder.build_basic_messages(
        task, history_messages=oversized_history
    )

    # Should not exceed budget significantly; allow some margin for the
    # always-included user message.
    budget_with_margin = 150  # 100 + margin
    total_tokens = estimate_messages_tokens(built)
    assert total_tokens <= budget_with_margin
def test_context_builder_empty_memory_and_history():
    """Without memory or history, the only message is the user's text."""
    greeting_task = _make_task("Hello")
    built = ContextBuilder().build_basic_messages(greeting_task)

    assert len(built) == 1
    (first, *_rest) = built
    assert first["content"] == "Hello"
@pytest.mark.asyncio
async def test_context_builder_recall_awaits_model_client():
    """``recall_relevant_memory`` awaits the model client once and keeps only ``mem_1``.

    The mocked model reply lists ``mem_1`` in ``relevant_ids``; the test
    expects just that record back.
    """
    recall_reply = ModelResponse(
        role="recall",
        model="local-main",
        content='{"relevant_ids":["mem_1"],"sufficient_to_answer":true,"reasoning":"matches query"}',
        reasoning_content=None,
        raw={},
        latency_ms=1.0,
    )
    model_client = AsyncMock()
    model_client.chat = AsyncMock(return_value=recall_reply)

    builder = ContextBuilder(model_client=model_client)
    candidates = [
        {"memory_id": "mem_1", "text": "DuckLM uses Vulkan."},
        {"memory_id": "mem_2", "text": "Unrelated."},
    ]

    relevant = await builder.recall_relevant_memory("How does DuckLM run?", candidates)

    assert relevant == [candidates[0]]
    model_client.chat.assert_awaited_once()
@pytest.mark.asyncio
async def test_context_builder_recall_returns_sufficiency_decision():
    """``recall_relevant_memory_decision`` exposes records, sufficiency flag and reasoning.

    The expected values mirror the fields of the mocked model's JSON reply.
    """
    decision_reply = ModelResponse(
        role="recall",
        model="local-main",
        content='{"relevant_ids":["mem_1"],"sufficient_to_answer":true,"reasoning":"memory answers directly"}',
        reasoning_content=None,
        raw={},
        latency_ms=1.0,
    )
    model_client = AsyncMock()
    model_client.chat = AsyncMock(return_value=decision_reply)

    builder = ContextBuilder(model_client=model_client)
    candidates = [
        {"memory_id": "mem_1", "text": "User name is Vladimir."},
        {"memory_id": "mem_2", "text": "Unrelated."},
    ]

    decision = await builder.recall_relevant_memory_decision(
        "What is my name?", candidates
    )

    assert decision.records == [candidates[0]]
    assert decision.sufficient_to_answer is True
    assert decision.reasoning == "memory answers directly"
@pytest.mark.asyncio
async def test_context_builder_summary_awaits_model_client():
    """``build_async_messages`` awaits the model client once and embeds its summary.

    NOTE(review): the 800-character history against ``max_input_tokens=150``
    is presumably what triggers summarisation; confirm against
    ``ContextBuilder``.
    """
    summary_reply = ModelResponse(
        role="summary",
        model="local-main",
        content="A short summary.",
        reasoning_content=None,
        raw={},
        latency_ms=1.0,
    )
    model_client = AsyncMock()
    model_client.chat = AsyncMock(return_value=summary_reply)

    builder = ContextBuilder(max_input_tokens=150, model_client=model_client)
    task = _make_task("Current")
    long_history = [{"role": "user", "content": "x" * 800}]

    built = await builder.build_async_messages(task, history_messages=long_history)

    # Stop at the first message carrying the summary, like a lazy any().
    summary_present = False
    for entry in built:
        if "Conversation summary:\nA short summary." in entry["content"]:
            summary_present = True
            break
    assert summary_present
    model_client.chat.assert_awaited_once()