119 lines
3.9 KiB
Python
119 lines
3.9 KiB
Python
import pytest
|
|
import httpx
|
|
|
|
from duck_core.model_client import ModelClient
|
|
|
|
|
|
def test_model_client_loads_role_settings():
    """Check the per-role settings that ModelClient reads from the models config.

    Looks up the "thinker" and "action" role configs and verifies the model
    name, temperature, structured-output flag and response schema path.
    """
    model_client = ModelClient("config/models.yaml")

    # Fetch both role configs up front, in the same order as they are checked.
    role_configs = {
        role: model_client.get_role_config(role) for role in ("thinker", "action")
    }
    thinker_cfg = role_configs["thinker"]
    action_cfg = role_configs["action"]

    assert thinker_cfg.model == "local-main"
    assert thinker_cfg.temperature == 0.4
    assert action_cfg.structured_output is True
    assert action_cfg.response_schema == "duck_core/schemas/action_directive.schema.json"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_model_client_missing_role_is_clear_error():
    """chat() with a role named "missing" must raise a descriptive KeyError.

    The error message is expected to match "Unknown model role".
    """
    model_client = ModelClient("config/models.yaml")
    user_messages = [{"role": "user", "content": "hello"}]

    expect_unknown_role = pytest.raises(KeyError, match="Unknown model role")
    with expect_unknown_role:
        await model_client.chat("missing", user_messages)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_model_client_preserves_reasoning_content(monkeypatch):
    """chat() must expose both `content` and `reasoning_content` of the reply.

    httpx.AsyncClient.post is replaced with a fake that returns a canned
    200 response whose assistant message carries both fields.
    """
    assistant_message = {
        "role": "assistant",
        "content": "final answer",
        "reasoning_content": "private reasoning",
    }
    token_usage = {
        "prompt_tokens": 3,
        "completion_tokens": 2,
        "total_tokens": 5,
    }
    canned_body = {
        "choices": [{"message": assistant_message}],
        "usage": token_usage,
    }

    async def fake_post(self, url, json):
        # Stand-in for httpx.AsyncClient.post; the request payload is ignored.
        originating_request = httpx.Request("POST", url)
        return httpx.Response(200, json=canned_body, request=originating_request)

    monkeypatch.setattr(httpx.AsyncClient, "post", fake_post)
    model_client = ModelClient("config/models.yaml")

    reply = await model_client.chat("thinker", [{"role": "user", "content": "hello"}])

    assert reply.content == "final answer"
    assert reply.reasoning_content == "private reasoning"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_model_client_adds_request_reasoning_options(monkeypatch):
    """The `reasoning` argument of chat() must shape the outgoing request body.

    Two calls are made (reasoning="on", then reasoning="off") against a fake
    httpx.AsyncClient.post that records each JSON payload; the recorded
    payloads are then inspected for the reasoning-related keys.
    """
    captured_payloads = []

    async def fake_post(self, url, json):
        # Record the request body, then answer with a minimal successful reply.
        captured_payloads.append(json)
        ok_body = {"choices": [{"message": {"role": "assistant", "content": "ok"}}]}
        return httpx.Response(200, json=ok_body, request=httpx.Request("POST", url))

    monkeypatch.setattr(httpx.AsyncClient, "post", fake_post)
    model_client = ModelClient("config/models.yaml")

    for reasoning_mode in ("on", "off"):
        await model_client.chat(
            "thinker",
            [{"role": "user", "content": "hello"}],
            reasoning=reasoning_mode,
        )

    # First call: reasoning enabled, no thinking budget key in the payload.
    on_payload = captured_payloads[0]
    assert on_payload["reasoning_format"] == "deepseek"
    assert on_payload["chat_template_kwargs"] == {"enable_thinking": True}
    assert "thinking_budget_tokens" not in on_payload

    # Second call: reasoning disabled, thinking budget set to zero.
    off_payload = captured_payloads[1]
    assert off_payload["reasoning_format"] == "deepseek"
    assert off_payload["chat_template_kwargs"] == {"enable_thinking": False}
    assert off_payload["thinking_budget_tokens"] == 0
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_model_client_stream_chat_yields_reasoning_then_content(monkeypatch):
    """stream_chat() must yield a reasoning delta followed by a content delta.

    httpx.AsyncClient.stream is replaced with a fake whose response emits
    three canned `data:` lines: a reasoning delta, a content delta and a
    final `[DONE]` line. Only the two deltas are expected as chunks.
    """
    canned_lines = (
        'data: {"choices":[{"delta":{"reasoning_content":"thinking "}}]}',
        'data: {"choices":[{"delta":{"content":"answer"}}]}',
        "data: [DONE]",
    )

    class CannedStreamResponse:
        def raise_for_status(self):
            return None

        async def aiter_lines(self):
            for line in canned_lines:
                yield line

    class CannedStreamContext:
        async def __aenter__(self):
            return CannedStreamResponse()

        async def __aexit__(self, exc_type, exc, tb):
            # Returning False lets any exception raised in the body propagate.
            return False

    def fake_stream(self, method, url, json):
        # Stand-in for httpx.AsyncClient.stream; all arguments are ignored.
        return CannedStreamContext()

    monkeypatch.setattr(httpx.AsyncClient, "stream", fake_stream)
    model_client = ModelClient("config/models.yaml")

    received = []
    async for event in model_client.stream_chat(
        "thinker", [{"role": "user", "content": "hello"}]
    ):
        received.append(event)

    expected_events = [
        {"type": "reasoning_delta", "delta": "thinking "},
        {"type": "content_delta", "delta": "answer"},
    ]
    assert received == expected_events
|