"""Tests for ModelClient role configuration, chat requests, and streaming."""

import httpx
import pytest

from duck_core.model_client import ModelClient


def test_model_client_loads_role_settings():
    """Per-role settings from the models config are exposed via get_role_config."""
    model_client = ModelClient("config/models.yaml")
    thinker_cfg = model_client.get_role_config("thinker")
    action_cfg = model_client.get_role_config("action")

    assert thinker_cfg.model == "local-main"
    assert thinker_cfg.temperature == 0.4
    assert action_cfg.structured_output is True
    assert (
        action_cfg.response_schema
        == "duck_core/schemas/action_directive.schema.json"
    )


@pytest.mark.asyncio
async def test_model_client_missing_role_is_clear_error():
    """Chatting with an unconfigured role raises KeyError with a readable message."""
    model_client = ModelClient("config/models.yaml")
    conversation = [{"role": "user", "content": "hello"}]

    with pytest.raises(KeyError, match="Unknown model role"):
        await model_client.chat("missing", conversation)


@pytest.mark.asyncio
async def test_model_client_preserves_reasoning_content(monkeypatch):
    """chat() exposes both `content` and `reasoning_content` from the reply message."""

    async def fake_post(self, url, json):
        # Canned chat-completion reply carrying a separate reasoning field.
        assistant_message = {
            "role": "assistant",
            "content": "final answer",
            "reasoning_content": "private reasoning",
        }
        token_usage = {
            "prompt_tokens": 3,
            "completion_tokens": 2,
            "total_tokens": 5,
        }
        reply_body = {
            "choices": [{"message": assistant_message}],
            "usage": token_usage,
        }
        return httpx.Response(
            200,
            json=reply_body,
            request=httpx.Request("POST", url),
        )

    monkeypatch.setattr(httpx.AsyncClient, "post", fake_post)
    model_client = ModelClient("config/models.yaml")

    reply = await model_client.chat(
        "thinker", [{"role": "user", "content": "hello"}]
    )

    assert reply.content == "final answer"
    assert reply.reasoning_content == "private reasoning"


@pytest.mark.asyncio
async def test_model_client_adds_request_reasoning_options(monkeypatch):
    """The `reasoning` argument is translated into request payload options."""
    payloads = []

    async def fake_post(self, url, json):
        # Record each outgoing request body so it can be inspected afterwards.
        payloads.append(json)
        minimal_reply = {
            "choices": [{"message": {"role": "assistant", "content": "ok"}}]
        }
        return httpx.Response(
            200,
            json=minimal_reply,
            request=httpx.Request("POST", url),
        )

    monkeypatch.setattr(httpx.AsyncClient, "post", fake_post)
    model_client = ModelClient("config/models.yaml")

    await model_client.chat(
        "thinker", [{"role": "user", "content": "hello"}], reasoning="on"
    )
    await model_client.chat(
        "thinker", [{"role": "user", "content": "hello"}], reasoning="off"
    )

    # First request: reasoning enabled, no explicit thinking budget.
    reasoning_on = payloads[0]
    assert reasoning_on["reasoning_format"] == "deepseek"
    assert reasoning_on["chat_template_kwargs"] == {"enable_thinking": True}
    assert "thinking_budget_tokens" not in reasoning_on

    # Second request: reasoning disabled, thinking budget pinned to zero.
    reasoning_off = payloads[1]
    assert reasoning_off["reasoning_format"] == "deepseek"
    assert reasoning_off["chat_template_kwargs"] == {"enable_thinking": False}
    assert reasoning_off["thinking_budget_tokens"] == 0


@pytest.mark.asyncio
async def test_model_client_stream_chat_yields_reasoning_then_content(monkeypatch):
    """stream_chat() emits a reasoning delta, then a content delta, and nothing for [DONE]."""

    class StubStreamResponse:
        """Stand-in for a streaming HTTP response that serves canned SSE lines."""

        def raise_for_status(self):
            return None

        async def aiter_lines(self):
            sse_lines = (
                'data: {"choices":[{"delta":{"reasoning_content":"thinking "}}]}',
                'data: {"choices":[{"delta":{"content":"answer"}}]}',
                "data: [DONE]",
            )
            for sse_line in sse_lines:
                yield sse_line

    class StubStreamContext:
        """Async context manager that hands out the stub response."""

        async def __aenter__(self):
            return StubStreamResponse()

        async def __aexit__(self, exc_type, exc, tb):
            # Returning False lets any exception raised in the body propagate.
            return False

    def fake_stream(self, method, url, json):
        return StubStreamContext()

    monkeypatch.setattr(httpx.AsyncClient, "stream", fake_stream)
    model_client = ModelClient("config/models.yaml")

    chunks = []
    async for chunk in model_client.stream_chat(
        "thinker", [{"role": "user", "content": "hello"}]
    ):
        chunks.append(chunk)

    expected_chunks = [
        {"type": "reasoning_delta", "delta": "thinking "},
        {"type": "content_delta", "delta": "answer"},
    ]
    assert chunks == expected_chunks