# ducklm/tests/smoke/test_model_client.py

import pytest
import httpx

from duck_core.model_client import ModelClient


def test_model_client_loads_role_settings():
    """Check the per-role settings read from ``config/models.yaml``.

    Looks up the "thinker" and "action" role configs and verifies the model
    name and temperature of the former, and the structured-output flag and
    response schema path of the latter.
    """
    model_client = ModelClient("config/models.yaml")

    # Resolve both roles up front (thinker first, then action) before
    # asserting on either of them.
    thinker_cfg, action_cfg = [
        model_client.get_role_config(role_name)
        for role_name in ("thinker", "action")
    ]

    assert thinker_cfg.model == "local-main"
    assert thinker_cfg.temperature == 0.4
    assert action_cfg.structured_output is True
    assert action_cfg.response_schema == "duck_core/schemas/action_directive.schema.json"


@pytest.mark.asyncio
async def test_model_client_missing_role_is_clear_error():
    """Chatting with the role "missing" must raise a descriptive ``KeyError``.

    The error message is expected to match "Unknown model role".
    """
    model_client = ModelClient("config/models.yaml")
    conversation = [{"role": "user", "content": "hello"}]

    with pytest.raises(KeyError, match="Unknown model role"):
        await model_client.chat("missing", conversation)


@pytest.mark.asyncio
async def test_model_client_preserves_reasoning_content(monkeypatch):
    """Verify ``chat`` exposes both the answer text and the reasoning text.

    ``httpx.AsyncClient.post`` is swapped for a stub that returns a canned
    chat-completion body containing ``content`` and ``reasoning_content``;
    the test then checks both fields on the object returned by ``chat``.
    """
    canned_body = {
        "choices": [
            {
                "message": {
                    "role": "assistant",
                    "content": "final answer",
                    "reasoning_content": "private reasoning",
                }
            }
        ],
        "usage": {
            "prompt_tokens": 3,
            "completion_tokens": 2,
            "total_tokens": 5,
        },
    }

    async def stub_post(self, url, json):
        # Attach a request for the same URL to the canned 200 response.
        originating_request = httpx.Request("POST", url)
        return httpx.Response(200, json=canned_body, request=originating_request)

    monkeypatch.setattr(httpx.AsyncClient, "post", stub_post)
    model_client = ModelClient("config/models.yaml")

    conversation = [{"role": "user", "content": "hello"}]
    reply = await model_client.chat("thinker", conversation)

    assert reply.content == "final answer"
    assert reply.reasoning_content == "private reasoning"


@pytest.mark.asyncio
async def test_model_client_adds_request_reasoning_options(monkeypatch):
    """Verify the ``reasoning`` argument shapes the outgoing request body.

    A stub for ``httpx.AsyncClient.post`` records every JSON body it is
    given. ``chat`` is called once with ``reasoning="on"`` and once with
    ``reasoning="off"``; both bodies must carry the "deepseek" reasoning
    format, while only the "off" body disables thinking and sets a zero
    thinking budget.
    """
    sent_bodies = []

    async def recording_post(self, url, json):
        # Capture the request body, then answer with a minimal valid reply.
        sent_bodies.append(json)
        minimal_reply = {"choices": [{"message": {"role": "assistant", "content": "ok"}}]}
        return httpx.Response(
            200,
            json=minimal_reply,
            request=httpx.Request("POST", url),
        )

    monkeypatch.setattr(httpx.AsyncClient, "post", recording_post)
    model_client = ModelClient("config/models.yaml")

    for reasoning_mode in ("on", "off"):
        await model_client.chat(
            "thinker",
            [{"role": "user", "content": "hello"}],
            reasoning=reasoning_mode,
        )

    # First recorded body: reasoning switched on, no budget key present.
    body_on = sent_bodies[0]
    assert body_on["reasoning_format"] == "deepseek"
    assert body_on["chat_template_kwargs"] == {"enable_thinking": True}
    assert "thinking_budget_tokens" not in body_on

    # Second recorded body: reasoning switched off with a zero budget.
    body_off = sent_bodies[1]
    assert body_off["reasoning_format"] == "deepseek"
    assert body_off["chat_template_kwargs"] == {"enable_thinking": False}
    assert body_off["thinking_budget_tokens"] == 0


@pytest.mark.asyncio
async def test_model_client_stream_chat_yields_reasoning_then_content(monkeypatch):
    """Verify ``stream_chat`` yields typed delta chunks from streamed lines.

    ``httpx.AsyncClient.stream`` is replaced with a stub whose async context
    manager hands back a fake response. That response emits one
    reasoning delta line, one content delta line and a ``[DONE]`` line; the
    test expects exactly one ``reasoning_delta`` chunk followed by one
    ``content_delta`` chunk.
    """
    streamed_lines = (
        'data: {"choices":[{"delta":{"reasoning_content":"thinking "}}]}',
        'data: {"choices":[{"delta":{"content":"answer"}}]}',
        "data: [DONE]",
    )

    class StubResponse:
        def raise_for_status(self):
            return None

        async def aiter_lines(self):
            for line in streamed_lines:
                yield line

    class StubStreamContext:
        async def __aenter__(self):
            return StubResponse()

        async def __aexit__(self, exc_type, exc, tb):
            # Returning False lets any exception from the body propagate.
            return False

    def stub_stream(self, method, url, json):
        return StubStreamContext()

    monkeypatch.setattr(httpx.AsyncClient, "stream", stub_stream)
    model_client = ModelClient("config/models.yaml")

    received = []
    async for piece in model_client.stream_chat(
        "thinker", [{"role": "user", "content": "hello"}]
    ):
        received.append(piece)

    expected = [
        {"type": "reasoning_delta", "delta": "thinking "},
        {"type": "content_delta", "delta": "answer"},
    ]
    assert received == expected