from __future__ import annotations

from threading import RLock
from typing import TYPE_CHECKING, Any, Iterator

if TYPE_CHECKING:
    # ``Llama`` is referenced only in annotations, which are lazy thanks to
    # ``from __future__ import annotations``; guarding the import keeps this
    # module importable (and testable with a stand-in) without llama_cpp.
    from llama_cpp import Llama


class OrchestratorAdapter:
    """Thin chat-completion wrapper around a ``Llama`` instance.

    Every request is sent as a two-message conversation (system prompt +
    user prompt) and every call into the model is made while holding
    ``lock``, so callers sharing one lock never hit the model concurrently.
    NOTE(review): presumably the lock exists because the underlying model is
    not safe for concurrent use -- confirm against llama_cpp.
    """

    DEFAULT_SYSTEM_PROMPT = (
        "You are an expert orchestrator for a local AI agent system. "
        "Your role is to analyze the user's task, decide whether planning is needed."
    )
    DEFAULT_TEMPERATURE = 0.2
    DEFAULT_MAX_TOKENS = 512

    def __init__(
        self,
        llm: Llama,
        system_prompt: str | None = None,
        lock: RLock | None = None,
        *,
        temperature: float = DEFAULT_TEMPERATURE,
        default_max_tokens: int = DEFAULT_MAX_TOKENS,
    ) -> None:
        """Store the model handle and the per-adapter sampling settings.

        Args:
            llm: Object exposing ``create_chat_completion(...)``.
            system_prompt: System message for every request. ``None`` or an
                empty string selects ``DEFAULT_SYSTEM_PROMPT``.
            lock: Lock guarding calls into ``llm``; pass a shared lock when
                several adapters use the same model. A private ``RLock`` is
                created when omitted.
            temperature: Sampling temperature forwarded on every request.
            default_max_tokens: Token budget used when a call does not
                supply its own ``max_tokens``.
        """
        self._llm = llm
        self._lock = lock if lock is not None else RLock()
        self._system_prompt = system_prompt or self.DEFAULT_SYSTEM_PROMPT
        self._temperature = temperature
        self._default_max_tokens = default_max_tokens

    def _build_messages(self, prompt: str) -> list[dict[str, str]]:
        """Return the system + user message pair sent with each request."""
        return [
            {"role": "system", "content": self._system_prompt},
            {"role": "user", "content": prompt},
        ]

    def _resolve_max_tokens(self, max_tokens: int | None) -> int:
        """Return ``max_tokens``, or the adapter default when it is falsy.

        ``0`` is treated like ``None`` on purpose: that is what the
        historical ``max_tokens or 512`` expression did.
        """
        return max_tokens or self._default_max_tokens

    def generate(self, prompt: str, max_tokens: int | None = None) -> str:
        """Run one blocking chat completion and return the reply text.

        Args:
            prompt: User message content.
            max_tokens: Per-call token budget; ``None`` or ``0`` uses the
                adapter default.

        Returns:
            The ``content`` of the first choice's message, or ``""`` when
            the model returned no content (``None``).
        """
        messages = self._build_messages(prompt)
        with self._lock:
            output = self._llm.create_chat_completion(
                messages=messages,
                max_tokens=self._resolve_max_tokens(max_tokens),
                temperature=self._temperature,
            )
        content = output["choices"][0]["message"]["content"]
        # Honour the ``-> str`` contract even if the message carries no text.
        return content if content is not None else ""

    def stream(self, prompt: str, max_tokens: int | None = None) -> Iterator[str]:
        """Yield the reply incrementally as non-empty text fragments.

        This is a generator: nothing runs (and the lock is not taken) until
        the first ``next()``. The lock is then held until the generator is
        exhausted or closed, so an abandoned, un-closed generator keeps
        other users of the lock waiting.

        Args:
            prompt: User message content.
            max_tokens: Per-call token budget; ``None`` or ``0`` uses the
                adapter default.

        Yields:
            Each chunk's ``delta.content`` when present and non-empty.
        """
        messages = self._build_messages(prompt)
        with self._lock:
            chunks = self._llm.create_chat_completion(
                messages=messages,
                max_tokens=self._resolve_max_tokens(max_tokens),
                temperature=self._temperature,
                stream=True,
            )
            for chunk in chunks:
                # Chunks without a delta or without text are skipped.
                content = chunk["choices"][0].get("delta", {}).get("content")
                if content:
                    yield content