# Source: ducklm/app/models/orchestrator.py (Python; 46 lines, 1.7 KiB as originally captured)

from __future__ import annotations
from threading import RLock
from typing import Any, Iterator
from llama_cpp import Llama
class OrchestratorAdapter:
    """Chat-completion adapter that pairs a ``Llama`` model with a fixed system prompt.

    Each request is sent as a two-message conversation: the system prompt
    followed by the caller's prompt as the user message. Every call into the
    model is made while holding ``lock``; pass the same lock to several
    adapters to make them take turns on one model instance.
    """

    # Prompt used when the caller supplies none (or an empty one).
    DEFAULT_SYSTEM_PROMPT = (
        "You are an expert orchestrator for a local AI agent system. "
        "Your role is to analyze the user's task, decide whether planning is needed."
    )
    # Completion budget used when ``max_tokens`` is None or 0.
    DEFAULT_MAX_TOKENS = 512
    # Sampling temperature used unless overridden at construction time.
    DEFAULT_TEMPERATURE = 0.2

    def __init__(
        self,
        llm: Llama,
        system_prompt: str | None = None,
        lock: RLock | None = None,
        *,
        temperature: float = DEFAULT_TEMPERATURE,
    ) -> None:
        """Store the model handle and per-adapter settings.

        Args:
            llm: Object exposing ``create_chat_completion``.
            system_prompt: System message sent with every request. ``None`` or
                an empty string selects ``DEFAULT_SYSTEM_PROMPT``.
            lock: Lock guarding calls into ``llm``. A private ``RLock`` is
                created when omitted.
            temperature: Sampling temperature forwarded with every request.
        """
        self._llm = llm
        self._lock = lock if lock is not None else RLock()
        self._system_prompt = system_prompt or self.DEFAULT_SYSTEM_PROMPT
        self._temperature = temperature

    def _request_kwargs(self, prompt: str, max_tokens: int | None) -> dict[str, Any]:
        """Build the keyword arguments shared by ``generate`` and ``stream``."""
        return {
            "messages": [
                {"role": "system", "content": self._system_prompt},
                {"role": "user", "content": prompt},
            ],
            # A falsy value (None or 0) deliberately falls back to the default.
            "max_tokens": max_tokens or self.DEFAULT_MAX_TOKENS,
            "temperature": self._temperature,
        }

    def generate(self, prompt: str, max_tokens: int | None = None) -> str:
        """Run one blocking chat completion and return the reply text.

        Args:
            prompt: Text sent as the user message.
            max_tokens: Completion budget; ``None`` or ``0`` selects
                ``DEFAULT_MAX_TOKENS``.

        Returns:
            The ``content`` of the first choice's message, or ``""`` when the
            model reports no content (``None``), so the result is always a
            string.
        """
        request = self._request_kwargs(prompt, max_tokens)
        with self._lock:
            output = self._llm.create_chat_completion(**request)
        content = output["choices"][0]["message"]["content"]
        return content or ""

    def stream(self, prompt: str, max_tokens: int | None = None) -> Iterator[str]:
        """Yield the reply text piece by piece as the model produces it.

        This is a generator: nothing runs, and the lock is not taken, until
        the first item is requested. The lock is then held until the generator
        is exhausted or closed, so a consumer that stops early should call
        ``close()`` on it to release the model promptly.

        Args:
            prompt: Text sent as the user message.
            max_tokens: Completion budget; ``None`` or ``0`` selects
                ``DEFAULT_MAX_TOKENS``.

        Yields:
            Each non-empty ``content`` fragment found in a chunk's ``delta``.
            Chunks without a delta or without content are skipped.
        """
        request = self._request_kwargs(prompt, max_tokens)
        with self._lock:
            for chunk in self._llm.create_chat_completion(stream=True, **request):
                content = chunk["choices"][0].get("delta", {}).get("content")
                if content:
                    yield content