ducklm/duck_core/config.py

57 lines
2.1 KiB
Python

import os
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path
from dotenv import load_dotenv
@dataclass(frozen=True)
class Settings:
    """Immutable bundle of runtime configuration values.

    Every field has a default, so ``Settings()`` is always constructible.
    The defaults equal the fallback values ``get_settings`` uses when the
    corresponding environment variable is unset. Instances are frozen:
    assigning to a field raises ``dataclasses.FrozenInstanceError``.

    Field order is part of the interface (positional construction), so new
    fields should be appended rather than inserted.
    """

    # llama-server process settings (consumed by code outside this module).
    llama_server_bin: str = "llama-server"
    main_model_path: str = "./models/Qwen3.6/nonMTP/Qwen3.6-35B-A3B-UD-Q4_K_M.gguf"
    main_port: int = 8081
    ctx_size: int = 65536
    # Kept as a string because the default is the non-numeric value "auto".
    n_gpu_layers: str = "auto"
    host: str = "127.0.0.1"

    # API endpoint address.
    api_host: str = "127.0.0.1"
    api_port: int = 8000

    # Filesystem locations, stored as plain strings; see ``db_file`` for a
    # ``Path`` view of ``db_path``.
    workspace: str = "./workspace"
    db_path: str = "./data/duck.sqlite3"

    # Token budgets. NOTE(review): names suggest per-section prompt limits;
    # how they are enforced is not visible from this module.
    max_input_tokens: int = 49152
    max_recent_events_tokens: int = 12000
    max_memory_tokens: int = 8000
    max_skill_tokens: int = 6000

    qdrant_url: str = "http://127.0.0.1:6333"

    # Integer flag rather than bool. NOTE(review): presumably non-zero means
    # "skip" - confirm against the test suite that reads it.
    skip_live_llm_tests: int = 0

    @property
    def db_file(self) -> Path:
        """Return ``db_path`` wrapped in a ``pathlib.Path``."""
        return Path(self.db_path)
def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* as an int, or *default* if unset.

    Raises:
        ValueError: if the variable is set but is not a valid integer. The
            message names the variable so a bad entry is easy to locate.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError as exc:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from exc


@lru_cache
def get_settings() -> Settings:
    """Build the process-wide ``Settings`` from environment variables.

    ``load_dotenv()`` is called first, then each field is read from its
    environment variable. A variable that is unset falls back to the
    default declared on ``Settings``, so defaults live in one place only.

    The result is memoised by ``lru_cache``: the environment is read on the
    first call and the same instance is returned afterwards. Call
    ``get_settings.cache_clear()`` to force a re-read (e.g. in tests).

    Returns:
        The cached ``Settings`` instance.

    Raises:
        ValueError: if an integer-valued variable holds a non-integer value.
    """
    load_dotenv()
    # Single source of truth for fallbacks: the dataclass defaults.
    defaults = Settings()
    return Settings(
        llama_server_bin=os.getenv("DUCK_LLAMA_SERVER_BIN", defaults.llama_server_bin),
        main_model_path=os.getenv("DUCK_MAIN_MODEL_PATH", defaults.main_model_path),
        main_port=_env_int("DUCK_MAIN_PORT", defaults.main_port),
        ctx_size=_env_int("DUCK_CTX_SIZE", defaults.ctx_size),
        n_gpu_layers=os.getenv("DUCK_N_GPU_LAYERS", defaults.n_gpu_layers),
        host=os.getenv("DUCK_HOST", defaults.host),
        api_host=os.getenv("DUCK_API_HOST", defaults.api_host),
        api_port=_env_int("DUCK_API_PORT", defaults.api_port),
        workspace=os.getenv("DUCK_WORKSPACE", defaults.workspace),
        db_path=os.getenv("DUCK_DB_PATH", defaults.db_path),
        max_input_tokens=_env_int("DUCK_MAX_INPUT_TOKENS", defaults.max_input_tokens),
        max_recent_events_tokens=_env_int(
            "DUCK_MAX_RECENT_EVENTS_TOKENS", defaults.max_recent_events_tokens
        ),
        max_memory_tokens=_env_int("DUCK_MAX_MEMORY_TOKENS", defaults.max_memory_tokens),
        max_skill_tokens=_env_int("DUCK_MAX_SKILL_TOKENS", defaults.max_skill_tokens),
        # NOTE: the only variable without the DUCK_ prefix; name kept as-is
        # so existing environments keep working.
        qdrant_url=os.getenv("QDRANT_URL", defaults.qdrant_url),
        skip_live_llm_tests=_env_int(
            "DUCK_SKIP_LIVE_LLM_TESTS", defaults.skip_live_llm_tests
        ),
    )