import os from dataclasses import dataclass from functools import lru_cache from pathlib import Path from dotenv import load_dotenv @dataclass(frozen=True) class Settings: llama_server_bin: str = "llama-server" main_model_path: str = "./models/Qwen3.6/nonMTP/Qwen3.6-35B-A3B-UD-Q4_K_M.gguf" main_port: int = 8081 ctx_size: int = 65536 n_gpu_layers: str = "auto" host: str = "127.0.0.1" api_host: str = "127.0.0.1" api_port: int = 8000 workspace: str = "./workspace" db_path: str = "./data/duck.sqlite3" max_input_tokens: int = 49152 max_recent_events_tokens: int = 12000 max_memory_tokens: int = 8000 max_skill_tokens: int = 6000 qdrant_url: str = "http://127.0.0.1:6333" enable_reflection: int = 0 skip_live_llm_tests: int = 0 @property def db_file(self) -> Path: return Path(self.db_path) @lru_cache def get_settings() -> Settings: load_dotenv() return Settings( llama_server_bin=os.getenv("DUCK_LLAMA_SERVER_BIN", "llama-server"), main_model_path=os.getenv( "DUCK_MAIN_MODEL_PATH", "./models/Qwen3.6/nonMTP/Qwen3.6-35B-A3B-UD-Q4_K_M.gguf", ), main_port=int(os.getenv("DUCK_MAIN_PORT", "8081")), ctx_size=int(os.getenv("DUCK_CTX_SIZE", "65536")), n_gpu_layers=os.getenv("DUCK_N_GPU_LAYERS", "auto"), host=os.getenv("DUCK_HOST", "127.0.0.1"), api_host=os.getenv("DUCK_API_HOST", "127.0.0.1"), api_port=int(os.getenv("DUCK_API_PORT", "8000")), workspace=os.getenv("DUCK_WORKSPACE", "./workspace"), db_path=os.getenv("DUCK_DB_PATH", "./data/duck.sqlite3"), max_input_tokens=int(os.getenv("DUCK_MAX_INPUT_TOKENS", "49152")), max_recent_events_tokens=int(os.getenv("DUCK_MAX_RECENT_EVENTS_TOKENS", "12000")), max_memory_tokens=int(os.getenv("DUCK_MAX_MEMORY_TOKENS", "8000")), max_skill_tokens=int(os.getenv("DUCK_MAX_SKILL_TOKENS", "6000")), qdrant_url=os.getenv("QDRANT_URL", "http://127.0.0.1:6333"), enable_reflection=int(os.getenv("DUCK_ENABLE_REFLECTION", "0")), skip_live_llm_tests=int(os.getenv("DUCK_SKIP_LIVE_LLM_TESTS", "0")), )