Expose DuckLM runtime status
This commit is contained in:
parent
ff98224eb6
commit
8452673994
|
|
@ -1,6 +1,6 @@
|
||||||
# DuckLM — текущее состояние проекта
|
# DuckLM — текущее состояние проекта
|
||||||
|
|
||||||
Дата обновления: 2026-05-21
|
Дата обновления: 2026-05-22
|
||||||
Рабочая копия: `/home/mirivlad/git/ducklm`
|
Рабочая копия: `/home/mirivlad/git/ducklm`
|
||||||
Git remote: `origin/main`
|
Git remote: `origin/main`
|
||||||
|
|
||||||
|
|
@ -25,6 +25,7 @@ WebChat доступен через FastAPI на `http://127.0.0.1:8000/`.
|
||||||
- TaskStore и EventStore в SQLite.
|
- TaskStore и EventStore в SQLite.
|
||||||
- ModelClient с логическими ролями из `config/models.yaml`.
|
- ModelClient с логическими ролями из `config/models.yaml`.
|
||||||
- Роли: `thinker`, `critic`, `coder`, `action`, `summary`, `memory_policy`, `recall`.
|
- Роли: `thinker`, `critic`, `coder`, `action`, `summary`, `memory_policy`, `recall`.
|
||||||
|
- Расширенный `/v1/status` с API paths, token budgets, model role map и optional live-probe для llama/Qdrant.
|
||||||
- SSE streaming chat: reasoning/content deltas, runtime status events, final stats.
|
- SSE streaming chat: reasoning/content deltas, runtime status events, final stats.
|
||||||
- Runtime status в чате для долгих этапов: planning, running_tool(s), answering.
|
- Runtime status в чате для долгих этапов: planning, running_tool(s), answering.
|
||||||
- Min/avg/max token speed в конце ответа.
|
- Min/avg/max token speed в конце ответа.
|
||||||
|
|
@ -65,6 +66,7 @@ WebChat доступен через FastAPI на `http://127.0.0.1:8000/`.
|
||||||
- reflection
|
- reflection
|
||||||
- experience records
|
- experience records
|
||||||
- Skill candidate selection теперь используется в обычном и streaming chat.
|
- Skill candidate selection теперь используется в обычном и streaming chat.
|
||||||
|
- `scripts/duck.sh status --probe` и `scripts/duck-mtp.sh status --probe` показывают live-состояние DuckLM runtime, model backend и vector memory.
|
||||||
|
|
||||||
## Соответствие этапам из Ducklm.md
|
## Соответствие этапам из Ducklm.md
|
||||||
|
|
||||||
|
|
@ -78,7 +80,7 @@ WebChat доступен через FastAPI на `http://127.0.0.1:8000/`.
|
||||||
| 6. Approvals | Готово | UI и API approvals, allow_once/forever/deny |
|
| 6. Approvals | Готово | UI и API approvals, allow_once/forever/deny |
|
||||||
| 7. Skills | Готово | Registry, API/UI, candidate skill injection |
|
| 7. Skills | Готово | Registry, API/UI, candidate skill injection |
|
||||||
| 8. Reflection/Experience | Готово | Reflection после completed задач, experience records |
|
| 8. Reflection/Experience | Готово | Reflection после completed задач, experience records |
|
||||||
| 9. Memory/VectorMemory | Готово частично | SQLite memory готова; Qdrant зависит от запущенного сервиса и embeddings |
|
| 9. Memory/VectorMemory | Готово частично | SQLite memory готова; `/v1/status?probe=true` показывает live health Qdrant; embeddings зависят от локальной модели/endpoint |
|
||||||
| 10. MTP/benchmark | Готово как experimental | MTP script есть, action по умолчанию остаётся на main endpoint |
|
| 10. MTP/benchmark | Готово как experimental | MTP script есть, action по умолчанию остаётся на main endpoint |
|
||||||
|
|
||||||
## Остаточные ограничения
|
## Остаточные ограничения
|
||||||
|
|
@ -119,6 +121,7 @@ http://127.0.0.1:8000/
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl --noproxy '*' http://127.0.0.1:8000/health
|
curl --noproxy '*' http://127.0.0.1:8000/health
|
||||||
|
curl --noproxy '*' 'http://127.0.0.1:8000/v1/status?probe=true'
|
||||||
curl --noproxy '*' http://127.0.0.1:8000/v1/models/roles
|
curl --noproxy '*' http://127.0.0.1:8000/v1/models/roles
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -126,6 +129,7 @@ curl --noproxy '*' http://127.0.0.1:8000/v1/models/roles
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
bash scripts/duck.sh status
|
bash scripts/duck.sh status
|
||||||
|
bash scripts/duck.sh status --probe
|
||||||
bash scripts/duck.sh logs --follow
|
bash scripts/duck.sh logs --follow
|
||||||
bash scripts/duck.sh restart
|
bash scripts/duck.sh restart
|
||||||
bash scripts/duck.sh stop
|
bash scripts/duck.sh stop
|
||||||
|
|
@ -143,6 +147,7 @@ bash scripts/duck-mtp.sh logs --follow
|
||||||
## Что делать следующим
|
## Что делать следующим
|
||||||
|
|
||||||
1. Пройти live E2E checklist в WebChat на реальной модели.
|
1. Пройти live E2E checklist в WebChat на реальной модели.
|
||||||
2. Если Qdrant нужен постоянно, добавить отдельную health-индикацию vector memory в `/v1/status`.
|
2. Вынести runtime/model role routing в явный конфиг с fallback-политикой, оставив Qwen основным backend для всех ролей.
|
||||||
3. При необходимости заменить keyword skill selection на LLM-based selection.
|
3. Добавить строгую JSON validation/fallback для structured utility-ролей.
|
||||||
4. Позже мигрировать FastAPI startup на lifespan.
|
4. При необходимости заменить keyword skill selection на LLM-based selection.
|
||||||
|
5. Позже мигрировать FastAPI startup на lifespan.
|
||||||
|
|
|
||||||
|
|
@ -36,6 +36,13 @@ bash scripts/duck.sh restart
|
||||||
bash scripts/duck.sh stop
|
bash scripts/duck.sh stop
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Use live probes when you need backend diagnostics, not just process status:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash scripts/duck.sh status --probe
|
||||||
|
curl --noproxy '*' 'http://127.0.0.1:8000/v1/status?probe=true'
|
||||||
|
```
|
||||||
|
|
||||||
4. Open WebChat:
|
4. Open WebChat:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,29 @@ GET /v1/experience/{id}
|
||||||
GET /v1/memory/search?q=...
|
GET /v1/memory/search?q=...
|
||||||
```
|
```
|
||||||
|
|
||||||
|
`GET /v1/status` returns a fast runtime snapshot without live backend checks:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"name": "DuckLM",
|
||||||
|
"api": {"host": "127.0.0.1", "port": 8000},
|
||||||
|
"paths": {"workspace": "./workspace", "db_path": "./data/duck.sqlite3"},
|
||||||
|
"token_budget": {"ctx_size": 65536},
|
||||||
|
"models": {
|
||||||
|
"default_provider": "llama_server",
|
||||||
|
"endpoints": ["llama_server:http://127.0.0.1:8081/v1:local-main"],
|
||||||
|
"roles": {"thinker": {"model": "local-main"}}
|
||||||
|
},
|
||||||
|
"services": {
|
||||||
|
"duck_api": {"ok": true, "probed": true},
|
||||||
|
"llama": {"ok": null, "probed": false, "roles": {}},
|
||||||
|
"vector_memory": {"ok": null, "probed": false}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Use `GET /v1/status?probe=true` to also call the model backend and Qdrant.
|
||||||
|
|
||||||
Chat requests accept optional `reasoning`:
|
Chat requests accept optional `reasoning`:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ from pathlib import Path
|
||||||
from typing import Any, Literal
|
from typing import Any, Literal
|
||||||
|
|
||||||
import uvicorn
|
import uvicorn
|
||||||
from fastapi import FastAPI, HTTPException, Request
|
from fastapi import FastAPI, HTTPException, Query, Request
|
||||||
from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
|
from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
from fastapi.templating import Jinja2Templates
|
from fastapi.templating import Jinja2Templates
|
||||||
|
|
@ -165,7 +165,31 @@ def create_app() -> FastAPI:
|
||||||
return {"status": "ok"}
|
return {"status": "ok"}
|
||||||
|
|
||||||
@app.get("/v1/status")
|
@app.get("/v1/status")
|
||||||
async def status() -> dict[str, Any]:
|
async def status(probe: bool = Query(False)) -> dict[str, Any]:
|
||||||
|
role_configs = model_client.list_roles()
|
||||||
|
endpoints = sorted(
|
||||||
|
{
|
||||||
|
f"{role_config['provider']}:{role_config['base_url']}:{role_config['model']}"
|
||||||
|
for role_config in role_configs.values()
|
||||||
|
}
|
||||||
|
)
|
||||||
|
llama_status: dict[str, Any] = {"probed": False, "ok": None, "roles": {}}
|
||||||
|
vector_status: dict[str, Any] = {
|
||||||
|
**vector_memory.config_status(),
|
||||||
|
"probed": False,
|
||||||
|
"ok": None,
|
||||||
|
}
|
||||||
|
if probe:
|
||||||
|
model_ping = await model_client.ping()
|
||||||
|
llama_status = {
|
||||||
|
"probed": True,
|
||||||
|
"ok": all(role.get("ok") for role in model_ping.values()) if model_ping else False,
|
||||||
|
"roles": model_ping,
|
||||||
|
}
|
||||||
|
vector_status = {
|
||||||
|
**await vector_memory.health(),
|
||||||
|
"probed": True,
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
"name": "DuckLM",
|
"name": "DuckLM",
|
||||||
"version": "0.1.0",
|
"version": "0.1.0",
|
||||||
|
|
@ -173,6 +197,33 @@ def create_app() -> FastAPI:
|
||||||
"api_port": settings.api_port,
|
"api_port": settings.api_port,
|
||||||
"workspace": settings.workspace,
|
"workspace": settings.workspace,
|
||||||
"db_path": settings.db_path,
|
"db_path": settings.db_path,
|
||||||
|
"api": {
|
||||||
|
"host": settings.api_host,
|
||||||
|
"port": settings.api_port,
|
||||||
|
"base_url": f"http://{settings.api_host}:{settings.api_port}",
|
||||||
|
},
|
||||||
|
"paths": {
|
||||||
|
"workspace": settings.workspace,
|
||||||
|
"db_path": settings.db_path,
|
||||||
|
"models_config": str(model_client.config_path),
|
||||||
|
},
|
||||||
|
"token_budget": {
|
||||||
|
"ctx_size": settings.ctx_size,
|
||||||
|
"max_input_tokens": settings.max_input_tokens,
|
||||||
|
"max_recent_events_tokens": settings.max_recent_events_tokens,
|
||||||
|
"max_memory_tokens": settings.max_memory_tokens,
|
||||||
|
"max_skill_tokens": settings.max_skill_tokens,
|
||||||
|
},
|
||||||
|
"models": {
|
||||||
|
"default_provider": model_client.default_provider,
|
||||||
|
"roles": role_configs,
|
||||||
|
"endpoints": endpoints,
|
||||||
|
},
|
||||||
|
"services": {
|
||||||
|
"duck_api": {"ok": True, "probed": True},
|
||||||
|
"llama": llama_status,
|
||||||
|
"vector_memory": vector_status,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@app.get("/v1/models/roles")
|
@app.get("/v1/models/roles")
|
||||||
|
|
|
||||||
|
|
@ -94,6 +94,43 @@ class VectorMemory:
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return response.json().get("result", [])
|
return response.json().get("result", [])
|
||||||
|
|
||||||
|
def config_status(self) -> dict[str, Any]:
    """Summarise the vector-memory configuration from instance attributes.

    Only attributes are read; no request is sent and no model is loaded.

    Returns:
        A dict with ``configured`` (True when a Qdrant URL and an embedding
        source are both set), ``qdrant_url``, ``collection`` and
        ``embedding_source`` (``remote:<url>``, ``local:<path>`` or ``none``).
    """
    # A remote embeddings endpoint takes precedence over a local model path.
    source = "none"
    if self.embeddings_base_url:
        source = f"remote:{self.embeddings_base_url}"
    elif self._local_model_path:
        source = f"local:{self._local_model_path}"

    summary: dict[str, Any] = {}
    summary["configured"] = bool(self.qdrant_url and source != "none")
    summary["qdrant_url"] = self.qdrant_url
    summary["collection"] = self.collection_name
    summary["embedding_source"] = source
    return summary
|
||||||
|
|
||||||
|
async def health(self) -> dict[str, Any]:
    """Probe Qdrant without loading the local embedding model.

    Sends a single GET to the Qdrant root URL and reports the outcome as data
    instead of raising, so a status endpoint can always render a result.

    Returns:
        The ``config_status()`` fields plus ``ok`` (bool) and ``latency_ms``
        (milliseconds, one decimal). On failure an ``error`` string is added.
    """
    import time

    status = self.config_status()
    if not self.qdrant_url:
        # Nothing to probe: report it directly instead of sending a request
        # to "/" that can only fail with a confusing protocol error.
        return {
            **status,
            "ok": False,
            "error": "qdrant_url is not configured",
            "latency_ms": 0.0,
        }

    # Normalise so a configured trailing slash does not yield "//".
    probe_url = f"{str(self.qdrant_url).rstrip('/')}/"
    started = time.perf_counter()
    try:
        async with httpx.AsyncClient(timeout=5.0, trust_env=False) as client:
            response = await client.get(probe_url)
            response.raise_for_status()
            return {
                **status,
                "ok": True,
                "latency_ms": round((time.perf_counter() - started) * 1000, 1),
            }
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        # InvalidURL does not derive from HTTPError; without it a malformed
        # URL would escape this method and fail the whole status request.
        return {
            **status,
            "ok": False,
            # Some transport errors carry an empty message; fall back to the
            # exception type so the failure is never reported as blank.
            "error": str(exc) or type(exc).__name__,
            "latency_ms": round((time.perf_counter() - started) * 1000, 1),
        }
|
||||||
|
|
||||||
async def _embed(self, text: str) -> list[float]:
|
async def _embed(self, text: str) -> list[float]:
|
||||||
"""Generate embeddings using local model or remote endpoint."""
|
"""Generate embeddings using local model or remote endpoint."""
|
||||||
# Prefer local model if available
|
# Prefer local model if available
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,7 @@ Commands:
|
||||||
stop Stop DuckLM API and managed MTP llama-server
|
stop Stop DuckLM API and managed MTP llama-server
|
||||||
restart Stop and start the whole local DuckLM stack
|
restart Stop and start the whole local DuckLM stack
|
||||||
status Print process and HTTP health status
|
status Print process and HTTP health status
|
||||||
|
Use "status --probe" to include live model/vector checks
|
||||||
logs Show DuckLM API and llama-server logs; use --follow/-f and --lines N
|
logs Show DuckLM API and llama-server logs; use --follow/-f and --lines N
|
||||||
help Show this help
|
help Show this help
|
||||||
|
|
||||||
|
|
@ -169,6 +170,20 @@ stop_stack() {
|
||||||
}
|
}
|
||||||
|
|
||||||
status_stack() {
|
status_stack() {
|
||||||
|
local probe=0
|
||||||
|
while [[ $# -gt 0 ]]; do
|
||||||
|
case "$1" in
|
||||||
|
--probe)
|
||||||
|
probe=1
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "Unknown status argument: $1" >&2
|
||||||
|
return 2
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
local rc=0
|
local rc=0
|
||||||
if api_is_running; then
|
if api_is_running; then
|
||||||
local pid
|
local pid
|
||||||
|
|
@ -176,6 +191,7 @@ status_stack() {
|
||||||
echo "DuckLM API running: pid=${pid}"
|
echo "DuckLM API running: pid=${pid}"
|
||||||
if command -v curl >/dev/null 2>&1 && curl --noproxy "*" -fsS "${API_URL}/health" >/dev/null 2>&1; then
|
if command -v curl >/dev/null 2>&1 && curl --noproxy "*" -fsS "${API_URL}/health" >/dev/null 2>&1; then
|
||||||
echo "DuckLM API health: ok (${API_URL})"
|
echo "DuckLM API health: ok (${API_URL})"
|
||||||
|
print_runtime_status "${probe}"
|
||||||
else
|
else
|
||||||
echo "DuckLM API health: not ready (${API_URL})"
|
echo "DuckLM API health: not ready (${API_URL})"
|
||||||
fi
|
fi
|
||||||
|
|
@ -197,6 +213,55 @@ status_stack() {
|
||||||
return "${rc}"
|
return "${rc}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Print a human-readable summary of GET ${API_URL}/v1/status.
#
# Arguments:
#   $1 - "1" to request live backend probes (?probe=true); defaults to "0".
#
# Best-effort: every failure path prints a one-line note and returns 0, so a
# rendering problem never changes the exit status of the caller.
print_runtime_status() {
  local probe="${1:-0}"
  local status_url="${API_URL}/v1/status"
  if [[ "${probe}" == "1" ]]; then
    status_url="${status_url}?probe=true"
  fi
  local payload
  if ! payload="$(curl --noproxy "*" -fsS "${status_url}" 2>/dev/null)"; then
    echo "DuckLM runtime status: unavailable (${status_url})"
    return 0
  fi

  # Prefer the configured/project interpreter; otherwise use python3 from PATH.
  local python_bin="${DUCK_PYTHON_BIN:-${ROOT_DIR}/.venv/bin/python}"
  if [[ ! -x "${python_bin}" ]]; then
    python_bin="python3"
  fi
  if ! command -v "${python_bin}" >/dev/null 2>&1 && [[ ! -x "${python_bin}" ]]; then
    # No interpreter to format the payload: point the user at the URL instead.
    echo "DuckLM runtime status: ${status_url}"
    return 0
  fi

  # The embedded program is single-quoted for the shell, so it must not
  # contain single quotes.
  if ! printf '%s' "${payload}" | "${python_bin}" -c '
import json
import sys

try:
    data = json.load(sys.stdin)
except ValueError as exc:
    print("DuckLM runtime status: invalid JSON ({})".format(exc))
    sys.exit(0)
if not isinstance(data, dict):
    print("DuckLM runtime status: unexpected payload")
    sys.exit(0)

models = data.get("models") or {}
roles = sorted((models.get("roles") or {}).keys())
services = data.get("services") or {}
llama = services.get("llama") or {}
vector = services.get("vector_memory") or {}

def service_line(name, service):
    if not service.get("probed"):
        return f"{name}: not probed"
    ok = "ok" if service.get("ok") else "failed"
    details = []
    if service.get("error"):
        details.append(str(service.get("error")))
    # Model probe errors are reported per role, not at the service level.
    role_results = service.get("roles")
    if isinstance(role_results, dict):
        for role in sorted(role_results):
            result = role_results[role]
            if isinstance(result, dict) and not result.get("ok"):
                reason = result.get("error")
                details.append(f"{role}: {reason}" if reason else str(role))
    return f"{name}: {ok}" + (" ({})".format("; ".join(details)) if details else "")

print("DuckLM runtime:")
print("  workspace: {}".format(data.get("workspace")))
print("  db: {}".format(data.get("db_path")))
print("  model endpoints: {}".format(len(models.get("endpoints") or [])))
print("  roles: {}".format(", ".join(roles)))
print("  {}".format(service_line("llama", llama)))
print("  {}".format(service_line("vector memory", vector)))
'; then
    echo "DuckLM runtime status: could not render response (${status_url})"
  fi
  return 0
}
|
||||||
|
|
||||||
logs_stack() {
|
logs_stack() {
|
||||||
local follow=0
|
local follow=0
|
||||||
local lines=100
|
local lines=100
|
||||||
|
|
@ -246,7 +311,8 @@ case "${ACTION}" in
|
||||||
start_stack
|
start_stack
|
||||||
;;
|
;;
|
||||||
status)
|
status)
|
||||||
status_stack
|
shift || true
|
||||||
|
status_stack "$@"
|
||||||
;;
|
;;
|
||||||
logs)
|
logs)
|
||||||
logs_stack "$@"
|
logs_stack "$@"
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,7 @@ Commands:
|
||||||
stop Stop DuckLM API and managed llama-server
|
stop Stop DuckLM API and managed llama-server
|
||||||
restart Stop and start the whole local DuckLM stack
|
restart Stop and start the whole local DuckLM stack
|
||||||
status Print process and HTTP health status
|
status Print process and HTTP health status
|
||||||
|
Use "status --probe" to include live model/vector checks
|
||||||
logs Show DuckLM API and llama-server logs; use --follow/-f and --lines N
|
logs Show DuckLM API and llama-server logs; use --follow/-f and --lines N
|
||||||
help Show this help
|
help Show this help
|
||||||
|
|
||||||
|
|
@ -169,6 +170,20 @@ stop_stack() {
|
||||||
}
|
}
|
||||||
|
|
||||||
status_stack() {
|
status_stack() {
|
||||||
|
local probe=0
|
||||||
|
while [[ $# -gt 0 ]]; do
|
||||||
|
case "$1" in
|
||||||
|
--probe)
|
||||||
|
probe=1
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "Unknown status argument: $1" >&2
|
||||||
|
return 2
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
local rc=0
|
local rc=0
|
||||||
if api_is_running; then
|
if api_is_running; then
|
||||||
local pid
|
local pid
|
||||||
|
|
@ -176,6 +191,7 @@ status_stack() {
|
||||||
echo "DuckLM API running: pid=${pid}"
|
echo "DuckLM API running: pid=${pid}"
|
||||||
if command -v curl >/dev/null 2>&1 && curl --noproxy "*" -fsS "${API_URL}/health" >/dev/null 2>&1; then
|
if command -v curl >/dev/null 2>&1 && curl --noproxy "*" -fsS "${API_URL}/health" >/dev/null 2>&1; then
|
||||||
echo "DuckLM API health: ok (${API_URL})"
|
echo "DuckLM API health: ok (${API_URL})"
|
||||||
|
print_runtime_status "${probe}"
|
||||||
else
|
else
|
||||||
echo "DuckLM API health: not ready (${API_URL})"
|
echo "DuckLM API health: not ready (${API_URL})"
|
||||||
fi
|
fi
|
||||||
|
|
@ -197,6 +213,55 @@ status_stack() {
|
||||||
return "${rc}"
|
return "${rc}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Print a human-readable summary of GET ${API_URL}/v1/status.
#
# Arguments:
#   $1 - "1" to request live backend probes (?probe=true); defaults to "0".
#
# Best-effort: every failure path prints a one-line note and returns 0, so a
# rendering problem never changes the exit status of the caller.
print_runtime_status() {
  local probe="${1:-0}"
  local status_url="${API_URL}/v1/status"
  if [[ "${probe}" == "1" ]]; then
    status_url="${status_url}?probe=true"
  fi
  local payload
  if ! payload="$(curl --noproxy "*" -fsS "${status_url}" 2>/dev/null)"; then
    echo "DuckLM runtime status: unavailable (${status_url})"
    return 0
  fi

  # Prefer the configured/project interpreter; otherwise use python3 from PATH.
  local python_bin="${DUCK_PYTHON_BIN:-${ROOT_DIR}/.venv/bin/python}"
  if [[ ! -x "${python_bin}" ]]; then
    python_bin="python3"
  fi
  if ! command -v "${python_bin}" >/dev/null 2>&1 && [[ ! -x "${python_bin}" ]]; then
    # No interpreter to format the payload: point the user at the URL instead.
    echo "DuckLM runtime status: ${status_url}"
    return 0
  fi

  # The embedded program is single-quoted for the shell, so it must not
  # contain single quotes.
  if ! printf '%s' "${payload}" | "${python_bin}" -c '
import json
import sys

try:
    data = json.load(sys.stdin)
except ValueError as exc:
    print("DuckLM runtime status: invalid JSON ({})".format(exc))
    sys.exit(0)
if not isinstance(data, dict):
    print("DuckLM runtime status: unexpected payload")
    sys.exit(0)

models = data.get("models") or {}
roles = sorted((models.get("roles") or {}).keys())
services = data.get("services") or {}
llama = services.get("llama") or {}
vector = services.get("vector_memory") or {}

def service_line(name, service):
    if not service.get("probed"):
        return f"{name}: not probed"
    ok = "ok" if service.get("ok") else "failed"
    details = []
    if service.get("error"):
        details.append(str(service.get("error")))
    # Model probe errors are reported per role, not at the service level.
    role_results = service.get("roles")
    if isinstance(role_results, dict):
        for role in sorted(role_results):
            result = role_results[role]
            if isinstance(result, dict) and not result.get("ok"):
                reason = result.get("error")
                details.append(f"{role}: {reason}" if reason else str(role))
    return f"{name}: {ok}" + (" ({})".format("; ".join(details)) if details else "")

print("DuckLM runtime:")
print("  workspace: {}".format(data.get("workspace")))
print("  db: {}".format(data.get("db_path")))
print("  model endpoints: {}".format(len(models.get("endpoints") or [])))
print("  roles: {}".format(", ".join(roles)))
print("  {}".format(service_line("llama", llama)))
print("  {}".format(service_line("vector memory", vector)))
'; then
    echo "DuckLM runtime status: could not render response (${status_url})"
  fi
  return 0
}
|
||||||
|
|
||||||
logs_stack() {
|
logs_stack() {
|
||||||
local follow=0
|
local follow=0
|
||||||
local lines=100
|
local lines=100
|
||||||
|
|
@ -246,7 +311,8 @@ case "${ACTION}" in
|
||||||
start_stack
|
start_stack
|
||||||
;;
|
;;
|
||||||
status)
|
status)
|
||||||
status_stack
|
shift || true
|
||||||
|
status_stack "$@"
|
||||||
;;
|
;;
|
||||||
logs)
|
logs)
|
||||||
logs_stack "$@"
|
logs_stack "$@"
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
from fastapi.testclient import TestClient
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
from duck_core.api import create_app
|
from duck_core.api import create_app
|
||||||
|
from duck_core.memory.vector_memory import VectorMemory
|
||||||
|
from duck_core.model_client import ModelClient
|
||||||
|
|
||||||
|
|
||||||
def test_health_and_status_endpoints(tmp_path, monkeypatch):
|
def test_health_and_status_endpoints(tmp_path, monkeypatch):
|
||||||
|
|
@ -12,6 +14,59 @@ def test_health_and_status_endpoints(tmp_path, monkeypatch):
|
||||||
status = client.get("/v1/status").json()
|
status = client.get("/v1/status").json()
|
||||||
assert status["name"] == "DuckLM"
|
assert status["name"] == "DuckLM"
|
||||||
assert status["api_host"] == "127.0.0.1"
|
assert status["api_host"] == "127.0.0.1"
|
||||||
|
assert status["api"]["host"] == "127.0.0.1"
|
||||||
|
assert status["api"]["port"] == 8000
|
||||||
|
assert status["paths"]["db_path"] == str(tmp_path / "duck.sqlite3")
|
||||||
|
assert status["models"]["default_provider"] == "llama_server"
|
||||||
|
assert status["models"]["roles"]["thinker"]["model"] == "local-main"
|
||||||
|
assert status["services"]["duck_api"]["ok"] is True
|
||||||
|
assert status["services"]["llama"]["probed"] is False
|
||||||
|
assert status["services"]["vector_memory"]["probed"] is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_status_endpoint_can_probe_backends(tmp_path, monkeypatch):
    """``/v1/status?probe=true`` reports per-role model results and Qdrant health.

    Both backends are replaced with canned coroutines, so no network is used.
    One role succeeds and one fails, which must make the aggregate ``llama.ok``
    false while the individual role results are passed through unchanged.
    """
    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))

    async def fake_ping(self):
        # Fields shared by every role entry in the canned ping result.
        endpoint = {"base_url": "http://127.0.0.1:8081/v1", "model": "local-main"}
        healthy_role = {"ok": True, **endpoint, "latency_ms": 1.2}
        broken_role = {"ok": False, **endpoint, "error": "offline"}
        return {"thinker": healthy_role, "critic": broken_role}

    async def fake_vector_health(self):
        return dict(
            configured=True,
            ok=True,
            qdrant_url="http://127.0.0.1:6333",
            collection="duck_memory",
            embedding_source="local:./models/all-MiniLM-L6-v2",
            latency_ms=2.3,
        )

    monkeypatch.setattr(ModelClient, "ping", fake_ping)
    monkeypatch.setattr(VectorMemory, "health", fake_vector_health)

    client = TestClient(create_app())
    response = client.get("/v1/status?probe=true")
    status = response.json()

    assert status["services"]["llama"]["probed"] is True
    # A single failing role is enough to mark the model backend unhealthy.
    assert status["services"]["llama"]["ok"] is False
    assert status["services"]["llama"]["roles"]["thinker"]["ok"] is True
    assert status["services"]["llama"]["roles"]["critic"]["ok"] is False
    assert status["services"]["vector_memory"]["probed"] is True
    assert status["services"]["vector_memory"]["ok"] is True
|
||||||
|
|
||||||
|
|
||||||
def test_webchat_index_renders(tmp_path, monkeypatch):
|
def test_webchat_index_renders(tmp_path, monkeypatch):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue