#!/usr/bin/env bash
#
# duck.sh - manage the local DuckLM stack (Qdrant, llama-server, DuckLM API).
# Run "scripts/duck.sh help" for the list of commands and environment knobs.

# Abort on command failure, on unset variables, and on failures inside pipelines.
set -euo pipefail

# Parent of the directory that holds this script; project paths are built from it.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Variables whose value from the caller's environment must take precedence
# over anything assigned by the .env file. Every DUCK_* variable this script
# reads should be listed here (DUCK_PYTHON_BIN is read by print_runtime_status).
ENV_KEYS=(
  DUCK_LLAMA_SERVER_BIN
  DUCK_MAIN_MODEL_PATH
  DUCK_MAIN_PORT
  DUCK_CTX_SIZE
  DUCK_N_GPU_LAYERS
  DUCK_LLAMA_DEVICE
  DUCK_PARALLEL
  DUCK_LLAMA_PID_FILE
  DUCK_LLAMA_LOG_FILE
  DUCK_LLAMA_EXTRA_ARGS
  DUCK_HOST
  DUCK_API_HOST
  DUCK_API_PORT
  DUCK_API_PID_FILE
  DUCK_API_LOG_FILE
  DUCK_API_COMMAND
  DUCK_QDRANT_MANAGED
  DUCK_QDRANT_COMPOSE_FILE
  DUCK_QDRANT_SERVICE
  DUCK_PYTHON_BIN
)

# Snapshot the values that are already set (even if empty) before .env is read.
# [[ -v ]] distinguishes "set to empty" from "unset"; ${!key} reads the value
# of the variable whose name is stored in key.
declare -A ENV_OVERRIDES=()
for key in "${ENV_KEYS[@]}"; do
  if [[ -v "${key}" ]]; then
    ENV_OVERRIDES["${key}"]="${!key}"
  fi
done

# Load optional settings from ${ROOT_DIR}/.env. While allexport (set -a) is on,
# every variable the file assigns is exported to child processes.
if [[ -f "${ROOT_DIR}/.env" ]]; then
  set -a
  # shellcheck disable=SC1091
  source "${ROOT_DIR}/.env"
  set +a
fi

# Re-export the values stored in ENV_OVERRIDES so they replace whatever .env
# assigned for the same names.
for key in "${!ENV_OVERRIDES[@]}"; do
  export "${key}=${ENV_OVERRIDES[${key}]}"
done

# Sub-command to run; defaults to "start" when no argument is given.
ACTION="${1:-start}"

# DuckLM API process bookkeeping and the base URL used for HTTP checks.
API_PID_FILE="${DUCK_API_PID_FILE:-${ROOT_DIR}/data/duck-api.pid}"
API_LOG_FILE="${DUCK_API_LOG_FILE:-${ROOT_DIR}/data/duck-api.log}"
API_URL="http://${DUCK_API_HOST:-127.0.0.1}:${DUCK_API_PORT:-8000}"

# Helper script invoked with start/stop/status/logs for llama-server.
LLAMA_SCRIPT="${ROOT_DIR}/scripts/llama/start_main.sh"

# Qdrant lifecycle settings (see qdrant_enabled for the accepted "off" values).
QDRANT_MANAGED="${DUCK_QDRANT_MANAGED:-1}"
QDRANT_COMPOSE_FILE="${DUCK_QDRANT_COMPOSE_FILE:-${ROOT_DIR}/docker-compose.memory.yml}"
QDRANT_SERVICE="${DUCK_QDRANT_SERVICE:-qdrant}"

#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   help text on stdout (callers redirect to stderr when needed)
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'
Usage: scripts/duck.sh <command>

Commands:
  start      Start Qdrant, llama-server, and DuckLM API in the background
  stop       Stop DuckLM API, managed llama-server, and Qdrant
  restart    Stop and start the whole local DuckLM stack
  status     Print process and HTTP health status
             Use "status --probe" to include live model/vector checks
  logs       Show DuckLM API and llama-server logs; use --follow/-f and --lines N
  help       Show this help

Environment:
  DUCK_API_HOST        API bind host, default 127.0.0.1
  DUCK_API_PORT        API port, default 8000
  DUCK_API_PID_FILE    API PID file path
  DUCK_API_LOG_FILE    API log file path
  DUCK_API_COMMAND     API command override, default ".venv/bin/python -m duck_core.api"
  DUCK_PYTHON_BIN      Python used to format runtime status, default .venv/bin/python
  DUCK_QDRANT_MANAGED  Set to 0 to skip Docker Compose Qdrant lifecycle
  DUCK_QDRANT_COMPOSE_FILE
                       Compose file path, default docker-compose.memory.yml
  DUCK_QDRANT_SERVICE  Compose service name, default qdrant

llama-server environment is handled by scripts/llama/start_main.sh.
EOF
}

#######################################
# Report whether this script should manage Qdrant.
# Globals:   QDRANT_MANAGED (read)
# Returns:   1 when QDRANT_MANAGED is "0", or "false"/"no" in any letter case;
#            0 for every other value (including the empty string).
#######################################
qdrant_enabled() {
  # ${var,,} lower-cases the value so FALSE, False, No, ... are all matched.
  case "${QDRANT_MANAGED,,}" in
    0 | false | no)
      return 1
      ;;
    *)
      return 0
      ;;
  esac
}

#######################################
# Run "docker compose" against the configured compose file.
# Globals:   QDRANT_COMPOSE_FILE (read)
# Arguments: compose sub-command and its arguments, passed through unchanged
# Returns:   exit status of docker
#######################################
docker_compose() {
  docker compose -f "${QDRANT_COMPOSE_FILE}" "$@"
}

#######################################
# Start the Qdrant service through Docker Compose.
# Globals:   QDRANT_COMPOSE_FILE, QDRANT_SERVICE (read)
# Outputs:   progress on stdout, problems on stderr
# Returns:   0 when management is disabled or compose succeeds;
#            1 when docker or the compose file is missing;
#            otherwise the status of "docker compose up".
#######################################
start_qdrant() {
  if ! qdrant_enabled; then
    echo "Qdrant management: disabled"
    return 0
  fi
  if ! command -v docker >/dev/null 2>&1; then
    echo "Docker not found; cannot start Qdrant" >&2
    return 1
  fi
  if [[ ! -f "${QDRANT_COMPOSE_FILE}" ]]; then
    echo "Qdrant compose file not found: ${QDRANT_COMPOSE_FILE}" >&2
    return 1
  fi

  echo "Starting Qdrant..."
  # -d: detached, so the script does not block on the container.
  docker_compose up -d "${QDRANT_SERVICE}"
}

#######################################
# Stop Qdrant through Docker Compose.
# Globals:   QDRANT_COMPOSE_FILE (read)
# Outputs:   progress on stdout, problems on stderr
# Returns:   0 when management is disabled or compose succeeds;
#            1 when docker or the compose file is missing;
#            otherwise the status of "docker compose down".
#######################################
stop_qdrant() {
  if ! qdrant_enabled; then
    echo "Qdrant management: disabled"
    return 0
  fi
  if ! command -v docker >/dev/null 2>&1; then
    echo "Docker not found; cannot stop Qdrant" >&2
    return 1
  fi
  if [[ ! -f "${QDRANT_COMPOSE_FILE}" ]]; then
    echo "Qdrant compose file not found: ${QDRANT_COMPOSE_FILE}" >&2
    return 1
  fi

  echo "Stopping Qdrant..."
  # NOTE: "down" is given no service name, so it applies to the whole compose
  # project described by QDRANT_COMPOSE_FILE, not only QDRANT_SERVICE.
  docker_compose down
}

#######################################
# Print the Docker Compose state of the Qdrant service.
# Globals:   QDRANT_COMPOSE_FILE, QDRANT_SERVICE (read)
# Outputs:   everything on stdout, including the "not found" diagnostics,
#            because they are part of the status report
# Returns:   0 when management is disabled or compose succeeds;
#            1 when docker or the compose file is missing;
#            otherwise the status of "docker compose ps".
#######################################
status_qdrant() {
  if ! qdrant_enabled; then
    echo "Qdrant status: disabled"
    return 0
  fi
  if ! command -v docker >/dev/null 2>&1; then
    echo "Qdrant status: Docker not found"
    return 1
  fi
  if [[ ! -f "${QDRANT_COMPOSE_FILE}" ]]; then
    echo "Qdrant status: compose file not found (${QDRANT_COMPOSE_FILE})"
    return 1
  fi

  echo "Qdrant status:"
  docker_compose ps "${QDRANT_SERVICE}"
}

#######################################
# Check whether the process recorded in the API PID file is alive.
# Globals:   API_PID_FILE (read)
# Returns:   0 when the file holds a numeric PID that accepts signal 0;
#            non-zero when the file is missing, unreadable, not numeric,
#            or the process cannot be signalled.
#######################################
api_is_running() {
  [[ -f "${API_PID_FILE}" ]] || return 1

  local pid
  # The file may vanish or be unreadable between the test above and the read;
  # treat that as "not running" without leaking a cat error to the terminal.
  pid="$(cat -- "${API_PID_FILE}" 2>/dev/null)" || return 1
  [[ "${pid}" =~ ^[0-9]+$ ]] || return 1

  # Signal 0 performs the existence/permission check without delivering anything.
  kill -0 "${pid}" 2>/dev/null
}

#######################################
# Print the raw contents of the API PID file, if it exists.
# Globals:   API_PID_FILE (read)
# Outputs:   file contents on stdout; nothing when the file is absent
# Returns:   0 when the file is absent, otherwise the status of cat
#######################################
api_pid_value() {
  [[ -f "${API_PID_FILE}" ]] || return 0
  cat -- "${API_PID_FILE}"
}

#######################################
# Launch the DuckLM API in the background and wait briefly for /health.
# Globals:   API_PID_FILE, API_LOG_FILE, API_URL, ROOT_DIR (read)
#            DUCK_API_COMMAND (read, optional command override)
# Outputs:   progress on stdout; start failure on stderr; the command line and
#            the API's own output are appended to API_LOG_FILE
# Returns:   0 when already running or when the process is alive after launch
#            (even if /health has not answered yet); 1 when it died at once.
#######################################
start_api() {
  if api_is_running; then
    echo "DuckLM API already running: pid=$(api_pid_value)"
    return 0
  fi

  mkdir -p "$(dirname "${API_PID_FILE}")" "$(dirname "${API_LOG_FILE}")"
  rm -f "${API_PID_FILE}"

  local command_string="${DUCK_API_COMMAND:-.venv/bin/python -m duck_core.api}"

  # nohup always; setsid additionally when available so the API gets its own
  # session and is detached from the invoking terminal.
  local -a launcher=(nohup)
  if command -v setsid >/dev/null 2>&1; then
    launcher+=(setsid)
  fi

  echo "Starting DuckLM API..."
  echo "Command: ${command_string}" >> "${API_LOG_FILE}"
  # Subshell keeps the cd local. "|| true" lets a launch problem fall through
  # to the liveness check below instead of aborting the script under set -e.
  (
    cd "${ROOT_DIR}" || exit 1
    # The command is an operator-supplied string, hence bash -lc.
    "${launcher[@]}" bash -lc "${command_string}" >> "${API_LOG_FILE}" 2>&1 < /dev/null &
    echo "$!" > "${API_PID_FILE}"
  ) || true

  # Give the process a moment to fail fast (bad command, import error, ...).
  sleep 0.2
  if ! api_is_running; then
    echo "DuckLM API failed to start. See ${API_LOG_FILE}" >&2
    rm -f "${API_PID_FILE}"
    return 1
  fi

  echo "DuckLM API started: pid=$(api_pid_value)"
  echo "WebChat: ${API_URL}/"
  echo "Log: ${API_LOG_FILE}"

  # Without curl there is nothing to poll; say so instead of waiting 5 seconds.
  if ! command -v curl >/dev/null 2>&1; then
    echo "DuckLM API health: not checked (curl not found)"
    return 0
  fi

  # Poll /health for up to ~5 s (20 x 0.25 s); --noproxy keeps the local
  # request away from any configured HTTP proxy.
  for _ in {1..20}; do
    if curl --noproxy "*" -fsS "${API_URL}/health" >/dev/null 2>&1; then
      echo "DuckLM API health: ok (${API_URL})"
      return 0
    fi
    sleep 0.25
  done

  echo "DuckLM API health: not ready yet (${API_URL})"
  return 0
}

#######################################
# Stop the DuckLM API recorded in the PID file.
# Sends SIGTERM, waits up to ~6 s (30 x 0.2 s), then falls back to SIGKILL.
# Globals:   API_PID_FILE (read; the file is removed)
# Outputs:   progress on stdout
# Returns:   0 in every case handled here
#######################################
stop_api() {
  if ! api_is_running; then
    # Drop a stale PID file, if any.
    rm -f "${API_PID_FILE}"
    echo "DuckLM API not running"
    return 0
  fi

  local pid
  pid="$(api_pid_value)"
  echo "Stopping DuckLM API: pid=${pid}"
  # The process may exit on its own between the check and the signal.
  kill "${pid}" 2>/dev/null || true

  for _ in {1..30}; do
    if ! kill -0 "${pid}" 2>/dev/null; then
      rm -f "${API_PID_FILE}"
      echo "DuckLM API stopped"
      return 0
    fi
    sleep 0.2
  done

  echo "DuckLM API did not stop after SIGTERM; sending SIGKILL"
  kill -9 "${pid}" 2>/dev/null || true
  rm -f "${API_PID_FILE}"
  echo "DuckLM API stopped"
}

#######################################
# Start Qdrant, llama-server, and the DuckLM API, then print a status report.
# Globals:   LLAMA_SCRIPT (read)
# Outputs:   output of each step, a blank line, "Status:", then the report
# Returns:   status of status_stack. Under the script's set -e, a failing
#            earlier step aborts the script before later steps run.
#######################################
start_stack() {
  start_qdrant
  "${LLAMA_SCRIPT}" start
  start_api

  echo
  echo "Status:"
  status_stack
}

#######################################
# Stop the DuckLM API, llama-server, and Qdrant, in that order.
# Every component is attempted even when an earlier one fails, so a problem
# stopping llama-server cannot leave Qdrant running.
# Globals:   LLAMA_SCRIPT (read)
# Returns:   0 when all three stops succeed, otherwise the status of the
#            first step that failed.
#######################################
stop_stack() {
  local rc=0
  local step_rc

  # "cmd || step_rc=$?" captures the status without tripping set -e.
  step_rc=0
  stop_api || step_rc=$?
  if [[ "${step_rc}" != "0" && "${rc}" == "0" ]]; then
    rc="${step_rc}"
  fi

  step_rc=0
  "${LLAMA_SCRIPT}" stop || step_rc=$?
  if [[ "${step_rc}" != "0" && "${rc}" == "0" ]]; then
    rc="${step_rc}"
  fi

  step_rc=0
  stop_qdrant || step_rc=$?
  if [[ "${step_rc}" != "0" && "${rc}" == "0" ]]; then
    rc="${step_rc}"
  fi

  return "${rc}"
}

#######################################
# Print the status of the DuckLM API, llama-server, and Qdrant.
# Globals:   API_PID_FILE, API_URL, LLAMA_SCRIPT (read)
# Arguments: --probe  ask the runtime status endpoint for live checks
# Outputs:   status report on stdout; argument errors on stderr
# Returns:   2 on an unknown argument; 3 when the API is not running;
#            otherwise the first non-zero status from the llama-server or
#            Qdrant checks, or 0 when everything reports fine.
#######################################
status_stack() {
  local probe=0
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --probe)
        probe=1
        shift
        ;;
      *)
        echo "Unknown status argument: $1" >&2
        return 2
        ;;
    esac
  done

  local rc=0
  if api_is_running; then
    local pid
    pid="$(api_pid_value)"
    echo "DuckLM API running: pid=${pid}"
    if command -v curl >/dev/null 2>&1 \
      && curl --noproxy "*" -fsS "${API_URL}/health" >/dev/null 2>&1; then
      echo "DuckLM API health: ok (${API_URL})"
      # Purely informational: a formatting failure must not abort the rest
      # of the report under set -e.
      print_runtime_status "${probe}" || true
    else
      echo "DuckLM API health: not ready (${API_URL})"
    fi
  else
    if [[ -f "${API_PID_FILE}" ]]; then
      echo "DuckLM API not running; removing stale pid file ${API_PID_FILE}"
      rm -f "${API_PID_FILE}"
    else
      echo "DuckLM API not running"
    fi
    rc=3
  fi

  # Keep going after a failing component; remember only the first failure.
  local llama_rc=0
  "${LLAMA_SCRIPT}" status || llama_rc=$?
  if [[ "${llama_rc}" != "0" && "${rc}" == "0" ]]; then
    rc="${llama_rc}"
  fi

  local qdrant_rc=0
  status_qdrant || qdrant_rc=$?
  if [[ "${qdrant_rc}" != "0" && "${rc}" == "0" ]]; then
    rc="${qdrant_rc}"
  fi

  return "${rc}"
}

#######################################
# Fetch ${API_URL}/v1/status and print a short human-readable summary.
# Globals:   API_URL, ROOT_DIR (read); DUCK_PYTHON_BIN (read, optional)
# Arguments: $1 - "1" to append ?probe=true to the request (default "0")
# Outputs:   summary, or a one-line explanation when it cannot be produced
# Returns:   always 0; this report is informational, so a failed request or a
#            malformed payload must never abort the caller under set -e.
#######################################
print_runtime_status() {
  local probe="${1:-0}"
  local status_url="${API_URL}/v1/status"
  if [[ "${probe}" == "1" ]]; then
    status_url="${status_url}?probe=true"
  fi

  local payload
  if ! payload="$(curl --noproxy "*" -fsS "${status_url}" 2>/dev/null)"; then
    echo "DuckLM runtime status: unavailable (${status_url})"
    return 0
  fi

  # Prefer the configured/project interpreter (an executable path or a name
  # resolvable on PATH); otherwise fall back to python3.
  local python_bin="${DUCK_PYTHON_BIN:-${ROOT_DIR}/.venv/bin/python}"
  if [[ ! -x "${python_bin}" ]] && ! command -v "${python_bin}" >/dev/null 2>&1; then
    python_bin="python3"
  fi
  if ! command -v "${python_bin}" >/dev/null 2>&1 && [[ ! -x "${python_bin}" ]]; then
    # No interpreter to format the payload: just point at the endpoint.
    echo "DuckLM runtime status: ${status_url}"
    return 0
  fi

  # The formatter exits 1 on a payload that is not a JSON object; with
  # pipefail that would otherwise propagate and kill the script.
  if ! printf '%s' "${payload}" | "${python_bin}" -c '
import json
import sys

try:
    data = json.load(sys.stdin)
except ValueError:
    sys.exit(1)
if not isinstance(data, dict):
    sys.exit(1)

# "or {}" also covers keys that are present but null.
models = data.get("models") or {}
roles = sorted((models.get("roles") or {}).keys())
services = data.get("services") or {}
llama = services.get("llama") or {}
vector = services.get("vector_memory") or {}


def service_line(name, service):
    if not service.get("probed"):
        return f"{name}: not probed"
    ok = "ok" if service.get("ok") else "failed"
    error = service.get("error")
    return f"{name}: {ok}" + (f" ({error})" if error else "")


print("DuckLM runtime:")
print(" workspace: {}".format(data.get("workspace")))
print(" db: {}".format(data.get("db_path")))
print(" model endpoints: {}".format(len(models.get("endpoints") or [])))
print(" roles: {}".format(", ".join(roles)))
print(" {}".format(service_line("llama", llama)))
print(" {}".format(service_line("vector memory", vector)))
'; then
    echo "DuckLM runtime status: unreadable response (${status_url})"
  fi
  return 0
}

#######################################
# Show the DuckLM API, llama-server, and Qdrant logs.
# Globals:   API_LOG_FILE, ROOT_DIR, LLAMA_SCRIPT, QDRANT_COMPOSE_FILE,
#            QDRANT_SERVICE (read); DUCK_LLAMA_LOG_FILE (read, optional)
# Arguments: $1          the sub-command word ("logs"); discarded
#            -f|--follow follow the API and llama-server log files
#            --lines N   trailing lines to show (default 100); N must be a
#                        non-negative integer
# Outputs:   log excerpts on stdout; argument errors on stderr
# Returns:   2 on a bad argument; otherwise the status of the last log command
#######################################
logs_stack() {
  local follow=0
  local lines=100
  local llama_log_file="${DUCK_LLAMA_LOG_FILE:-${ROOT_DIR}/data/llama-main.log}"

  # The dispatcher passes its full argument list, so drop the leading "logs".
  shift || true
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -f | --follow)
        follow=1
        shift
        ;;
      --lines)
        # Validate here so tail / docker compose never see a bad count, and
        # so the error is reported the same way as other argument errors.
        if [[ $# -lt 2 || ! "$2" =~ ^[0-9]+$ ]]; then
          echo "--lines requires a non-negative integer value" >&2
          return 2
        fi
        lines="$2"
        shift 2
        ;;
      *)
        echo "Unknown logs argument: $1" >&2
        return 2
        ;;
    esac
  done

  # Make sure the file exists so tail does not fail on a fresh checkout.
  mkdir -p "$(dirname "${API_LOG_FILE}")"
  touch "${API_LOG_FILE}"

  if [[ "${follow}" == "1" ]]; then
    # Follow mode tails both files directly; Qdrant logs are not included.
    mkdir -p "$(dirname "${llama_log_file}")"
    touch "${llama_log_file}"
    tail -n "${lines}" -f "${API_LOG_FILE}" "${llama_log_file}"
    return
  fi

  echo "==> DuckLM API log: ${API_LOG_FILE} <=="
  tail -n "${lines}" "${API_LOG_FILE}"
  echo
  echo "==> llama-server log: ${llama_log_file} <=="
  "${LLAMA_SCRIPT}" logs --lines "${lines}"
  echo
  echo "==> Qdrant log <=="
  if qdrant_enabled && command -v docker >/dev/null 2>&1 && [[ -f "${QDRANT_COMPOSE_FILE}" ]]; then
    docker_compose logs --tail "${lines}" "${QDRANT_SERVICE}"
  else
    # Explains why no Qdrant log is available; its status is not an error here.
    status_qdrant || true
  fi
}

# Dispatch on the sub-command chosen at the top of the script (ACTION).
case "${ACTION}" in
  start)
    start_stack
    ;;
  stop)
    stop_stack
    ;;
  restart)
    stop_stack
    start_stack
    ;;
  status)
    # Drop the sub-command word; the rest are status options such as --probe.
    shift || true
    status_stack "$@"
    ;;
  logs)
    # logs_stack discards the leading sub-command word itself.
    logs_stack "$@"
    ;;
  help | -h | --help)
    usage
    ;;
  *)
    echo "Unknown command: ${ACTION}" >&2
    usage >&2
    exit 2
    ;;
esac