#!/usr/bin/env bash
#
# Manage a background llama-server process for the main model.
#
# Usage: start_main.sh [start|stop|restart|status|logs|help]
#
# Configuration comes from the DUCK_* environment variables listed in usage().
# If a .env file exists in the project root it is sourced, but any DUCK_*
# variable that was already set by the caller keeps the caller's value.
#
# Exit status: 0 on success, 2 on a usage error, 3 when `status` finds no
# running server, non-zero when starting or stopping fails.
set -euo pipefail

# Project root: two directories above the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

# Variables whose caller-provided values must win over the ones in .env.
ENV_KEYS=(
  DUCK_LLAMA_SERVER_BIN
  DUCK_MAIN_MODEL_PATH
  DUCK_MAIN_PORT
  DUCK_CTX_SIZE
  DUCK_N_GPU_LAYERS
  DUCK_LLAMA_DEVICE
  DUCK_PARALLEL
  DUCK_LLAMA_PID_FILE
  DUCK_LLAMA_LOG_FILE
  DUCK_LLAMA_EXTRA_ARGS
  DUCK_HOST
)

# Snapshot the values that are set *before* .env is sourced.
declare -A ENV_OVERRIDES=()
for key in "${ENV_KEYS[@]}"; do
  if [[ -v "${key}" ]]; then
    ENV_OVERRIDES["${key}"]="${!key}"
  fi
done

if [[ -f "${ROOT_DIR}/.env" ]]; then
  # `set -a` exports every variable assigned while .env is being sourced.
  set -a
  # shellcheck disable=SC1091
  source "${ROOT_DIR}/.env"
  set +a
fi

# Restore the caller's values on top of whatever .env assigned.
# Guarded because expanding an empty array under `set -u` is an error on
# bash releases before 4.4.
if ((${#ENV_OVERRIDES[@]} > 0)); then
  for key in "${!ENV_OVERRIDES[@]}"; do
    export "${key}=${ENV_OVERRIDES[${key}]}"
  done
fi

ACTION="${1:-start}"
PID_FILE="${DUCK_LLAMA_PID_FILE:-${ROOT_DIR}/data/llama-main.pid}"
LOG_FILE="${DUCK_LLAMA_LOG_FILE:-${ROOT_DIR}/data/llama-main.log}"
BASE_URL="http://${DUCK_HOST:-127.0.0.1}:${DUCK_MAIN_PORT:-8081}/v1"
# Directory of the llama-server binary; set by start() only when the binary
# is given as a path (contains a slash) rather than looked up via PATH.
LLAMA_BIN_DIR=""

#######################################
# Resolve a path relative to the project root.
# Globals:   ROOT_DIR (read)
# Arguments: $1 - absolute path, or path relative to ROOT_DIR
# Outputs:   the resolved path on stdout
#######################################
resolve_project_path() {
  local value="$1"
  if [[ "${value}" == /* ]]; then
    printf '%s\n' "${value}"
  else
    # Drop a leading "./" so the result does not contain "/./".
    printf '%s\n' "${ROOT_DIR}/${value#./}"
  fi
}

#######################################
# Print the help text.
# Outputs: usage text on stdout
#######################################
usage() {
  cat <<'EOF'
Usage: scripts/llama/start_main.sh [command]

Commands:
  start      Start llama-server in the background
  stop       Stop the managed llama-server process
  restart    Stop and start llama-server
  status     Print process and HTTP health status
  logs       Show logs; use --follow/-f and --lines N
  help       Show this help

Environment:
  DUCK_LLAMA_SERVER_BIN   Path to llama-server binary
  DUCK_MAIN_MODEL_PATH    Path to GGUF model
  DUCK_HOST               Bind host, default 127.0.0.1
  DUCK_MAIN_PORT          Port, default 8081
  DUCK_CTX_SIZE           Context size, default 65536
  DUCK_N_GPU_LAYERS       GPU layers, default auto
  DUCK_LLAMA_DEVICE       Device name, for example Vulkan0
  DUCK_PARALLEL           Server slots, default 1
  DUCK_LLAMA_PID_FILE     PID file path
  DUCK_LLAMA_LOG_FILE     Log file path
  DUCK_LLAMA_EXTRA_ARGS   Extra llama-server args
EOF
}

#######################################
# Print the contents of the PID file, if it exists.
# Globals: PID_FILE (read)
# Outputs: PID file contents on stdout (nothing when the file is missing)
#######################################
pid_value() {
  if [[ -f "${PID_FILE}" ]]; then
    cat "${PID_FILE}"
  fi
}

#######################################
# Check whether the process recorded in the PID file can be signalled.
# Globals: PID_FILE (read)
# Returns: 0 if the PID file holds a numeric PID and `kill -0` succeeds,
#          non-zero otherwise
#######################################
is_running() {
  [[ -f "${PID_FILE}" ]] || return 1
  local pid
  pid="$(pid_value)"
  # Refuse anything that is not a plain number before handing it to kill.
  [[ "${pid}" =~ ^[0-9]+$ ]] || return 1
  kill -0 "${pid}" 2>/dev/null
}

#######################################
# Poll until a process can no longer be signalled.
# Arguments: $1 - PID to watch
#            $2 - number of 0.2 s polling rounds
# Returns:   0 once `kill -0` fails for the PID, 1 if it still succeeds
#            after all rounds
#######################################
wait_for_exit() {
  local pid="$1"
  local attempts="$2"
  local i
  for ((i = 0; i < attempts; i++)); do
    if ! kill -0 "${pid}" 2>/dev/null; then
      return 0
    fi
    sleep 0.2
  done
  ! kill -0 "${pid}" 2>/dev/null
}

#######################################
# Report process state and, when running, HTTP health.
# Globals: PID_FILE, BASE_URL (read)
# Outputs: status lines on stdout
# Returns: 0 when the process is running, 3 otherwise (a stale PID file is
#          removed on the way)
#######################################
status() {
  if is_running; then
    local pid
    pid="$(pid_value)"
    echo "llama-server running: pid=${pid}"
    # The health probe needs curl; without it the server is reported as
    # "not ready" rather than failing the status command.
    if command -v curl >/dev/null 2>&1 \
      && curl --noproxy "*" -fsS "${BASE_URL}/models" >/dev/null 2>&1; then
      echo "HTTP health: ok (${BASE_URL})"
    else
      echo "HTTP health: not ready (${BASE_URL})"
    fi
    return 0
  fi

  if [[ -f "${PID_FILE}" ]]; then
    echo "llama-server not running; removing stale pid file ${PID_FILE}"
    rm -f "${PID_FILE}"
  else
    echo "llama-server not running"
  fi
  return 3
}

#######################################
# Launch llama-server in the background and record its PID.
# Globals: DUCK_* settings, PID_FILE, LOG_FILE, ROOT_DIR (read);
#          LLAMA_BIN_DIR (written)
# Outputs: progress on stdout, failure message on stderr; the server's own
#          output and the launched command line are appended to LOG_FILE
# Returns: 0 if already running or started, 1 if the process is gone
#          shortly after launch
#######################################
start() {
  if is_running; then
    echo "llama-server already running: pid=$(pid_value)"
    return 0
  fi

  : "${DUCK_MAIN_MODEL_PATH:?DUCK_MAIN_MODEL_PATH is required}"

  mkdir -p "$(dirname "${PID_FILE}")" "$(dirname "${LOG_FILE}")"
  rm -f "${PID_FILE}"

  local llama_bin model_path
  llama_bin="${DUCK_LLAMA_SERVER_BIN:-llama-server}"
  # A value containing a slash is a path (resolved against the project
  # root); a bare name is left for PATH lookup.
  if [[ "${llama_bin}" == */* ]]; then
    llama_bin="$(resolve_project_path "${llama_bin}")"
    LLAMA_BIN_DIR="$(dirname "${llama_bin}")"
  fi
  model_path="$(resolve_project_path "${DUCK_MAIN_MODEL_PATH}")"

  local -a server_cmd=(
    "${llama_bin}"
    -m "${model_path}"
    --alias local-main
    --host "${DUCK_HOST:-127.0.0.1}"
    --port "${DUCK_MAIN_PORT:-8081}"
    -c "${DUCK_CTX_SIZE:-65536}"
    --parallel "${DUCK_PARALLEL:-1}"
    -ngl "${DUCK_N_GPU_LAYERS:-auto}"
    --flash-attn on
    --cache-prompt
    --metrics
  )

  if [[ -n "${DUCK_LLAMA_DEVICE:-}" ]]; then
    server_cmd+=(--device "${DUCK_LLAMA_DEVICE}")
  fi

  if [[ -n "${DUCK_LLAMA_EXTRA_ARGS:-}" ]]; then
    # Split on whitespace with `read -a` instead of an unquoted expansion so
    # that characters such as `*` in an argument are not glob-expanded.
    # `read -d ''` returns non-zero at end of input, hence `|| true`.
    local -a extra_args=()
    IFS=$' \t\n' read -r -d '' -a extra_args <<<"${DUCK_LLAMA_EXTRA_ARGS}" || true
    if ((${#extra_args[@]} > 0)); then
      server_cmd+=("${extra_args[@]}")
    fi
  fi

  # Put the binary's directory first on the library search path. Only
  # non-empty parts are joined: an empty entry (for example a leading ":")
  # would make the dynamic loader search the current directory.
  local lib_path="${LD_LIBRARY_PATH:-}"
  if [[ -n "${LLAMA_BIN_DIR}" ]]; then
    lib_path="${LLAMA_BIN_DIR}${lib_path:+:${lib_path}}"
  fi

  local -a launcher=(nohup)
  if command -v setsid >/dev/null 2>&1; then
    launcher+=(setsid)
  fi
  if [[ -n "${lib_path}" ]]; then
    launcher+=(env "LD_LIBRARY_PATH=${lib_path}")
  fi

  echo "Starting llama-server..."
  echo "Command: ${server_cmd[*]}" >>"${LOG_FILE}"
  "${launcher[@]}" "${server_cmd[@]}" >>"${LOG_FILE}" 2>&1 &

  local pid=$!
  echo "${pid}" >"${PID_FILE}"
  # Brief pause so an immediate launch failure is caught below.
  sleep 0.2
  if is_running; then
    echo "llama-server started: pid=${pid}"
    echo "Log: ${LOG_FILE}"
    return 0
  fi

  echo "llama-server failed to start. See ${LOG_FILE}" >&2
  rm -f "${PID_FILE}"
  return 1
}

#######################################
# Stop the managed llama-server: SIGTERM first, SIGKILL as a fallback.
# Globals: PID_FILE (read)
# Outputs: progress on stdout, failure message on stderr
# Returns: 0 if the process is not running or has exited, 1 if it can still
#          be signalled after SIGKILL (the PID file is kept in that case)
#######################################
stop() {
  if ! is_running; then
    rm -f "${PID_FILE}"
    echo "llama-server not running"
    return 0
  fi

  local pid
  pid="$(pid_value)"
  echo "Stopping llama-server: pid=${pid}"
  # The process may exit between the check above and the signal.
  kill "${pid}" 2>/dev/null || true
  if wait_for_exit "${pid}" 30; then
    rm -f "${PID_FILE}"
    echo "llama-server stopped"
    return 0
  fi

  echo "llama-server did not stop after SIGTERM; sending SIGKILL"
  kill -9 "${pid}" 2>/dev/null || true
  # Confirm the process is really gone before claiming success.
  if wait_for_exit "${pid}" 10; then
    rm -f "${PID_FILE}"
    echo "llama-server stopped"
    return 0
  fi

  echo "llama-server still running after SIGKILL: pid=${pid}" >&2
  return 1
}

#######################################
# Stop the server, then start it again.
# Under `set -e` a failing stop aborts the script before start runs.
#######################################
restart() {
  stop
  start
}

#######################################
# Show the tail of the log file.
# Globals:   LOG_FILE (read)
# Arguments: the script's own arguments; the first one (the action name) is
#            dropped, then:
#              -f, --follow   keep following the file
#              --lines N      number of lines to show, default 100
# Outputs:   log lines on stdout, errors on stderr
# Returns:   2 on an unknown argument or an invalid --lines value,
#            otherwise the exit status of tail
#######################################
logs() {
  local follow=0
  local lines=100
  # Drop the action name; tolerate being called with no arguments.
  shift || true
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -f | --follow)
        follow=1
        shift
        ;;
      --lines)
        lines="${2:?--lines requires a value}"
        # Reject non-numeric values here, before the log file is touched.
        if [[ ! "${lines}" =~ ^[+-]?[0-9]+$ ]]; then
          echo "Invalid --lines value: ${lines}" >&2
          return 2
        fi
        shift 2
        ;;
      *)
        echo "Unknown logs argument: $1" >&2
        return 2
        ;;
    esac
  done

  # Create the log file if needed so tail does not fail before first start.
  mkdir -p "$(dirname "${LOG_FILE}")"
  touch "${LOG_FILE}"
  if [[ "${follow}" == "1" ]]; then
    tail -n "${lines}" -f -- "${LOG_FILE}"
  else
    tail -n "${lines}" -- "${LOG_FILE}"
  fi
}

case "${ACTION}" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  restart)
    restart
    ;;
  status)
    status
    ;;
  logs)
    logs "$@"
    ;;
  help | -h | --help)
    usage
    ;;
  *)
    echo "Unknown command: ${ACTION}" >&2
    usage >&2
    exit 2
    ;;
esac