ducklm/scripts/llama/start_main.sh

263 lines
5.9 KiB
Bash
Executable File

#!/usr/bin/env bash
# Control script for the project's llama-server process.
# Run it with "help" to list the commands and environment variables.
set -euo pipefail

# Project root: two levels above the directory that contains this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

# Settings that can be supplied by the caller's environment or by .env.
ENV_KEYS=(
  DUCK_LLAMA_SERVER_BIN
  DUCK_MAIN_MODEL_PATH
  DUCK_MAIN_PORT
  DUCK_CTX_SIZE
  DUCK_N_GPU_LAYERS
  DUCK_LLAMA_DEVICE
  DUCK_PARALLEL
  DUCK_LLAMA_PID_FILE
  DUCK_LLAMA_LOG_FILE
  DUCK_LLAMA_EXTRA_ARGS
  DUCK_HOST
)

# Snapshot every setting the caller already provided (even an empty one;
# -v tests "is set", not "is non-empty") before .env gets a chance to
# assign it.
declare -A ENV_OVERRIDES=()
for key in "${ENV_KEYS[@]}"; do
  [[ -v "${key}" ]] || continue
  ENV_OVERRIDES["${key}"]="${!key}"
done

# Load the project's .env when present. `set -a` auto-exports every
# variable the file assigns.
if [[ -f "${ROOT_DIR}/.env" ]]; then
  set -a
  # shellcheck disable=SC1091
  source "${ROOT_DIR}/.env"
  set +a
fi

# Put the caller-provided values back so they take precedence over .env.
for key in "${!ENV_OVERRIDES[@]}"; do
  export "${key}=${ENV_OVERRIDES[${key}]}"
done

# Requested command (first CLI argument); defaults to "start".
ACTION="${1:-start}"

# Runtime file locations and the HTTP endpoint probed by the status command.
PID_FILE="${DUCK_LLAMA_PID_FILE:-${ROOT_DIR}/data/llama-main.pid}"
LOG_FILE="${DUCK_LLAMA_LOG_FILE:-${ROOT_DIR}/data/llama-main.log}"
BASE_URL="http://${DUCK_HOST:-127.0.0.1}:${DUCK_MAIN_PORT:-8081}/v1"

# Directory of the llama-server binary; filled in by start() when the
# binary is given as a path.
LLAMA_BIN_DIR=""
#######################################
# Print a configured path in absolute form.
# Globals:
#   ROOT_DIR (read) - base directory for relative paths
# Arguments:
#   $1 - path as configured; absolute paths pass through unchanged,
#        anything else is taken relative to ROOT_DIR (one leading "./"
#        is dropped first)
# Outputs:
#   The resulting path followed by a newline, on stdout.
#######################################
resolve_project_path() {
  local path_in="$1"
  case "${path_in}" in
    /*)
      printf '%s\n' "${path_in}"
      ;;
    *)
      printf '%s\n' "${ROOT_DIR}/${path_in#./}"
      ;;
  esac
}
usage() {
cat <<'EOF'
Usage: scripts/llama/start_main.sh <command>
Commands:
start Start llama-server in the background
stop Stop the managed llama-server process
restart Stop and start llama-server
status Print process and HTTP health status
logs Show logs; use --follow/-f and --lines N
help Show this help
Environment:
DUCK_LLAMA_SERVER_BIN Path to llama-server binary
DUCK_MAIN_MODEL_PATH Path to GGUF model
DUCK_HOST Bind host, default 127.0.0.1
DUCK_MAIN_PORT Port, default 8081
DUCK_CTX_SIZE Context size, default 65536
DUCK_N_GPU_LAYERS GPU layers, default auto
DUCK_LLAMA_DEVICE Device name, for example Vulkan0
DUCK_PARALLEL Server slots, default 1
DUCK_LLAMA_PID_FILE PID file path
DUCK_LLAMA_LOG_FILE Log file path
DUCK_LLAMA_EXTRA_ARGS Extra llama-server args
EOF
}
#######################################
# Check whether the process recorded in the PID file is alive.
# Globals:
#   PID_FILE (read)
# Returns:
#   0 when the file holds a numeric PID that accepts signal 0;
#   non-zero when the file is missing, malformed, or the probe fails.
#######################################
is_running() {
  local recorded_pid

  if [[ ! -f "${PID_FILE}" ]]; then
    return 1
  fi

  recorded_pid="$(cat "${PID_FILE}")"
  if ! [[ "${recorded_pid}" =~ ^[0-9]+$ ]]; then
    return 1
  fi

  # Signal 0 delivers nothing; it only reports whether the PID can be signalled.
  kill -0 "${recorded_pid}" 2>/dev/null
}
#######################################
# Print the contents of the PID file, if it exists.
# Globals:
#   PID_FILE (read)
# Outputs:
#   The file contents on stdout; nothing when the file is absent.
#######################################
pid_value() {
  [[ ! -f "${PID_FILE}" ]] || cat "${PID_FILE}"
}
#######################################
# Report whether the managed llama-server is alive and answering HTTP.
# Globals:
#   PID_FILE (read; deleted when it is stale)
#   BASE_URL (read) - "<BASE_URL>/models" is used as the health probe
# Outputs:
#   Human-readable status lines on stdout.
# Returns:
#   0 when the recorded process is alive (whatever the HTTP result),
#   3 when it is not running.
#######################################
status() {
  if is_running; then
    local pid
    pid="$(pid_value)"
    echo "llama-server running: pid=${pid}"
    # Bound the probe: without timeouts curl waits indefinitely when the
    # port accepts the connection but the server never answers, which
    # would hang the status command.
    if command -v curl >/dev/null 2>&1 \
      && curl --noproxy "*" -fsS --connect-timeout 2 --max-time 5 \
        "${BASE_URL}/models" >/dev/null 2>&1; then
      echo "HTTP health: ok (${BASE_URL})"
    else
      echo "HTTP health: not ready (${BASE_URL})"
    fi
    return 0
  fi

  if [[ -f "${PID_FILE}" ]]; then
    # The file exists but its process is gone (or the content is not a PID).
    echo "llama-server not running; removing stale pid file ${PID_FILE}"
    rm -f "${PID_FILE}"
  else
    echo "llama-server not running"
  fi
  return 3
}
#######################################
# Launch llama-server in the background and record its PID.
# Globals:
#   PID_FILE, LOG_FILE (read) - parent directories are created as needed
#   LLAMA_BIN_DIR (written)   - set to the binary's directory when
#                               DUCK_LLAMA_SERVER_BIN contains a slash
#   LD_LIBRARY_PATH (read)    - kept after the binary's directory
#   DUCK_MAIN_MODEL_PATH (required) and the other DUCK_* settings (read)
# Outputs:
#   Progress messages on stdout, the failure message on stderr; the
#   command line and all server output are appended to LOG_FILE.
# Returns:
#   0 when the server is already running or still alive shortly after
#   launch, 1 when the process has already exited by then.
#######################################
start() {
  if is_running; then
    echo "llama-server already running: pid=$(pid_value)"
    return 0
  fi

  : "${DUCK_MAIN_MODEL_PATH:?DUCK_MAIN_MODEL_PATH is required}"
  mkdir -p "$(dirname "${PID_FILE}")" "$(dirname "${LOG_FILE}")"
  rm -f "${PID_FILE}"

  local llama_bin model_path
  llama_bin="${DUCK_LLAMA_SERVER_BIN:-llama-server}"
  # A bare command name is left for PATH lookup; anything containing a
  # slash is resolved against the project root.
  if [[ "${llama_bin}" == */* ]]; then
    llama_bin="$(resolve_project_path "${llama_bin}")"
    LLAMA_BIN_DIR="$(dirname "${llama_bin}")"
  fi
  model_path="$(resolve_project_path "${DUCK_MAIN_MODEL_PATH}")"

  local -a server_cmd=(
    "${llama_bin}"
    -m "${model_path}"
    --alias local-main
    --host "${DUCK_HOST:-127.0.0.1}"
    --port "${DUCK_MAIN_PORT:-8081}"
    -c "${DUCK_CTX_SIZE:-65536}"
    --parallel "${DUCK_PARALLEL:-1}"
    -ngl "${DUCK_N_GPU_LAYERS:-auto}"
    --flash-attn on
    --cache-prompt
    --metrics
  )
  if [[ -n "${DUCK_LLAMA_DEVICE:-}" ]]; then
    server_cmd+=(--device "${DUCK_LLAMA_DEVICE}")
  fi
  if [[ -n "${DUCK_LLAMA_EXTRA_ARGS:-}" ]]; then
    # Split on whitespace only. `read -a` performs no pathname expansion,
    # so an argument such as "*" is passed through literally instead of
    # being replaced by file names from the current directory.
    # `-d ''` reads the whole value (newlines included); read then
    # reports EOF with a non-zero status, hence `|| true`.
    local -a extra_args=()
    read -r -d '' -a extra_args <<< "${DUCK_LLAMA_EXTRA_ARGS}" || true
    if (( ${#extra_args[@]} > 0 )); then
      server_cmd+=("${extra_args[@]}")
    fi
  fi

  # Library search path for the server: the binary's directory first,
  # then whatever the caller already had. Join only non-empty parts: an
  # empty entry (e.g. a leading ":") makes the dynamic loader search the
  # current working directory.
  local lib_path="${LLAMA_BIN_DIR:-}"
  if [[ -n "${LD_LIBRARY_PATH:-}" ]]; then
    lib_path="${lib_path:+${lib_path}:}${LD_LIBRARY_PATH}"
  fi

  # nohup keeps the server alive after the invoking terminal goes away;
  # setsid, when installed, additionally runs it in its own session.
  local -a launcher=(nohup)
  if command -v setsid >/dev/null 2>&1; then
    launcher+=(setsid)
  fi
  if [[ -n "${lib_path}" ]]; then
    launcher+=(env "LD_LIBRARY_PATH=${lib_path}")
  fi

  echo "Starting llama-server..."
  echo "Command: ${server_cmd[*]}" >> "${LOG_FILE}"
  "${launcher[@]}" "${server_cmd[@]}" >> "${LOG_FILE}" 2>&1 &
  local pid=$!
  echo "${pid}" > "${PID_FILE}"

  # Short grace period so an immediate crash (bad flag, missing binary)
  # is reported as a failure rather than a successful start.
  sleep 0.2
  if is_running; then
    echo "llama-server started: pid=${pid}"
    echo "Log: ${LOG_FILE}"
    return 0
  fi

  echo "llama-server failed to start. See ${LOG_FILE}" >&2
  rm -f "${PID_FILE}"
  return 1
}
#######################################
# Stop the managed llama-server process.
# Sends SIGTERM, polls for up to 30 rounds of 0.2 s, then falls back to
# SIGKILL.
# Globals:
#   PID_FILE (read; removed on every path)
# Outputs:
#   Progress messages on stdout.
# Returns:
#   0, including when nothing was running.
#######################################
stop() {
  local target attempt

  if is_running; then
    target="$(pid_value)"
  else
    # Nothing to stop; drop any leftover PID file.
    rm -f "${PID_FILE}"
    echo "llama-server not running"
    return 0
  fi

  echo "Stopping llama-server: pid=${target}"
  # The process may exit between the check above and this signal.
  kill "${target}" 2>/dev/null || true

  for ((attempt = 0; attempt < 30; attempt += 1)); do
    if kill -0 "${target}" 2>/dev/null; then
      # Still alive: give it a little longer.
      sleep 0.2
      continue
    fi
    rm -f "${PID_FILE}"
    echo "llama-server stopped"
    return 0
  done

  echo "llama-server did not stop after SIGTERM; sending SIGKILL"
  kill -9 "${target}" 2>/dev/null || true
  rm -f "${PID_FILE}"
  echo "llama-server stopped"
}
#######################################
# Bounce the server: stop the managed process, then launch it again.
# Returns:
#   The status of start.
#######################################
restart() {
  stop
  start
}
#######################################
# Show the tail of the server log.
# Globals:
#   LOG_FILE (read; created empty, with its directory, when missing)
# Arguments:
#   $1         - the command word itself; discarded
#   -f|--follow  keep following the file
#   --lines N    number of lines to show (default 100)
# Outputs:
#   Log lines on stdout; a message on stderr for an unknown argument.
# Returns:
#   2 for an unknown argument, otherwise the status of tail.
#######################################
logs() {
  local keep_following=0
  local line_count=100

  # Drop the leading command word, if one was passed.
  if (( $# > 0 )); then
    shift
  fi

  while (( $# > 0 )); do
    case "$1" in
      -f | --follow)
        keep_following=1
        shift
        ;;
      --lines)
        line_count="${2:?--lines requires a value}"
        shift 2
        ;;
      *)
        echo "Unknown logs argument: $1" >&2
        return 2
        ;;
    esac
  done

  # Make sure tail has a file to read even before the first start.
  mkdir -p "$(dirname "${LOG_FILE}")"
  touch "${LOG_FILE}"

  local -a tail_opts=(-n "${line_count}")
  if [[ "${keep_following}" == "1" ]]; then
    tail_opts+=(-f)
  fi
  tail "${tail_opts[@]}" "${LOG_FILE}"
}
# Entry point: run the handler for the requested command.
case "${ACTION}" in
  start | stop | restart | status)
    # Each of these commands is implemented by a function of the same name.
    "${ACTION}"
    ;;
  logs)
    # Forward the full argument list; logs() discards the leading command word.
    logs "$@"
    ;;
  help | -h | --help)
    usage
    ;;
  *)
    echo "Unknown command: ${ACTION}" >&2
    usage >&2
    exit 2
    ;;
esac