272 lines
6.2 KiB
Bash
Executable File
272 lines
6.2 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Manage a background llama-server process that runs with draft-mtp
# speculative decoding. Supported commands: start, stop, restart, status,
# logs, help. Settings are read from DUCK_* environment variables and from an
# optional .env file in the project root.

# Abort on failing commands, on unset variables, and on failures inside pipelines.
set -euo pipefail

# Absolute path of the project root, two directories above this script.
# CDPATH is cleared for the cd: when the script is invoked through a relative
# path and CDPATH is inherited, cd may resolve via CDPATH and print the target
# directory, which would otherwise be captured into ROOT_DIR as an extra line.
ROOT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
|
|
|
|
# Settings whose value from the calling environment must take precedence over
# whatever the project's .env file assigns.
ENV_KEYS=(
  DUCK_LLAMA_SERVER_BIN
  DUCK_MTP_MODEL_PATH
  DUCK_MAIN_PORT
  DUCK_CTX_SIZE
  DUCK_N_GPU_LAYERS
  DUCK_LLAMA_DEVICE
  DUCK_PARALLEL
  DUCK_LLAMA_PID_FILE
  DUCK_LLAMA_LOG_FILE
  DUCK_MTP_FLAGS
  DUCK_HOST
)

# Snapshot every listed variable that is already set (even if empty) before
# .env gets a chance to overwrite it.
declare -A ENV_OVERRIDES=()
for key in "${ENV_KEYS[@]}"; do
  [[ -v "${key}" ]] || continue
  ENV_OVERRIDES["${key}"]="${!key}"
done

# Load .env with allexport enabled so each assignment in it becomes an
# exported variable.
if [[ -f "${ROOT_DIR}/.env" ]]; then
  set -a
  # shellcheck disable=SC1091
  . "${ROOT_DIR}/.env"
  set +a
fi

# Put the snapshotted caller-provided values back on top of the .env values.
for key in "${!ENV_OVERRIDES[@]}"; do
  export "${key}=${ENV_OVERRIDES[${key}]}"
done
|
|
|
|
# Requested sub-command; the first positional argument, or "start" when absent.
ACTION=${1:-start}

# Where the managed server's PID and its output are kept.
PID_FILE=${DUCK_LLAMA_PID_FILE:-"${ROOT_DIR}/data/llama-mtp.pid"}
LOG_FILE=${DUCK_LLAMA_LOG_FILE:-"${ROOT_DIR}/data/llama-mtp.log"}

# Base URL of the server's HTTP API, used by the status health probe.
printf -v BASE_URL 'http://%s:%s/v1' "${DUCK_HOST:-127.0.0.1}" "${DUCK_MAIN_PORT:-8081}"

# Directory of the llama-server binary; filled in by start() when the binary
# is configured as a path.
LLAMA_BIN_DIR=
|
|
|
|
#######################################
# Turn a configured path into an absolute one.
# Globals:   ROOT_DIR (read)
# Arguments: $1 - path; absolute paths pass through unchanged, anything else
#                 is anchored at ROOT_DIR after dropping one leading "./"
# Outputs:   the resulting path on stdout
#######################################
resolve_project_path() {
  local candidate="$1"
  case "${candidate}" in
    /*) ;;
    *) candidate="${ROOT_DIR}/${candidate#./}" ;;
  esac
  printf '%s\n' "${candidate}"
}
|
|
|
|
#######################################
# Print the command-line help.
# Outputs: help text on stdout (callers redirect it to stderr on misuse)
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'
Usage: scripts/llama/start_mtp_main.sh [command]

Commands:
  start      Start MTP llama-server in the background (default)
  stop       Stop the managed MTP llama-server process
  restart    Stop and start MTP llama-server
  status     Print process and HTTP health status
  logs       Show logs; use --follow/-f and --lines N
  help       Show this help

Environment:
  DUCK_LLAMA_SERVER_BIN  Path to llama-server binary, default llama-server from PATH
  DUCK_MTP_MODEL_PATH    Path to MTP GGUF model (required for start)
  DUCK_HOST              Bind host, default 127.0.0.1
  DUCK_MAIN_PORT         Port, default 8081
  DUCK_CTX_SIZE          Context size, default 65536
  DUCK_N_GPU_LAYERS      GPU layers, default auto
  DUCK_LLAMA_DEVICE      Device name, for example Vulkan0
  DUCK_PARALLEL          Server slots, default 1
  DUCK_LLAMA_PID_FILE    PID file path, default <project>/data/llama-mtp.pid
  DUCK_LLAMA_LOG_FILE    Log file path, default <project>/data/llama-mtp.log
  DUCK_MTP_FLAGS         Extra MTP llama-server args
EOF
}
|
|
|
|
#######################################
# Check whether the PID recorded in the PID file refers to a live process.
# Globals: PID_FILE (read)
# Returns: 0 if the file exists, holds only digits, and that PID accepts
#          signal 0; non-zero otherwise
#######################################
is_running() {
  local recorded_pid

  if [[ ! -f "${PID_FILE}" ]]; then
    return 1
  fi

  recorded_pid="$(<"${PID_FILE}")"
  if [[ ! "${recorded_pid}" =~ ^[0-9]+$ ]]; then
    return 1
  fi

  # Signal 0 delivers nothing; it only reports whether the PID can be signalled.
  kill -0 "${recorded_pid}" 2>/dev/null
}
|
|
|
|
#######################################
# Print the contents of the PID file, or nothing when it does not exist.
# Globals: PID_FILE (read)
# Outputs: raw PID file contents on stdout
#######################################
pid_value() {
  [[ -f "${PID_FILE}" ]] || return 0
  cat "${PID_FILE}"
}
|
|
|
|
#######################################
# Report whether the managed server is alive and answering HTTP requests.
# Globals: PID_FILE, BASE_URL (read)
# Outputs: status lines on stdout; a stale PID file is removed as a side effect
# Returns: 0 when the process is running, 3 when it is not
#######################################
status() {
  local pid

  # Not running: report, clean up a leftover PID file if any, and bail out.
  if ! is_running; then
    if [[ ! -f "${PID_FILE}" ]]; then
      echo "llama-server not running"
    else
      echo "llama-server not running; removing stale pid file ${PID_FILE}"
      rm -f "${PID_FILE}"
    fi
    return 3
  fi

  pid="$(pid_value)"
  echo "llama-server running: pid=${pid}"

  # The health probe is best effort: without curl, or when the request fails,
  # the server is reported as not ready.
  if command -v curl >/dev/null 2>&1 \
    && curl --noproxy "*" -fsS "${BASE_URL}/models" >/dev/null 2>&1; then
    echo "HTTP health: ok (${BASE_URL})"
  else
    echo "HTTP health: not ready (${BASE_URL})"
  fi
  return 0
}
|
|
|
|
#######################################
# Start llama-server with draft-mtp speculative decoding in the background.
# Globals:
#   read:    PID_FILE, LOG_FILE, LD_LIBRARY_PATH, DUCK_MTP_MODEL_PATH
#            (required), DUCK_LLAMA_SERVER_BIN, DUCK_HOST, DUCK_MAIN_PORT,
#            DUCK_CTX_SIZE, DUCK_PARALLEL, DUCK_N_GPU_LAYERS,
#            DUCK_LLAMA_DEVICE, DUCK_MTP_FLAGS
#   written: LLAMA_BIN_DIR (directory of the binary when it is given as a path)
# Outputs:   progress on stdout, errors on stderr; the command line and the
#            server's own output are appended to LOG_FILE
# Returns:   0 if the server is already running or was started,
#            1 if the binary is unusable, lacks draft-mtp, or died right away
#######################################
start() {
  if is_running; then
    echo "MTP llama-server already running: pid=$(pid_value)"
    return 0
  fi

  : "${DUCK_MTP_MODEL_PATH:?DUCK_MTP_MODEL_PATH is required}"

  mkdir -p "$(dirname "${PID_FILE}")" "$(dirname "${LOG_FILE}")"
  rm -f "${PID_FILE}"

  local llama_bin mtp_model_path
  llama_bin="${DUCK_LLAMA_SERVER_BIN:-llama-server}"
  if [[ "${llama_bin}" == */* ]]; then
    # A value containing a slash is a path; relative ones are project-relative.
    llama_bin="$(resolve_project_path "${llama_bin}")"
    LLAMA_BIN_DIR="$(dirname "${llama_bin}")"
    if [[ ! -f "${llama_bin}" || ! -x "${llama_bin}" ]]; then
      echo "llama-server binary not found or not executable: ${llama_bin}" >&2
      return 1
    fi
  elif ! command -v "${llama_bin}" >/dev/null 2>&1; then
    # Bare command name: it has to be resolvable through PATH.
    echo "llama-server binary not found or not executable: ${llama_bin}" >&2
    return 1
  fi
  mtp_model_path="$(resolve_project_path "${DUCK_MTP_MODEL_PATH}")"

  # Prepend the binary's directory to LD_LIBRARY_PATH only when it is known.
  # With an empty directory the value would start with ":" and the resulting
  # zero-length entry makes the dynamic loader search the current directory.
  local -a launch_env=(env)
  if [[ -n "${LLAMA_BIN_DIR}" ]]; then
    launch_env+=("LD_LIBRARY_PATH=${LLAMA_BIN_DIR}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}")
  fi

  # Probe the build's help output for draft-mtp support before launching.
  local help_text
  help_text="$("${launch_env[@]}" "${llama_bin}" --help 2>&1 || true)"
  if ! grep -qi "draft-mtp" <<< "${help_text}"; then
    echo "This llama-server build does not expose draft-mtp speculative decoding." >&2
    return 1
  fi

  local -a server_cmd=(
    "${llama_bin}"
    -m "${mtp_model_path}"
    --alias local-main
    --host "${DUCK_HOST:-127.0.0.1}"
    --port "${DUCK_MAIN_PORT:-8081}"
    -c "${DUCK_CTX_SIZE:-65536}"
    --parallel "${DUCK_PARALLEL:-1}"
    -ngl "${DUCK_N_GPU_LAYERS:-auto}"
    --flash-attn on
    --cache-prompt
    --metrics
    --spec-type draft-mtp
  )
  if [[ -n "${DUCK_LLAMA_DEVICE:-}" ]]; then
    server_cmd+=(--device "${DUCK_LLAMA_DEVICE}")
  fi
  if [[ -n "${DUCK_MTP_FLAGS:-}" ]]; then
    # Split on whitespace without pathname expansion, so a flag value that
    # contains * or ? is passed through literally. read returns non-zero at
    # end of input when -d '' is used, hence the "|| true".
    local -a extra_args=()
    IFS=$' \t\n' read -r -d '' -a extra_args <<< "${DUCK_MTP_FLAGS}" || true
    if (( ${#extra_args[@]} > 0 )); then
      server_cmd+=("${extra_args[@]}")
    fi
  fi

  echo "Starting MTP llama-server..."
  echo "Command: ${server_cmd[*]}" >> "${LOG_FILE}"

  # Run under nohup, and in a new session when setsid is available.
  local -a launcher=(nohup)
  if command -v setsid >/dev/null 2>&1; then
    launcher+=(setsid)
  fi
  "${launcher[@]}" "${launch_env[@]}" "${server_cmd[@]}" >> "${LOG_FILE}" 2>&1 &
  local pid=$!
  echo "${pid}" > "${PID_FILE}"

  # Give the process a moment so an immediate crash is noticed.
  sleep 0.2

  if is_running; then
    echo "MTP llama-server started: pid=${pid}"
    echo "Log: ${LOG_FILE}"
    return 0
  fi

  echo "MTP llama-server failed to start. See ${LOG_FILE}" >&2
  rm -f "${PID_FILE}"
  return 1
}
|
|
|
|
#######################################
# Stop the managed server: SIGTERM first, SIGKILL if it is still alive after
# 30 polls spaced 0.2 seconds apart.
# Globals: PID_FILE (read; the file is removed)
# Outputs: progress messages on stdout
# Returns: 0
#######################################
stop() {
  local pid attempt

  # Nothing to stop: just make sure no PID file is left behind.
  if ! is_running; then
    rm -f "${PID_FILE}"
    echo "llama-server not running"
    return 0
  fi

  pid="$(pid_value)"
  echo "Stopping MTP llama-server: pid=${pid}"
  # The process may exit between the check above and the signal; ignore that.
  kill "${pid}" 2>/dev/null || true

  attempt=0
  while (( attempt < 30 )); do
    if ! kill -0 "${pid}" 2>/dev/null; then
      rm -f "${PID_FILE}"
      echo "MTP llama-server stopped"
      return 0
    fi
    sleep 0.2
    attempt=$(( attempt + 1 ))
  done

  echo "MTP llama-server did not stop after SIGTERM; sending SIGKILL"
  kill -9 "${pid}" 2>/dev/null || true
  rm -f "${PID_FILE}"
  echo "MTP llama-server stopped"
}
|
|
|
|
#######################################
# Stop the managed server if it is running, then start it again.
# Returns: the status of start
#######################################
restart() {
  # Two separate statements on purpose: start is attempted after stop, and the
  # function's status is that of start.
  stop
  start
}
|
|
|
|
#######################################
# Show the tail of the server log, optionally following it.
# Globals:   LOG_FILE (read; created empty when missing)
# Arguments: $1        - the sub-command word itself, discarded
#            -f|--follow - keep printing as the log grows
#            --lines N   - number of trailing lines to show (default 100)
# Returns:   2 on an unknown argument, otherwise the status of tail
#######################################
logs() {
  local want_follow=0
  local line_count=100

  # Drop the leading sub-command word; tolerate being called with no arguments.
  shift || true

  while (( $# > 0 )); do
    case "$1" in
      -f | --follow)
        want_follow=1
        shift
        ;;
      --lines)
        line_count="${2:?--lines requires a value}"
        shift 2
        ;;
      *)
        echo "Unknown logs argument: $1" >&2
        return 2
        ;;
    esac
  done

  # Make sure the log exists so tail has something to open.
  mkdir -p "$(dirname "${LOG_FILE}")"
  touch "${LOG_FILE}"

  local -a tail_opts=(-n "${line_count}")
  if [[ "${want_follow}" == "1" ]]; then
    tail_opts+=(-f)
  fi
  tail "${tail_opts[@]}" "${LOG_FILE}"
}
|
|
|
|
# Dispatch the requested sub-command to the function that implements it.
case "${ACTION}" in
  start | stop | restart | status)
    # Each of these actions is implemented by a function of the same name.
    "${ACTION}"
    ;;
  logs)
    # Pass all arguments through; logs drops the leading command word itself.
    logs "$@"
    ;;
  help | -h | --help)
    usage
    ;;
  *)
    echo "Unknown command: ${ACTION}" >&2
    usage >&2
    exit 2
    ;;
esac
|