ducklm/scripts/llama/start_thinker_mtp_experimen...

118 lines
2.5 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Launcher for an experimental llama-server that uses draft-mtp speculative
# decoding. Settings come from DUCK_* environment variables, optionally
# supplemented by a .env file in the project root.
set -euo pipefail

# Project root: two directories above the directory holding this script.
# CDPATH is cleared for the cd because, when CDPATH matches a relative target,
# cd prints the directory it entered and that output would be captured here
# alongside pwd's. The `--` guards protect against paths starting with a dash.
ROOT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
# Settings whose values, when already present in the caller's environment,
# must win over whatever the project .env file assigns.
ENV_KEYS=(
  DUCK_LLAMA_SERVER_BIN
  DUCK_MAIN_MODEL_PATH
  DUCK_MTP_MODEL_PATH
  DUCK_MAIN_MTP_PORT
  DUCK_CTX_SIZE
  DUCK_N_GPU_LAYERS
  DUCK_LLAMA_DEVICE
  DUCK_PARALLEL
  DUCK_MTP_FLAGS
  DUCK_HOST
)

# Snapshot every listed variable that is currently set (even if empty) before
# .env is sourced and can overwrite it.
declare -A ENV_OVERRIDES=()
for key in "${ENV_KEYS[@]}"; do
  [[ -v "${key}" ]] || continue
  ENV_OVERRIDES["${key}"]="${!key}"
done

# Load the project .env when present; `set -a` exports everything it assigns.
if [[ -f "${ROOT_DIR}/.env" ]]; then
  set -a
  # shellcheck disable=SC1091
  source "${ROOT_DIR}/.env"
  set +a
fi

# Put the caller's values back (and export them) on top of the .env values.
for key in "${ENV_KEYS[@]}"; do
  if [[ -n "${ENV_OVERRIDES[${key}]+x}" ]]; then
    export "${key}=${ENV_OVERRIDES[${key}]}"
  fi
done
# Fail fast when the main model path is missing: ${VAR:?msg} aborts the script
# with the given message if the variable is unset or empty.
# NOTE(review): this guard runs before the command is dispatched, so it applies
# to every command, not only `start` — confirm that is intended.
: "${DUCK_MAIN_MODEL_PATH:?DUCK_MAIN_MODEL_PATH is required}"
# Command to run, taken from the first argument; defaults to `start`.
ACTION="${1:-start}"
#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  printf '%s\n' \
    'Usage: scripts/llama/start_thinker_mtp_experimental.sh <command>' \
    'Commands:' \
    'start Start experimental MTP/speculative llama-server in foreground' \
    'check Check whether the current llama-server binary exposes draft-mtp flags' \
    'help Show this help'
}
#######################################
# Turn a path into an absolute one, anchoring relative paths at the project
# root. A single leading "./" on a relative path is dropped first.
# Globals:   ROOT_DIR (read)
# Arguments: $1 - absolute or project-relative path
# Outputs:   the resolved path on stdout
#######################################
resolve_project_path() {
  local candidate="$1"
  case "${candidate}" in
    /*) ;; # already absolute: pass through untouched
    *) candidate="${ROOT_DIR}/${candidate#./}" ;;
  esac
  printf '%s\n' "${candidate}"
}
# Pick the llama-server binary. A bare command name is left for PATH lookup;
# a value containing a slash is treated as a path and resolved against the
# project root when it is relative.
LLAMA_BIN="${DUCK_LLAMA_SERVER_BIN:-llama-server}"
if [[ "${LLAMA_BIN}" == */* ]]; then
  LLAMA_BIN="$(resolve_project_path "${LLAMA_BIN}")"
fi
MAIN_MODEL_PATH="$(resolve_project_path "${DUCK_MAIN_MODEL_PATH}")"

# Answer commands that do not need the binary before probing it, so `help`
# and mistyped commands get the right output and exit code even when
# llama-server is absent or lacks draft-mtp support.
case "${ACTION}" in
  help | -h | --help)
    usage
    exit 0
    ;;
  start | check)
    ;;
  *)
    echo "Unknown command: ${ACTION}" >&2
    usage >&2
    exit 2
    ;;
esac

# Report a missing binary as such instead of as a missing capability.
if ! command -v "${LLAMA_BIN}" > /dev/null 2>&1; then
  echo "llama-server binary not found or not executable: ${LLAMA_BIN}" >&2
  exit 1
fi

# Capability probe: look for "draft-mtp" in the binary's help output. The exit
# status of --help is deliberately ignored; only the text matters.
HELP_TEXT="$("${LLAMA_BIN}" --help 2>&1 || true)"
if ! grep -qi "draft-mtp" <<< "${HELP_TEXT}"; then
  echo "This llama-server build does not expose draft-mtp speculative decoding." >&2
  exit 1
fi

# `check` stops after a successful probe; `start` continues past this block.
if [[ "${ACTION}" == "check" ]]; then
  echo "OK: draft-mtp speculative decoding is exposed by ${LLAMA_BIN}"
  exit 0
fi
# Build the llama-server invocation as an array so each value stays a single
# argument even if it contains spaces. Unset DUCK_* settings fall back to the
# defaults written inline.
server_cmd=(
  "${LLAMA_BIN}"
  -m "${MAIN_MODEL_PATH}"
  --alias local-main-mtp
  --host "${DUCK_HOST:-127.0.0.1}"
  --port "${DUCK_MAIN_MTP_PORT:-8085}"
  -c "${DUCK_CTX_SIZE:-65536}"
  --parallel "${DUCK_PARALLEL:-1}"
  -ngl "${DUCK_N_GPU_LAYERS:-auto}"
  --flash-attn on
  --cache-prompt
  --metrics
  --spec-type draft-mtp
)

# Optional device selection; omitted entirely when unset or empty.
if [[ -n "${DUCK_LLAMA_DEVICE:-}" ]]; then
  server_cmd+=(--device "${DUCK_LLAMA_DEVICE}")
fi

# Optional separate draft model; relative paths resolve against the project root.
if [[ -n "${DUCK_MTP_MODEL_PATH:-}" ]]; then
  server_cmd+=(--model-draft "$(resolve_project_path "${DUCK_MTP_MODEL_PATH}")")
fi

if [[ -n "${DUCK_MTP_FLAGS:-}" ]]; then
  # DUCK_MTP_FLAGS is intentionally word-split into separate arguments.
  # Pathname expansion is switched off for the split so a flag containing
  # *, ? or [ is passed through literally instead of being replaced by
  # matching filenames from the current directory.
  set -f
  # shellcheck disable=SC2206
  extra_args=(${DUCK_MTP_FLAGS})
  set +f
  # A whitespace-only value yields no words; skip the append so expanding an
  # empty array cannot trip `set -u` on older bash releases.
  if ((${#extra_args[@]} > 0)); then
    server_cmd+=("${extra_args[@]}")
  fi
fi

# Replace this shell with llama-server so it runs in the foreground.
exec "${server_cmd[@]}"