#!/usr/bin/env bash
#
# Start (or probe) an experimental llama-server with draft-mtp speculative
# decoding.
#
# Configuration comes from DUCK_* environment variables. If a .env file exists
# two directories above this script it is sourced, but any of the keys listed
# in ENV_KEYS that are already set in the calling environment win over the file.
#
# Required for `start`: DUCK_MAIN_MODEL_PATH
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

#######################################
# Print command-line help.
# Outputs: help text on stdout (callers redirect to stderr when needed).
#######################################
usage() {
  cat <<'EOF'
Usage: scripts/llama/start_thinker_mtp_experimental.sh [start|check|help]

Commands:
  start   Start experimental MTP/speculative llama-server in foreground
  check   Check whether the current llama-server binary exposes draft-mtp flags
  help    Show this help
EOF
}

#######################################
# Turn a possibly relative path into an absolute one anchored at ROOT_DIR.
# Globals:   ROOT_DIR (read)
# Arguments: $1 - absolute path, or path relative to ROOT_DIR (a leading
#                 "./" is dropped)
# Outputs:   the resolved path on stdout
#######################################
resolve_project_path() {
  local value="$1"
  if [[ "${value}" == /* ]]; then
    printf '%s\n' "${value}"
  else
    printf '%s\n' "${ROOT_DIR}/${value#./}"
  fi
}

ACTION="${1:-start}"

# Handle help and usage errors first so they work on a machine that has no
# model or llama-server configured yet.
case "${ACTION}" in
  start | check) ;;
  help | -h | --help)
    usage
    exit 0
    ;;
  *)
    echo "Unknown command: ${ACTION}" >&2
    usage >&2
    exit 2
    ;;
esac

ENV_KEYS=(
  DUCK_LLAMA_SERVER_BIN
  DUCK_MAIN_MODEL_PATH
  DUCK_MTP_MODEL_PATH
  DUCK_MAIN_MTP_PORT
  DUCK_CTX_SIZE
  DUCK_N_GPU_LAYERS
  DUCK_LLAMA_DEVICE
  DUCK_PARALLEL
  DUCK_MTP_FLAGS
  DUCK_HOST
)

# Remember values supplied by the caller so that sourcing .env cannot
# overwrite them.
declare -A ENV_OVERRIDES=()
for key in "${ENV_KEYS[@]}"; do
  if [[ -v "${key}" ]]; then
    ENV_OVERRIDES["${key}"]="${!key}"
  fi
done

if [[ -f "${ROOT_DIR}/.env" ]]; then
  # set -a exports every variable the file assigns.
  set -a
  # shellcheck disable=SC1091
  source "${ROOT_DIR}/.env"
  set +a
fi

# Re-apply the caller's values on top of whatever .env assigned.
for key in "${!ENV_OVERRIDES[@]}"; do
  export "${key}=${ENV_OVERRIDES[${key}]}"
done

# A bare command name is looked up on PATH; anything containing a slash is
# treated as a path and anchored at ROOT_DIR when relative.
LLAMA_BIN="${DUCK_LLAMA_SERVER_BIN:-llama-server}"
if [[ "${LLAMA_BIN}" == */* ]]; then
  LLAMA_BIN="$(resolve_project_path "${LLAMA_BIN}")"
fi

# Report a missing binary as such instead of letting the probe below
# misreport it as "does not expose draft-mtp".
if ! command -v "${LLAMA_BIN}" > /dev/null 2>&1; then
  echo "llama-server binary not found or not executable: ${LLAMA_BIN}" >&2
  exit 1
fi

# The exit status of --help is deliberately ignored; only its text matters.
HELP_TEXT="$("${LLAMA_BIN}" --help 2>&1 || true)"
if ! grep -qi "draft-mtp" <<< "${HELP_TEXT}"; then
  echo "This llama-server build does not expose draft-mtp speculative decoding." >&2
  exit 1
fi

if [[ "${ACTION}" == "check" ]]; then
  echo "OK: draft-mtp speculative decoding is exposed by ${LLAMA_BIN}"
  exit 0
fi

# Only `start` needs a model; `check` and `help` have already exited above.
: "${DUCK_MAIN_MODEL_PATH:?DUCK_MAIN_MODEL_PATH is required}"
MAIN_MODEL_PATH="$(resolve_project_path "${DUCK_MAIN_MODEL_PATH}")"

command=(
  "${LLAMA_BIN}"
  -m "${MAIN_MODEL_PATH}"
  --alias local-main-mtp
  --host "${DUCK_HOST:-127.0.0.1}"
  --port "${DUCK_MAIN_MTP_PORT:-8085}"
  -c "${DUCK_CTX_SIZE:-65536}"
  --parallel "${DUCK_PARALLEL:-1}"
  -ngl "${DUCK_N_GPU_LAYERS:-auto}"
  --flash-attn on
  --cache-prompt
  --metrics
  --spec-type draft-mtp
)

if [[ -n "${DUCK_LLAMA_DEVICE:-}" ]]; then
  command+=(--device "${DUCK_LLAMA_DEVICE}")
fi

if [[ -n "${DUCK_MTP_MODEL_PATH:-}" ]]; then
  command+=(--model-draft "$(resolve_project_path "${DUCK_MTP_MODEL_PATH}")")
fi

if [[ -n "${DUCK_MTP_FLAGS:-}" ]]; then
  # Split the flag string on whitespace WITHOUT pathname expansion, so a
  # value containing *, ? or [ is passed through literally. read returns
  # non-zero at end of input when -d '' is used, hence the || true.
  extra_args=()
  IFS=$' \t\n' read -r -d '' -a extra_args <<< "${DUCK_MTP_FLAGS}" || true
  if ((${#extra_args[@]} > 0)); then
    command+=("${extra_args[@]}")
  fi
fi

# Replace this shell with the server so it runs in the foreground and
# receives signals directly.
exec "${command[@]}"