import os
import subprocess
import textwrap
import time
from pathlib import Path


def _write_executable(path: Path, script: str) -> Path:
    """Write the dedented *script* to *path*, mark it executable, and return *path*."""
    path.write_text(textwrap.dedent(script))
    path.chmod(0o755)
    return path


def _run_script(script: str, *args: str, env) -> subprocess.CompletedProcess:
    """Run *script* with *args* under *env*, capturing stdout/stderr as text.

    The script path is passed through unchanged, so a relative path is
    resolved against the current working directory of the test run.
    """
    return subprocess.run([script, *args], env=env, text=True, capture_output=True)


def test_duck_script_manages_llama_and_api(tmp_path):
    """Drive status/start/logs/stop of duck.sh against fake llama-server and API binaries."""
    fake_bin = _write_executable(
        tmp_path / "llama-server",
        """\
        #!/usr/bin/env bash
        echo "fake llama-server $*" >&2
        trap 'exit 0' TERM INT
        while true; do sleep 1; done
        """,
    )
    fake_api = _write_executable(
        tmp_path / "fake-api.sh",
        """\
        #!/usr/bin/env bash
        echo "fake duck api $*" >&2
        trap 'exit 0' TERM INT
        while true; do sleep 1; done
        """,
    )

    pid_file = tmp_path / "llama.pid"
    log_file = tmp_path / "llama.log"
    api_pid_file = tmp_path / "duck-api.pid"
    api_log_file = tmp_path / "duck-api.log"
    model_path = tmp_path / "model.gguf"
    model_path.write_text("fake")

    env = {
        **os.environ,
        "DUCK_LLAMA_SERVER_BIN": str(fake_bin),
        "DUCK_MAIN_MODEL_PATH": str(model_path),
        "DUCK_LLAMA_PID_FILE": str(pid_file),
        "DUCK_LLAMA_LOG_FILE": str(log_file),
        "DUCK_MAIN_PORT": "18081",
        "DUCK_API_PID_FILE": str(api_pid_file),
        "DUCK_API_LOG_FILE": str(api_log_file),
        "DUCK_API_COMMAND": str(fake_api),
        "DUCK_API_PORT": "18000",
        "DUCK_QDRANT_MANAGED": "0",
    }
    script = "scripts/duck.sh"

    # Before anything is started, `status` reports both services down with exit code 3.
    stopped = _run_script(script, "status", env=env)
    assert stopped.returncode == 3
    assert "DuckLM API not running" in stopped.stdout
    assert "llama-server not running" in stopped.stdout

    started = _run_script(script, "start", env=env)
    try:
        # The start assertions live inside the guarded region so that a partially
        # successful start is still cleaned up by the `stop` call below.
        assert started.returncode == 0
        assert "Starting llama-server" in started.stdout
        assert "Starting DuckLM API" in started.stdout
        assert "Status:" in started.stdout
        assert "DuckLM API running" in started.stdout
        assert "llama-server running" in started.stdout
        assert api_pid_file.exists()
        assert pid_file.exists()

        running = _run_script(script, "status", env=env)
        assert running.returncode == 0
        assert "DuckLM API running" in running.stdout
        assert "llama-server running" in running.stdout

        # Short pause before reading logs; presumably gives the fake processes
        # time to emit their startup line.
        time.sleep(0.2)
        logs = _run_script(script, "logs", "--lines", "20", env=env)
        assert logs.returncode == 0
        assert "DuckLM API log" in logs.stdout
        assert "fake duck api" in logs.stdout
        assert "llama-server log" in logs.stdout
        assert "--alias local-main" in logs.stdout
    finally:
        # Always stop, so the endless fake processes never outlive the test.
        stopped = _run_script(script, "stop", env=env)

    # Checked outside `finally` so a stop failure cannot mask an earlier assertion error.
    assert stopped.returncode == 0
    assert not api_pid_file.exists()
    assert not pid_file.exists()


def test_duck_mtp_script_starts_mtp_llama_for_duck_api(tmp_path):
    """Check duck-mtp.sh launches llama-server on the MTP model with --spec-type draft-mtp."""
    # The fake binary answers `--help` by advertising draft-mtp support and
    # otherwise echoes its arguments to stderr and idles until signalled.
    fake_bin = _write_executable(
        tmp_path / "llama-server",
        """\
        #!/usr/bin/env bash
        if [[ "${1:-}" == "--help" ]]; then
          echo "supports --spec-type draft-mtp"
          exit 0
        fi
        echo "fake mtp llama-server $*" >&2
        trap 'exit 0' TERM INT
        while true; do sleep 1; done
        """,
    )
    fake_api = _write_executable(
        tmp_path / "fake-api.sh",
        """\
        #!/usr/bin/env bash
        echo "fake duck api $*" >&2
        trap 'exit 0' TERM INT
        while true; do sleep 1; done
        """,
    )

    main_model_path = tmp_path / "main.gguf"
    mtp_model_path = tmp_path / "mtp.gguf"
    main_model_path.write_text("main")
    mtp_model_path.write_text("mtp")

    pid_file = tmp_path / "llama-mtp.pid"
    log_file = tmp_path / "llama-mtp.log"
    api_pid_file = tmp_path / "duck-api-mtp.pid"
    api_log_file = tmp_path / "duck-api-mtp.log"

    env = {
        **os.environ,
        "DUCK_LLAMA_SERVER_BIN": str(fake_bin),
        "DUCK_MAIN_MODEL_PATH": str(main_model_path),
        "DUCK_MTP_MODEL_PATH": str(mtp_model_path),
        "DUCK_LLAMA_PID_FILE": str(pid_file),
        "DUCK_LLAMA_LOG_FILE": str(log_file),
        "DUCK_MAIN_PORT": "18081",
        "DUCK_API_PID_FILE": str(api_pid_file),
        "DUCK_API_LOG_FILE": str(api_log_file),
        "DUCK_API_COMMAND": str(fake_api),
        "DUCK_API_PORT": "18000",
        "DUCK_QDRANT_MANAGED": "0",
    }
    script = "scripts/duck-mtp.sh"

    started = _run_script(script, "start", env=env)
    try:
        assert started.returncode == 0
        assert "Starting MTP llama-server" in started.stdout
        assert "DuckLM API running" in started.stdout
        assert "llama-server running" in started.stdout

        logs = _run_script(script, "logs", "--lines", "30", env=env)
        assert logs.returncode == 0
        assert "--alias local-main" in logs.stdout
        assert "--spec-type draft-mtp" in logs.stdout
        # The MTP model is passed as the main model; no separate draft model
        # flag and no reference to the regular main model may appear.
        assert f"-m {mtp_model_path}" in logs.stdout
        assert "--model-draft" not in logs.stdout
        assert str(main_model_path) not in logs.stdout
    finally:
        # Always stop, so the endless fake processes never outlive the test.
        stopped = _run_script(script, "stop", env=env)

    # Checked outside `finally` so a stop failure cannot mask an earlier assertion error.
    assert stopped.returncode == 0
    assert not api_pid_file.exists()
    assert not pid_file.exists()


def test_duck_mtp_script_sets_llama_bin_dir_library_path_for_help_check(tmp_path):
    """Check duck-mtp.sh puts the llama-server directory on LD_LIBRARY_PATH.

    The fake binary exits with 127 (mimicking a shared-library load failure)
    unless its own directory appears in LD_LIBRARY_PATH, and the test starts
    from an empty LD_LIBRARY_PATH, so the script must add it itself.
    """
    bin_dir = tmp_path / "build" / "bin"
    bin_dir.mkdir(parents=True)
    fake_bin = _write_executable(
        bin_dir / "llama-server",
        """\
        #!/usr/bin/env bash
        case ":${LD_LIBRARY_PATH:-}:" in
          *":$(dirname "$0"):"*) ;;
          *)
            echo "error while loading shared libraries: libllama-common.so.0" >&2
            exit 127
            ;;
        esac
        if [[ "${1:-}" == "--help" ]]; then
          echo "supports --spec-type draft-mtp"
          exit 0
        fi
        echo "fake mtp llama-server $*" >&2
        trap 'exit 0' TERM INT
        while true; do sleep 1; done
        """,
    )
    fake_api = _write_executable(
        tmp_path / "fake-api.sh",
        """\
        #!/usr/bin/env bash
        trap 'exit 0' TERM INT
        while true; do sleep 1; done
        """,
    )

    main_model_path = tmp_path / "main.gguf"
    mtp_model_path = tmp_path / "mtp.gguf"
    main_model_path.write_text("main")
    mtp_model_path.write_text("mtp")

    env = {
        **os.environ,
        "LD_LIBRARY_PATH": "",
        "DUCK_LLAMA_SERVER_BIN": str(fake_bin),
        "DUCK_MAIN_MODEL_PATH": str(main_model_path),
        "DUCK_MTP_MODEL_PATH": str(mtp_model_path),
        "DUCK_LLAMA_PID_FILE": str(tmp_path / "llama-mtp.pid"),
        "DUCK_LLAMA_LOG_FILE": str(tmp_path / "llama-mtp.log"),
        "DUCK_MAIN_PORT": "18081",
        "DUCK_API_PID_FILE": str(tmp_path / "duck-api-mtp.pid"),
        "DUCK_API_LOG_FILE": str(tmp_path / "duck-api-mtp.log"),
        "DUCK_API_COMMAND": str(fake_api),
        "DUCK_API_PORT": "18000",
        "DUCK_QDRANT_MANAGED": "0",
    }
    script = "scripts/duck-mtp.sh"

    started = _run_script(script, "start", env=env)
    try:
        assert started.returncode == 0

        logs = _run_script(script, "logs", "--lines", "30", env=env)
        assert logs.returncode == 0
        assert "--spec-type draft-mtp" in logs.stdout
        assert "--model-draft" not in logs.stdout
    finally:
        # Best-effort cleanup: the stop result is intentionally not asserted here.
        _run_script(script, "stop", env=env)


def test_duck_script_manages_qdrant_compose_service(tmp_path):
    """Check duck.sh drives Qdrant through `docker compose` on start/status/logs/stop.

    A fake `docker` executable is placed first on PATH; it records every
    invocation in a log file that is inspected at the end of the test.
    """
    docker_log = tmp_path / "docker.log"
    _write_executable(
        tmp_path / "docker",
        f"""\
        #!/usr/bin/env bash
        echo "$*" >> "{docker_log}"
        if [[ "$*" == *"compose"* && "$*" == *"ps qdrant"* ]]; then
          echo "ducklm-qdrant-1 running"
        fi
        if [[ "$*" == *"compose"* && "$*" == *"logs"* ]]; then
          echo "fake qdrant log"
        fi
        exit 0
        """,
    )
    fake_llama = _write_executable(
        tmp_path / "llama-server",
        """\
        #!/usr/bin/env bash
        echo "fake llama-server $*" >&2
        trap 'exit 0' TERM INT
        while true; do sleep 1; done
        """,
    )
    fake_api = _write_executable(
        tmp_path / "fake-api.sh",
        """\
        #!/usr/bin/env bash
        trap 'exit 0' TERM INT
        while true; do sleep 1; done
        """,
    )

    api_pid_file = tmp_path / "duck-api.pid"
    # DUCK_QDRANT_MANAGED is not set here (the other tests set it to "0");
    # presumably the script then manages Qdrant by default.
    env = {
        **os.environ,
        "PATH": f"{tmp_path}:{os.environ['PATH']}",
        "DUCK_LLAMA_SERVER_BIN": str(fake_llama),
        "DUCK_MAIN_MODEL_PATH": str(tmp_path / "model.gguf"),
        "DUCK_LLAMA_PID_FILE": str(tmp_path / "llama.pid"),
        "DUCK_LLAMA_LOG_FILE": str(tmp_path / "llama.log"),
        "DUCK_MAIN_PORT": "18081",
        "DUCK_API_PID_FILE": str(api_pid_file),
        "DUCK_API_LOG_FILE": str(tmp_path / "duck-api.log"),
        "DUCK_API_COMMAND": str(fake_api),
        "DUCK_API_PORT": "18000",
    }
    Path(env["DUCK_MAIN_MODEL_PATH"]).write_text("fake")
    script = "scripts/duck.sh"

    started = _run_script(script, "start", env=env)
    try:
        assert started.returncode == 0
        assert "Starting Qdrant" in started.stdout
        assert "Qdrant status:" in started.stdout

        running = _run_script(script, "status", env=env)
        assert running.returncode == 0
        assert "Qdrant status:" in running.stdout
        assert "ducklm-qdrant-1 running" in running.stdout

        logs = _run_script(script, "logs", "--lines", "20", env=env)
        assert logs.returncode == 0
        assert "Qdrant log" in logs.stdout
        assert "fake qdrant log" in logs.stdout
    finally:
        # Always stop: without this, a failed assertion above would leave the
        # fake llama-server and API loops running after the test.
        stopped = _run_script(script, "stop", env=env)

    assert stopped.returncode == 0
    assert "Stopping Qdrant" in stopped.stdout

    calls = docker_log.read_text()
    assert "compose -f" in calls
    assert "docker-compose.memory.yml up -d qdrant" in calls
    assert "docker-compose.memory.yml ps qdrant" in calls
    assert "docker-compose.memory.yml logs --tail 20 qdrant" in calls
    assert "docker-compose.memory.yml down" in calls