ducklm/test_ducklm.py

314 lines
11 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Тестовый скрипт для проверки работы ducklm.
Позволяет ИИ-кодеру тестировать систему через отправку запросов и проверку выполнения.
"""
import json
import time
import requests
import sys
from typing import Dict, Any, Optional
class DuckLMTester:
    """Smoke-test client for a running ducklm HTTP server.

    Every ``test_*`` method sends one or two HTTP requests, records the
    outcome through :meth:`log_test` and returns ``True`` when the check
    passed. Unexpected exceptions are recorded as failures instead of being
    propagated, so one broken check never aborts the remaining ones.

    Attributes:
        base_url: Server root URL, stored without trailing slashes.
        session: Object used for HTTP calls; must provide ``get``, ``post``
            and ``close`` like ``requests.Session``.
        test_results: Result dicts appended by :meth:`log_test`, in call order.
    """

    def __init__(self, base_url: str = "http://127.0.0.1:8000",
                 session: Optional[Any] = None):
        """Create a tester.

        Args:
            base_url: Root URL of the server under test.
            session: Optional pre-built HTTP session. When omitted a new
                ``requests.Session`` is created.
        """
        # Endpoints are built as f"{base_url}/path"; dropping trailing slashes
        # prevents URLs such as "http://host//health".
        self.base_url = base_url.rstrip("/")
        self.session = session if session is not None else requests.Session()
        self.test_results = []

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    def __enter__(self) -> "DuckLMTester":
        """Return the tester itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the session when the ``with`` block ends."""
        self.close()

    def log_test(self, test_name: str, passed: bool, details: str = ""):
        """Record a test result and print a one-line status for it.

        Args:
            test_name: Human-readable name of the check.
            passed: Whether the check succeeded.
            details: Optional extra information, printed on its own line.
        """
        result = {
            "test": test_name,
            "passed": passed,
            "details": details,
            "timestamp": time.time(),
        }
        self.test_results.append(result)
        status = "✓ PASS" if passed else "✗ FAIL"
        print(f"{status}: {test_name}")
        if details:
            print(f" Details: {details}")

    def _record(self, test_name: str, passed: bool, details: str = "") -> bool:
        """Log the outcome and hand ``passed`` back for use in ``return``."""
        self.log_test(test_name, passed, details)
        return passed

    def _post_json(self, path: str, payload: Dict[str, Any], timeout: float) -> Any:
        """POST ``payload`` as JSON to ``base_url + path`` and return the response."""
        return self.session.post(
            f"{self.base_url}{path}",
            json=payload,
            timeout=timeout,
        )

    def _decode_object(self, test_name: str, response: Any) -> Optional[Dict[str, Any]]:
        """Decode the response body as a JSON object.

        Returns:
            The decoded dict, or ``None`` after logging a failure for
            ``test_name`` when the body is not valid JSON or is not an object.
        """
        try:
            data = response.json()
        except ValueError as exc:
            # requests signals an undecodable body with a ValueError subclass.
            self.log_test(test_name, False, f"Invalid JSON in response: {exc}")
            return None
        if not isinstance(data, dict):
            self.log_test(
                test_name,
                False,
                f"Expected a JSON object, got {type(data).__name__}",
            )
            return None
        return data

    def test_health(self) -> bool:
        """Check that ``GET /health`` answers 200 with ``{"status": "ok"}``."""
        name = "Health Check"
        try:
            response = self.session.get(f"{self.base_url}/health", timeout=5)
            if response.status_code != 200:
                return self._record(name, False, f"HTTP {response.status_code}")
            data = self._decode_object(name, response)
            if data is None:
                return False
            if data.get("status") == "ok":
                return self._record(name, True, "Server is healthy")
            return self._record(name, False, f"Unexpected response: {data}")
        except Exception as exc:
            # Harness boundary: report the problem as a failed check.
            return self._record(name, False, f"Connection error: {exc}")

    def test_simple_chat(self) -> bool:
        """Send a plain chat message to ``POST /chat`` and check the status.

        Passes when the reply status is ``completed``, ``awaiting_permission``
        or ``awaiting_input``.
        """
        name = "Simple Chat"
        try:
            response = self._post_json(
                "/chat", {"input": "Привет, как дела?"}, timeout=30
            )
            if response.status_code != 200:
                return self._record(
                    name, False, f"HTTP {response.status_code}: {response.text}"
                )
            data = self._decode_object(name, response)
            if data is None:
                return False
            status = data.get("status")
            if status in ("completed", "awaiting_permission", "awaiting_input"):
                return self._record(
                    name, True, f"Status: {status}, Response received"
                )
            return self._record(name, False, f"Unexpected status: {status}")
        except Exception as exc:
            return self._record(name, False, f"Request error: {exc}")

    def test_tool_execution(self) -> bool:
        """Request a ``shell_exec`` of ``echo 'test'`` and check the result.

        Passes when the task completed and its output contains ``test``, or
        when the server answers ``awaiting_permission`` instead of running it.
        """
        name = "Tool Execution"
        try:
            # A simple shell command whose output is easy to recognise.
            payload = {
                "input": "Выполни простую команду",
                "context": {
                    "requested_tool": "shell_exec",
                    "tool_args": {"command": "echo 'test'"},
                },
            }
            response = self._post_json("/chat", payload, timeout=30)
            if response.status_code != 200:
                return self._record(
                    name, False, f"HTTP {response.status_code}: {response.text}"
                )
            data = self._decode_object(name, response)
            if data is None:
                return False
            status = data.get("status")
            if status == "awaiting_permission":
                return self._record(
                    name, True, "Permission required (expected for some commands)"
                )
            if status != "completed":
                return self._record(name, False, f"Unexpected status: {status}")
            # "result" may be missing or null, and "output" may not be a
            # string; both cases are reported as unexpected output.
            result = data.get("result")
            output = result.get("output", "") if isinstance(result, dict) else ""
            if isinstance(output, str) and "test" in output:
                return self._record(
                    name, True, f"Command executed successfully: {output.strip()}"
                )
            return self._record(name, False, f"Unexpected output: {output}")
        except Exception as exc:
            return self._record(name, False, f"Request error: {exc}")

    def test_permission_flow(self) -> bool:
        """Exercise the permission round-trip for a ``shell_exec`` request.

        Sends a ``whoami`` command. If the server answers
        ``awaiting_permission`` the task is approved once through
        ``POST /permissions/resolve`` and the final status is checked. If no
        permission is requested the check passes immediately.
        """
        name = "Permission Flow"
        try:
            # First submit a task that may require permission.
            payload = {
                "input": "Запусти команду, требующую разрешения",
                "context": {
                    "requested_tool": "shell_exec",
                    "tool_args": {"command": "whoami"},
                },
            }
            response = self._post_json("/chat", payload, timeout=30)
            if response.status_code != 200:
                return self._record(
                    name, False, f"Initial request failed: HTTP {response.status_code}"
                )
            data = self._decode_object(name, response)
            if data is None:
                return False
            if data.get("status") != "awaiting_permission":
                # Not being asked for permission is accepted as well.
                return self._record(
                    name, True, f"No permission required, status: {data.get('status')}"
                )
            task_id = data.get("task_id")
            if not task_id:
                return self._record(name, False, "No task_id in response")

            # Now grant the pending permission request.
            resolve_response = self._post_json(
                "/permissions/resolve",
                {"task_id": task_id, "decision": "allow_once"},
                timeout=10,
            )
            if resolve_response.status_code != 200:
                return self._record(
                    name,
                    False,
                    f"Permission resolution failed: HTTP {resolve_response.status_code}",
                )
            resolve_data = self._decode_object(name, resolve_response)
            if resolve_data is None:
                return False
            final_status = resolve_data.get("status")
            # NOTE(review): "failed" is accepted as a pass here, presumably
            # because only the permission round-trip is under test - confirm.
            if final_status in ("completed", "failed"):
                return self._record(
                    name, True, f"Permission resolved, final status: {final_status}"
                )
            return self._record(
                name, False, f"Unexpected final status: {final_status}"
            )
        except Exception as exc:
            return self._record(name, False, f"Request error: {exc}")

    def run_all_tests(self, startup_delay: float = 2.0,
                      pause: float = 1.0) -> Dict[str, Any]:
        """Run every check in order and return a summary.

        Args:
            startup_delay: Seconds to wait before the first check, giving the
                server time to start. Values <= 0 disable the wait.
            pause: Seconds to wait between consecutive checks (a breather for
                slow hardware). Values <= 0 disable the wait.

        Returns:
            Dict with ``total_tests``, ``passed_tests``, ``failed_tests``,
            ``success_rate`` (0..1) and ``test_results`` (the full log).
        """
        print("Starting ducklm tests...")
        print("=" * 50)
        if startup_delay > 0:
            time.sleep(startup_delay)
        tests = [
            self.test_health,
            self.test_simple_chat,
            self.test_tool_execution,
            self.test_permission_flow,
        ]
        total = len(tests)
        passed = 0
        for index, test in enumerate(tests):
            # Pause only between checks; a wait after the last one is useless.
            if index and pause > 0:
                time.sleep(pause)
            if test():
                passed += 1
        print("=" * 50)
        print(f"Tests completed: {passed}/{total} passed")
        # Summary of the run.
        return {
            "total_tests": total,
            "passed_tests": passed,
            "failed_tests": total - passed,
            "success_rate": passed / total if total > 0 else 0,
            "test_results": self.test_results,
        }
def main(argv: Optional[list] = None) -> None:
    """Command-line entry point: run one check or the whole suite.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes
            argparse read the process arguments, as before.

    The function always terminates the process via ``sys.exit``: status 0
    when every executed check passed, 1 otherwise. Invalid arguments are
    rejected by argparse before any check runs.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Тест ducklm системы")
    parser.add_argument("--url", default="http://127.0.0.1:8000",
                        help="Base URL for ducklm server")
    parser.add_argument("--test",
                        choices=["health", "chat", "tool", "permission", "all"],
                        default="all", help="Specific test to run")
    args = parser.parse_args(argv)

    tester = DuckLMTester(args.url)
    try:
        if args.test == "all":
            results = tester.run_all_tests()
            print("\nFINAL RESULTS:")
            print(f"Passed: {results['passed_tests']}/{results['total_tests']}")
            print(f"Success Rate: {results['success_rate']*100:.1f}%")
            # The exit code is derived from the results.
            succeeded = results['failed_tests'] == 0
        else:
            # Run the single requested check.
            test_map = {
                "health": tester.test_health,
                "chat": tester.test_simple_chat,
                "tool": tester.test_tool_execution,
                "permission": tester.test_permission_flow,
            }
            succeeded = bool(test_map[args.test]())
            print(f"Test {args.test}: {'PASSED' if succeeded else 'FAILED'}")
    finally:
        # Release pooled connections before the process exits.
        tester.session.close()
    sys.exit(0 if succeeded else 1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()