314 lines
11 KiB
Python
Executable File
314 lines
11 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Тестовый скрипт для проверки работы ducklm.
|
|
Позволяет ИИ-кодеру тестировать систему через отправку запросов и проверку выполнения.
|
|
"""
|
|
|
|
import json
|
|
import time
|
|
import requests
|
|
import sys
|
|
from typing import Dict, Any, Optional
|
|
|
|
|
|
class DuckLMTester:
    """HTTP smoke-test client for a running ducklm server.

    Each ``test_*`` method sends one or two requests, records the outcome via
    :meth:`log_test` (which also prints it) and returns ``True`` on pass and
    ``False`` on failure. The test methods never raise: any exception is
    recorded as a failed test.

    The instance can be used as a context manager so that the HTTP session is
    closed when the run is over.
    """

    # Per-request timeouts, in seconds.
    HEALTH_TIMEOUT = 5
    CHAT_TIMEOUT = 30
    RESOLVE_TIMEOUT = 10

    def __init__(self, base_url: str = "http://127.0.0.1:8000",
                 session: Optional[Any] = None):
        """Create a tester bound to *base_url*.

        Args:
            base_url: Root URL of the server. Trailing slashes are stripped so
                that joined paths never contain a double slash.
            session: Optional object exposing ``get``/``post``/``close`` like
                ``requests.Session``. A new ``requests.Session`` is created
                when omitted.
        """
        # "http://host/" + "/health" would otherwise become "http://host//health".
        self.base_url = base_url.rstrip("/")
        self.session = session if session is not None else requests.Session()
        # One dict per recorded test: test, passed, details, timestamp.
        self.test_results = []

    def __enter__(self) -> "DuckLMTester":
        """Return the tester itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the session when the ``with`` block ends."""
        self.close()

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    def log_test(self, test_name: str, passed: bool, details: str = ""):
        """Record a test result and print it.

        Args:
            test_name: Human-readable name of the test.
            passed: Whether the test passed.
            details: Optional extra information; printed only when non-empty.
        """
        self.test_results.append({
            "test": test_name,
            "passed": passed,
            "details": details,
            "timestamp": time.time(),
        })
        status = "✓ PASS" if passed else "✗ FAIL"
        print(f"{status}: {test_name}")
        if details:
            print(f" Details: {details}")

    def _record(self, test_name: str, passed: bool, details: str = "") -> bool:
        """Log a result and return *passed* so callers can ``return`` it directly."""
        self.log_test(test_name, passed, details)
        return passed

    def _post_chat(self, payload: Dict[str, Any]):
        """POST *payload* as JSON to the ``/chat`` endpoint and return the response."""
        return self.session.post(
            f"{self.base_url}/chat",
            json=payload,
            timeout=self.CHAT_TIMEOUT,
        )

    def test_health(self) -> bool:
        """Check the ``/health`` endpoint.

        Passes when the server answers HTTP 200 with a JSON object whose
        ``status`` is ``"ok"``.
        """
        name = "Health Check"
        try:
            response = self.session.get(
                f"{self.base_url}/health", timeout=self.HEALTH_TIMEOUT
            )
            if response.status_code != 200:
                return self._record(name, False, f"HTTP {response.status_code}")
            data = response.json()
            if data.get("status") != "ok":
                return self._record(name, False, f"Unexpected response: {data}")
            return self._record(name, True, "Server is healthy")
        except Exception as e:
            # Boundary of the test: report the failure instead of crashing the run.
            return self._record(name, False, f"Connection error: {e}")

    def test_simple_chat(self) -> bool:
        """Send a plain chat message.

        Passes when ``/chat`` answers HTTP 200 with a ``status`` of
        ``completed``, ``awaiting_permission`` or ``awaiting_input``.
        """
        name = "Simple Chat"
        try:
            response = self._post_chat({"input": "Привет, как дела?"})
            if response.status_code != 200:
                return self._record(
                    name, False, f"HTTP {response.status_code}: {response.text}"
                )
            status = response.json().get("status")
            if status in ("completed", "awaiting_permission", "awaiting_input"):
                return self._record(
                    name, True, f"Status: {status}, Response received"
                )
            return self._record(name, False, f"Unexpected status: {status}")
        except Exception as e:
            return self._record(name, False, f"Request error: {e}")

    def test_tool_execution(self) -> bool:
        """Request execution of a simple shell command through ``/chat``.

        Passes when the reply is ``completed`` and its ``result.output`` is a
        string containing ``"test"``, or when the reply is
        ``awaiting_permission``.
        """
        name = "Tool Execution"
        try:
            # Simple shell command test.
            response = self._post_chat({
                "input": "Выполни простую команду",
                "context": {
                    "requested_tool": "shell_exec",
                    "tool_args": {"command": "echo 'test'"},
                },
            })
            if response.status_code != 200:
                return self._record(
                    name, False, f"HTTP {response.status_code}: {response.text}"
                )

            data = response.json()
            status = data.get("status")
            if status == "awaiting_permission":
                return self._record(
                    name, True, "Permission required (expected for some commands)"
                )
            if status != "completed":
                return self._record(name, False, f"Unexpected status: {status}")

            # "result" may be present but null (or not an object); treat that
            # like a missing result instead of raising on ``.get``.
            result = data.get("result")
            output = result.get("output", "") if isinstance(result, dict) else ""
            if isinstance(output, str) and "test" in output:
                return self._record(
                    name, True, f"Command executed successfully: {output.strip()}"
                )
            return self._record(name, False, f"Unexpected output: {output}")
        except Exception as e:
            return self._record(name, False, f"Request error: {e}")

    def test_permission_flow(self) -> bool:
        """Exercise the permission request/resolve round trip.

        Sends a command through ``/chat``. If the reply is
        ``awaiting_permission``, the returned ``task_id`` is resolved with
        ``allow_once`` via ``/permissions/resolve`` and the test passes when
        the final status is ``completed`` or ``failed``. Any other initial
        status is treated as "no permission required" and passes.
        """
        name = "Permission Flow"
        try:
            # First submit a task that is expected to require permission.
            response = self._post_chat({
                "input": "Запусти команду, требующую разрешения",
                "context": {
                    "requested_tool": "shell_exec",
                    "tool_args": {"command": "whoami"},
                },
            })
            if response.status_code != 200:
                return self._record(
                    name, False,
                    f"Initial request failed: HTTP {response.status_code}",
                )

            data = response.json()
            if data.get("status") != "awaiting_permission":
                # Not needing permission is also acceptable for some systems.
                # NOTE(review): every other status, including a missing one,
                # passes here - confirm this leniency is intended.
                return self._record(
                    name, True,
                    f"No permission required, status: {data.get('status')}",
                )

            task_id = data.get("task_id")
            if not task_id:
                return self._record(name, False, "No task_id in response")

            # Now resolve the pending permission request.
            resolve_response = self.session.post(
                f"{self.base_url}/permissions/resolve",
                json={"task_id": task_id, "decision": "allow_once"},
                timeout=self.RESOLVE_TIMEOUT,
            )
            if resolve_response.status_code != 200:
                return self._record(
                    name, False,
                    f"Permission resolution failed: HTTP {resolve_response.status_code}",
                )

            final_status = resolve_response.json().get("status")
            if final_status in ("completed", "failed"):
                return self._record(
                    name, True,
                    f"Permission resolved, final status: {final_status}",
                )
            return self._record(
                name, False, f"Unexpected final status: {final_status}"
            )
        except Exception as e:
            return self._record(name, False, f"Request error: {e}")

    def run_all_tests(self, startup_delay: float = 2.0,
                      pause: float = 1.0) -> Dict[str, Any]:
        """Run every test in order and return a summary.

        Args:
            startup_delay: Seconds to wait before the first test, giving the
                server time to start. Non-positive values skip the wait.
            pause: Seconds to wait between consecutive tests (helps on weak
                hardware). Non-positive values skip the wait.

        Returns:
            A dict with ``total_tests``, ``passed_tests``, ``failed_tests``,
            ``success_rate`` (0..1) and ``test_results`` (the list of all
            results recorded on this instance).
        """
        print("Starting ducklm tests...")
        print("=" * 50)

        if startup_delay > 0:
            time.sleep(startup_delay)

        tests = [
            self.test_health,
            self.test_simple_chat,
            self.test_tool_execution,
            self.test_permission_flow,
        ]

        passed = 0
        total = len(tests)
        for index, test in enumerate(tests):
            # Pause only *between* tests; sleeping after the last one is wasted time.
            if index and pause > 0:
                time.sleep(pause)
            if test():
                passed += 1

        print("=" * 50)
        print(f"Tests completed: {passed}/{total} passed")

        return {
            "total_tests": total,
            "passed_tests": passed,
            "failed_tests": total - passed,
            "success_rate": passed / total if total > 0 else 0,
            "test_results": self.test_results,
        }
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Parses ``--url`` and ``--test``, runs either the full suite or the single
    selected test against the server, prints the outcome and terminates the
    process with exit status 0 on success or 1 on failure.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Тест ducklm системы")
    cli.add_argument(
        "--url",
        default="http://127.0.0.1:8000",
        help="Base URL for ducklm server",
    )
    cli.add_argument(
        "--test",
        choices=["health", "chat", "tool", "permission", "all"],
        default="all",
        help="Specific test to run",
    )
    options = cli.parse_args()

    tester = DuckLMTester(options.url)

    if options.test != "all":
        # Run one specific test, looked up by its command-line name.
        method_names = {
            "health": "test_health",
            "chat": "test_simple_chat",
            "tool": "test_tool_execution",
            "permission": "test_permission_flow",
        }
        succeeded = getattr(tester, method_names[options.test])()
        if succeeded:
            print(f"Test {options.test}: PASSED")
        else:
            print(f"Test {options.test}: FAILED")
        sys.exit(0 if succeeded else 1)

    results = tester.run_all_tests()
    print("\nFINAL RESULTS:")
    print(f"Passed: {results['passed_tests']}/{results['total_tests']}")
    print(f"Success Rate: {results['success_rate']*100:.1f}%")

    # The exit status reflects the results: non-zero when any test failed.
    sys.exit(1 if results["failed_tests"] != 0 else 0)
|
|
|
|
|
|
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()