ducklm/test_ducklm_direct.py

409 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Direct test of ducklm through RuntimeController (no HTTP server).
Lets an AI coder test the system by sending requests and checking that they are executed.
"""
import json
import time
import sys
from pathlib import Path
from typing import Dict, Any
# Add the current directory to the import path so that the `app` package can be imported
sys.path.insert(0, '.')
from app.runtime.runtime_controller import RuntimeController
from app.core.contracts import UserTask
class DuckLMDirectTester:
    """Smoke-test harness that drives ducklm through RuntimeController directly.

    No HTTP server is involved: each test builds a ``UserTask``, passes it to
    ``self.controller.handle_task`` and inspects the returned mapping.  Every
    outcome is printed and appended to ``self.test_results``.
    """

    # Controller attributes that must exist and be non-None for the health
    # check to pass.
    _REQUIRED_COMPONENTS = (
        "event_bus",
        "permission_service",
        "task_state_store",
        "checkpoint_store",
        "context_builder",
        "router",
        "execution_engine",
    )

    # Pause between consecutive tests in run_all_tests(), in seconds.
    _PAUSE_BETWEEN_TESTS = 0.5

    def __init__(self, base_dir: str = "."):
        """Remember the base directory; the controller is created by setup()."""
        self.base_dir = Path(base_dir)
        self.test_results = []
        self.controller = None

    def setup(self):
        """Create the RuntimeController.

        Returns:
            True when the controller was constructed, False when construction
            raised (the error is printed, not re-raised).
        """
        try:
            print("Инициализация RuntimeController...")
            self.controller = RuntimeController(base_dir=self.base_dir)
            print("RuntimeController инициализирован успешно")
            return True
        except Exception as e:
            print(f"Ошибка инициализации RuntimeController: {e}")
            return False

    def log_test(self, test_name: str, passed: bool, details: str = ""):
        """Record one test outcome in ``test_results`` and print it."""
        result = {
            "test": test_name,
            "passed": passed,
            "details": details,
            "timestamp": time.time(),
        }
        self.test_results.append(result)
        status = "✓ PASS" if passed else "✗ FAIL"
        print(f"{status}: {test_name}")
        if details:
            print(f" Details: {details}")

    def _controller_ready(self, test_name: str) -> bool:
        """Return True if a controller exists; otherwise log a failure for *test_name*."""
        if self.controller is None:
            self.log_test(test_name, False, "Controller not initialized")
            return False
        return True

    @staticmethod
    def _output_text(result) -> str:
        """Return ``result["result"]["output"]`` as text.

        A missing or None ``"result"`` / ``"output"`` yields an empty string,
        so callers can safely use ``in``, ``.strip()`` and slicing.
        """
        payload = result.get("result") or {}
        output = payload.get("output", "")
        return "" if output is None else str(output)

    def test_health(self) -> bool:
        """Check that the controller exposes all required components."""
        try:
            if not self._controller_ready("Health Check"):
                return False
            # getattr with a default reports an absent attribute as "missing"
            # instead of aborting the whole check with AttributeError.
            missing = [
                name
                for name in self._REQUIRED_COMPONENTS
                if getattr(self.controller, name, None) is None
            ]
            if missing:
                self.log_test("Health Check", False, f"Missing components: {missing}")
                return False
            self.log_test("Health Check", True, "Все компоненты инициализированы")
            return True
        except Exception as e:
            self.log_test("Health Check", False, f"Error: {str(e)}")
            return False

    def test_simple_task(self) -> bool:
        """Send a plain text task and accept any of the known non-error statuses."""
        try:
            if not self._controller_ready("Simple Task"):
                return False
            task = UserTask(input="Привет, как дела?")
            result = self.controller.handle_task(task)
            status = result.get("status")
            if status in ("completed", "awaiting_permission", "awaiting_input"):
                self.log_test(
                    "Simple Task",
                    True,
                    f"Status: {status}, Task ID: {result.get('task_id')}",
                )
                return True
            self.log_test("Simple Task", False, f"Unexpected status: {status}")
            return False
        except Exception as e:
            self.log_test("Simple Task", False, f"Request error: {str(e)}")
            return False

    def test_tool_task(self) -> bool:
        """Request a shell ``echo`` via the task context and check its output.

        A status of ``awaiting_permission`` also counts as a pass.
        """
        try:
            if not self._controller_ready("Tool Task"):
                return False
            task = UserTask(
                input="Выполни простую команду",
                context={
                    "requested_tool": "shell_exec",
                    "tool_args": {"command": "echo 'hello from test'"},
                },
            )
            result = self.controller.handle_task(task)
            status = result.get("status")
            if status == "completed":
                output = self._output_text(result)
                if "hello from test" in output:
                    self.log_test(
                        "Tool Task",
                        True,
                        f"Command executed successfully: {output.strip()}",
                    )
                    return True
                self.log_test("Tool Task", False, f"Unexpected output: {output}")
                return False
            if status == "awaiting_permission":
                self.log_test(
                    "Tool Task",
                    True,
                    "Permission required (expected for some commands)",
                )
                return True
            self.log_test("Tool Task", False, f"Unexpected status: {status}")
            return False
        except Exception as e:
            self.log_test("Tool Task", False, f"Request error: {str(e)}")
            return False

    def test_memory_tools(self) -> bool:
        """Insert a fact through the memory tool, then run a search for it.

        Both tasks must finish with status ``completed``.
        NOTE(review): the search output is only logged, not checked for the
        inserted text — confirm whether that is intended.
        """
        try:
            if not self._controller_ready("Memory Tools"):
                return False
            task_insert = UserTask(
                input="Запомни эту информацию: тестовое значение 123",
                context={
                    "requested_tool": "memory",
                    "tool_args": {
                        "operation": "insert",
                        "text": "тестовое значение 123",
                        "kind": "fact",
                        "weight": 0.8,
                    },
                },
            )
            result_insert = self.controller.handle_task(task_insert)
            if result_insert.get("status") != "completed":
                self.log_test(
                    "Memory Tools Insert",
                    False,
                    f"Insert failed: {result_insert.get('status')}",
                )
                return False
            task_search = UserTask(
                input="Найди запомненную информацию",
                context={
                    "requested_tool": "memory",
                    "tool_args": {
                        "operation": "search",
                        "query": "тестовое значение",
                        "limit": 5,
                    },
                },
            )
            result_search = self.controller.handle_task(task_search)
            if result_search.get("status") == "completed":
                output = self._output_text(result_search)
                self.log_test(
                    "Memory Tools",
                    True,
                    f"Memory search successful: {output[:100]}...",
                )
                return True
            self.log_test(
                "Memory Tools Search",
                False,
                f"Search failed: {result_search.get('status')}",
            )
            return False
        except Exception as e:
            self.log_test("Memory Tools", False, f"Request error: {str(e)}")
            return False

    def test_file_operations(self) -> bool:
        """Read and overwrite a temporary file through the file tools.

        The write is verified by re-reading the file from disk; the temporary
        file is removed afterwards regardless of the outcome.
        NOTE(review): the payload returned by ``file_read`` is not compared
        with the initial content — its schema is not visible here.
        """
        try:
            if not self._controller_ready("File Operations"):
                return False
            import tempfile
            import os

            # delete=False so the file survives the `with` and can be handed
            # to the tools by path; it is unlinked in the `finally` below.
            with tempfile.NamedTemporaryFile(
                mode='w', delete=False, suffix='.txt', encoding='utf-8'
            ) as f:
                temp_path = f.name
                f.write("initial content for testing")
            try:
                task_read = UserTask(
                    input="Прочитай файл",
                    context={
                        "requested_tool": "file_read",
                        "tool_args": {"path": temp_path},
                    },
                )
                result_read = self.controller.handle_task(task_read)
                if result_read.get("status") != "completed":
                    self.log_test(
                        "File Read",
                        False,
                        f"Read failed: {result_read.get('status')}",
                    )
                    return False
                new_content = "updated content from test"
                task_write = UserTask(
                    input="Запиши в файл",
                    context={
                        "requested_tool": "file_write",
                        "tool_args": {
                            "path": temp_path,
                            "content": new_content,
                        },
                    },
                )
                result_write = self.controller.handle_task(task_write)
                if result_write.get("status") != "completed":
                    self.log_test(
                        "File Write",
                        False,
                        f"Write failed: {result_write.get('status')}",
                    )
                    return False
                # Confirm on disk that the write really happened.
                with open(temp_path, 'r', encoding='utf-8') as f:
                    actual_content = f.read()
                if actual_content == new_content:
                    self.log_test(
                        "File Operations",
                        True,
                        f"File read/write successful: {actual_content}",
                    )
                    return True
                self.log_test(
                    "File Operations",
                    False,
                    f"File content mismatch. Expected: {new_content}, Got: {actual_content}",
                )
                return False
            finally:
                if os.path.exists(temp_path):
                    os.unlink(temp_path)
        except Exception as e:
            self.log_test("File Operations", False, f"Request error: {str(e)}")
            return False

    def run_all_tests(self) -> Dict[str, Any]:
        """Set up the controller, run every test and return a summary.

        Returns:
            A dict with ``total_tests``, ``passed_tests``, ``failed_tests``,
            ``success_rate`` and ``test_results``.  When setup fails the dict
            additionally carries ``"error": "Setup failed"`` and counts every
            planned test as failed, so callers that only read the counters
            still see a failing run.
        """
        print("Starting direct ducklm tests...")
        print("=" * 50)
        tests = [
            self.test_health,
            self.test_simple_task,
            self.test_tool_task,
            self.test_memory_tools,
            self.test_file_operations,
        ]
        total = len(tests)
        if not self.setup():
            print("Failed to setup controller")
            return {
                "error": "Setup failed",
                "total_tests": total,
                "passed_tests": 0,
                "failed_tests": total,
                "success_rate": 0,
                "test_results": self.test_results,
            }
        passed = 0
        for index, test in enumerate(tests):
            if index:
                # Short pause between tests; none is needed before the first.
                time.sleep(self._PAUSE_BETWEEN_TESTS)
            if test():
                passed += 1
        print("=" * 50)
        print(f"Tests completed: {passed}/{total} passed")
        return {
            "total_tests": total,
            "passed_tests": passed,
            "failed_tests": total - passed,
            "success_rate": passed / total if total > 0 else 0,
            "test_results": self.test_results,
        }
def main():
    """Command-line entry point.

    Parses ``--basedir`` and ``--test``, runs either the full suite or a
    single named test, and terminates the process with exit code 0 when
    everything passed and 1 otherwise (including a failed controller setup).
    """
    import argparse

    parser = argparse.ArgumentParser(description="Тест ducklm системы (прямой доступ)")
    parser.add_argument("--basedir", default=".", help="Base directory for ducklm")
    parser.add_argument(
        "--test",
        choices=["health", "simple", "tool", "memory", "file", "all"],
        default="all",
        help="Specific test to run",
    )
    args = parser.parse_args()
    tester = DuckLMDirectTester(args.basedir)

    if args.test == "all":
        results = tester.run_all_tests()
        # run_all_tests() reports a failed setup through an "error" key; in
        # that case the counters may be absent, so bail out before reading them.
        if "error" in results:
            print(f"\nFINAL RESULTS: {results['error']}")
            sys.exit(1)
        print("\nFINAL RESULTS:")
        print(f"Passed: {results['passed_tests']}/{results['total_tests']}")
        print(f"Success Rate: {results['success_rate']*100:.1f}%")
        # The exit code reflects the results: 0 only when nothing failed.
        sys.exit(0 if results['failed_tests'] == 0 else 1)
    else:
        # Run one specific test.
        if not tester.setup():
            print("Failed to setup controller")
            sys.exit(1)
        test_map = {
            "health": tester.test_health,
            "simple": tester.test_simple_task,
            "tool": tester.test_tool_task,
            "memory": tester.test_memory_tools,
            "file": tester.test_file_operations,
        }
        test_func = test_map[args.test]
        if test_func():
            print(f"Test {args.test}: PASSED")
            sys.exit(0)
        else:
            print(f"Test {args.test}: FAILED")
            sys.exit(1)
# Run the CLI only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()