# ducklm/test_ducklm_direct.py

#!/usr/bin/env python3
"""
Прямой тест ducklm через RuntimeController (без HTTP сервера).
Позволяет ИИ-кодеру тестировать систему через отправку запросов и проверку выполнения.
"""

import json
import time
import sys
from pathlib import Path
from typing import Dict, Any

# Добавляем текущую директорию в путь для импорта app
sys.path.insert(0, '.')

from app.runtime.runtime_controller import RuntimeController
from app.core.contracts import UserTask


class DuckLMDirectTester:
    """Smoke-test harness that drives ducklm through ``RuntimeController`` directly.

    Each ``test_*`` method submits one or more ``UserTask`` objects to the
    controller, records the outcome via :meth:`log_test`, and returns ``True``
    on success / ``False`` on failure.  Exceptions raised while a test runs
    are caught and recorded as failures, so the methods never raise.

    Attributes:
        base_dir: Base directory handed to ``RuntimeController``.
        test_results: Chronological list of result dicts written by
            :meth:`log_test`.
        controller: The ``RuntimeController`` instance, or ``None`` until
            :meth:`setup` succeeds.
    """

    # Controller attributes that must exist and be non-None for the health
    # check to pass.
    _REQUIRED_COMPONENTS = (
        "event_bus",
        "permission_service",
        "task_state_store",
        "checkpoint_store",
        "context_builder",
        "router",
        "execution_engine",
    )

    # Task statuses accepted as success by the simple conversational test.
    _SIMPLE_TASK_OK_STATUSES = ("completed", "awaiting_permission", "awaiting_input")

    def __init__(self, base_dir: str = "."):
        """Store the base directory; the controller is created later by setup()."""
        self.base_dir = Path(base_dir)
        self.test_results = []
        self.controller = None

    def setup(self):
        """Create the ``RuntimeController``.

        Returns:
            ``True`` if the controller was constructed, ``False`` if its
            construction raised (the error is printed, not re-raised).
        """
        try:
            print("Инициализация RuntimeController...")
            self.controller = RuntimeController(base_dir=self.base_dir)
            print("RuntimeController инициализирован успешно")
            return True
        except Exception as e:
            print(f"Ошибка инициализации RuntimeController: {e}")
            return False

    def log_test(self, test_name: str, passed: bool, details: str = ""):
        """Record one test outcome and echo it to stdout.

        Args:
            test_name: Human-readable name of the test.
            passed: Whether the test succeeded.
            details: Optional free-form explanation; printed only if non-empty.
        """
        self.test_results.append({
            "test": test_name,
            "passed": passed,
            "details": details,
            "timestamp": time.time(),
        })
        status = "✓ PASS" if passed else "✗ FAIL"
        print(f"{status}: {test_name}")
        if details:
            print(f"  Details: {details}")

    def _require_controller(self, test_name: str) -> bool:
        """Return True if the controller exists; otherwise log a failure for *test_name*."""
        if self.controller is None:
            self.log_test(test_name, False, "Controller not initialized")
            return False
        return True

    def test_health(self) -> bool:
        """Check that the controller exposes all required components.

        A component counts as missing when the attribute is absent from the
        controller or is ``None``; all missing names are reported together.
        """
        try:
            if not self._require_controller("Health Check"):
                return False

            # getattr with a default lets an absent attribute be reported as
            # "missing" instead of aborting the whole check on the first one.
            missing = [
                name
                for name in self._REQUIRED_COMPONENTS
                if getattr(self.controller, name, None) is None
            ]

            if missing:
                self.log_test("Health Check", False, f"Missing components: {missing}")
                return False

            self.log_test("Health Check", True, "Все компоненты инициализированы")
            return True

        except Exception as e:
            self.log_test("Health Check", False, f"Error: {e}")
            return False

    def test_simple_task(self) -> bool:
        """Submit a plain text task and accept any non-error terminal/waiting status."""
        try:
            if not self._require_controller("Simple Task"):
                return False

            task = UserTask(input="Привет, как дела?")
            result = self.controller.handle_task(task)

            status = result.get("status")
            if status in self._SIMPLE_TASK_OK_STATUSES:
                self.log_test(
                    "Simple Task",
                    True,
                    f"Status: {status}, Task ID: {result.get('task_id')}"
                )
                return True

            self.log_test("Simple Task", False, f"Unexpected status: {status}")
            return False

        except Exception as e:
            self.log_test("Simple Task", False, f"Request error: {e}")
            return False

    def test_tool_task(self) -> bool:
        """Request a ``shell_exec`` tool call via the task context and check its output.

        Passes when the task completes with the echoed text in its output, or
        when the controller answers ``awaiting_permission``.
        """
        try:
            if not self._require_controller("Tool Task"):
                return False

            # The tool and its arguments are passed through the task context.
            task = UserTask(
                input="Выполни простую команду",
                context={
                    "requested_tool": "shell_exec",
                    "tool_args": {"command": "echo 'hello from test'"}
                }
            )

            result = self.controller.handle_task(task)
            status = result.get("status")

            if status == "completed":
                output = result.get("result", {}).get("output", "")
                if "hello from test" in output:
                    self.log_test(
                        "Tool Task",
                        True,
                        f"Command executed successfully: {output.strip()}"
                    )
                    return True
                self.log_test("Tool Task", False, f"Unexpected output: {output}")
                return False

            if status == "awaiting_permission":
                self.log_test(
                    "Tool Task",
                    True,
                    "Permission required (expected for some commands)"
                )
                return True

            self.log_test("Tool Task", False, f"Unexpected status: {status}")
            return False

        except Exception as e:
            self.log_test("Tool Task", False, f"Request error: {e}")
            return False

    def test_memory_tools(self) -> bool:
        """Insert a fact through the ``memory`` tool, then search for it.

        Both tasks must finish with status ``completed``; the search output is
        only echoed (truncated to 100 characters), not validated.
        """
        try:
            if not self._require_controller("Memory Tools"):
                return False

            # Step 1: insert a record into memory.
            task_insert = UserTask(
                input="Запомни эту информацию: тестовое значение 123",
                context={
                    "requested_tool": "memory",
                    "tool_args": {
                        "operation": "insert",
                        "text": "тестовое значение 123",
                        "kind": "fact",
                        "weight": 0.8
                    }
                }
            )
            result_insert = self.controller.handle_task(task_insert)

            if result_insert.get("status") != "completed":
                self.log_test(
                    "Memory Tools Insert",
                    False,
                    f"Insert failed: {result_insert.get('status')}"
                )
                return False

            # Step 2: search memory for the record.
            task_search = UserTask(
                input="Найди запомненную информацию",
                context={
                    "requested_tool": "memory",
                    "tool_args": {
                        "operation": "search",
                        "query": "тестовое значение",
                        "limit": 5
                    }
                }
            )
            result_search = self.controller.handle_task(task_search)

            if result_search.get("status") == "completed":
                output = result_search.get("result", {}).get("output", "")
                self.log_test(
                    "Memory Tools",
                    True,
                    f"Memory search successful: {output[:100]}..."
                )
                return True

            self.log_test(
                "Memory Tools Search",
                False,
                f"Search failed: {result_search.get('status')}"
            )
            return False

        except Exception as e:
            self.log_test("Memory Tools", False, f"Request error: {e}")
            return False

    def test_file_operations(self) -> bool:
        """Exercise the ``file_read`` and ``file_write`` tools on a temporary file.

        The read task must complete, the write task must complete, and the
        file on disk must then contain exactly the written content.  The
        temporary file is removed afterwards regardless of the outcome.
        """
        try:
            if not self._require_controller("File Operations"):
                return False

            import tempfile
            import os

            # delete=False: the file must outlive this handle so the tools can
            # open it by path; it is removed in the finally block below.
            with tempfile.NamedTemporaryFile(
                mode='w', delete=False, suffix='.txt', encoding='utf-8'
            ) as f:
                temp_path = f.name
                f.write("initial content for testing")

            try:
                # Read through the tool.
                task_read = UserTask(
                    input="Прочитай файл",
                    context={
                        "requested_tool": "file_read",
                        "tool_args": {"path": temp_path}
                    }
                )
                result_read = self.controller.handle_task(task_read)

                if result_read.get("status") != "completed":
                    self.log_test(
                        "File Read",
                        False,
                        f"Read failed: {result_read.get('status')}"
                    )
                    return False

                # Write through the tool.
                new_content = "updated content from test"
                task_write = UserTask(
                    input="Запиши в файл",
                    context={
                        "requested_tool": "file_write",
                        "tool_args": {
                            "path": temp_path,
                            "content": new_content
                        }
                    }
                )
                result_write = self.controller.handle_task(task_write)

                if result_write.get("status") != "completed":
                    self.log_test(
                        "File Write",
                        False,
                        f"Write failed: {result_write.get('status')}"
                    )
                    return False

                # Verify directly on disk that the file was really updated.
                with open(temp_path, 'r', encoding='utf-8') as f:
                    actual_content = f.read()

                if actual_content == new_content:
                    self.log_test(
                        "File Operations",
                        True,
                        f"File read/write successful: {actual_content}"
                    )
                    return True

                self.log_test(
                    "File Operations",
                    False,
                    f"File content mismatch. Expected: {new_content}, Got: {actual_content}"
                )
                return False

            finally:
                # Clean up the temporary file.
                if os.path.exists(temp_path):
                    os.unlink(temp_path)

        except Exception as e:
            self.log_test("File Operations", False, f"Request error: {e}")
            return False

    def run_all_tests(self) -> Dict[str, Any]:
        """Set up the controller and run every test in sequence.

        Returns:
            ``{"error": "Setup failed"}`` if the controller could not be
            created; otherwise a summary dict with ``total_tests``,
            ``passed_tests``, ``failed_tests``, ``success_rate`` (0..1) and
            ``test_results`` (the accumulated :meth:`log_test` records).
        """
        print("Starting direct ducklm tests...")
        print("=" * 50)

        if not self.setup():
            print("Failed to setup controller")
            return {"error": "Setup failed"}

        tests = [
            self.test_health,
            self.test_simple_task,
            self.test_tool_task,
            self.test_memory_tools,
            self.test_file_operations,
        ]

        passed = 0
        total = len(tests)

        for test in tests:
            if test():
                passed += 1
            time.sleep(0.5)  # short pause between tests

        print("=" * 50)
        print(f"Tests completed: {passed}/{total} passed")

        return {
            "total_tests": total,
            "passed_tests": passed,
            "failed_tests": total - passed,
            "success_rate": passed / total if total > 0 else 0,
            "test_results": self.test_results
        }


def main():
    """Command-line entry point: run every test or one named test.

    Options:
        --basedir: base directory passed to ``DuckLMDirectTester`` (default ".").
        --test: one of ``health``, ``simple``, ``tool``, ``memory``, ``file``
            or ``all`` (default ``all``).

    The function always terminates through ``sys.exit``: status 0 when the
    selected test(s) passed, 1 when any test failed or the controller could
    not be set up.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Тест ducklm системы (прямой доступ)")
    parser.add_argument("--basedir", default=".", help="Base directory for ducklm")
    parser.add_argument("--test", choices=["health", "simple", "tool", "memory", "file", "all"],
                        default="all", help="Specific test to run")

    args = parser.parse_args()

    tester = DuckLMDirectTester(args.basedir)

    if args.test == "all":
        results = tester.run_all_tests()

        # On setup failure run_all_tests() returns {"error": ...} without the
        # summary keys; exit with a failure status instead of raising KeyError
        # on the lookups below.
        if "error" in results:
            print(f"\nFINAL RESULTS: {results['error']}")
            sys.exit(1)

        print("\nFINAL RESULTS:")
        print(f"Passed: {results['passed_tests']}/{results['total_tests']}")
        print(f"Success Rate: {results['success_rate']*100:.1f}%")

        # Exit code reflects the results: 0 only if nothing failed.
        sys.exit(0 if results['failed_tests'] == 0 else 1)
    else:
        # Run a single, specific test.
        if not tester.setup():
            print("Failed to setup controller")
            sys.exit(1)

        test_map = {
            "health": tester.test_health,
            "simple": tester.test_simple_task,
            "tool": tester.test_tool_task,
            "memory": tester.test_memory_tools,
            "file": tester.test_file_operations,
        }

        test_func = test_map[args.test]
        if test_func():
            print(f"Test {args.test}: PASSED")
            sys.exit(0)
        else:
            print(f"Test {args.test}: FAILED")
            sys.exit(1)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()