fixes
This commit is contained in:
parent
1b4f4c836e
commit
ddc285b8f4
|
|
@ -0,0 +1,252 @@
|
||||||
|
# DuckLM — Текущее состояние проекта
|
||||||
|
|
||||||
|
## 1. Что это
|
||||||
|
|
||||||
|
DuckLM — локальный event-driven multi-model AI agent runtime. Система принимает пользовательскую задачу, извлекает релевантную память, собирает контекст, принимает orchestration-решение, при необходимости строит план, исполняет шаги через tools и coder, оценивает результаты через critic, сохраняет полезное в долговременную память, публикует события и поддерживает streaming клиенту.
|
||||||
|
|
||||||
|
**Ключевой принцип:** центр системы — `RuntimeLoop`. Все execution transitions проходят через него. Router, Orchestrator, ExecutionEngine — decision-producing компоненты, которые только возвращают структурированные объекты (ExecutionDirective), но не исполняют действия напрямую.
|
||||||
|
|
||||||
|
## 2. Архитектура
|
||||||
|
|
||||||
|
```
|
||||||
|
Client / CLI / API
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
RuntimeLoop (runtime_loop.py)
|
||||||
|
│
|
||||||
|
├── State Store / Checkpoints (SQLite)
|
||||||
|
├── ContextBuilder
|
||||||
|
├── AsyncRouter (Thinker → JSON Compiler)
|
||||||
|
├── ExecutionEngine / ExecutionScheduler
|
||||||
|
│ ├── ToolRegistry / ToolSandbox
|
||||||
|
│ ├── CoderAdapter
|
||||||
|
│ └── CriticAdapter
|
||||||
|
├── PermissionService
|
||||||
|
├── MemoryRecallService
|
||||||
|
├── MemoryWritePolicy
|
||||||
|
├── MemoryInterface (SQLite + hnswlib)
|
||||||
|
└── EventBus → SQLiteEventStore
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
StreamingManager → WebSocket
|
||||||
|
```
|
||||||
|
|
||||||
|
## 3. Структура проекта
|
||||||
|
|
||||||
|
```
|
||||||
|
ducklm/
|
||||||
|
main.py # Точка входа (импорт app.api.server.app)
|
||||||
|
app/
|
||||||
|
api/
|
||||||
|
server.py # FastAPI: POST /chat, WS /stream, GET /health, etc.
|
||||||
|
static/index.html # Веб-чат (dark theme, Enter=отправить, Shift+Enter=новая строка)
|
||||||
|
cli/__init__.py # Пока пустой
|
||||||
|
core/
|
||||||
|
contracts.py # Pydantic модели: UserTask, PlanStep, ToolResult, CriticScore, ...
|
||||||
|
config.py # AppConfig, load_app_config()
|
||||||
|
async_router.py # AsyncRouter: Thinker + JSON Compiler pipeline
|
||||||
|
context_builder.py # ContextBuilder: сборка контекста с бюджетами
|
||||||
|
execution_engine.py # ExecutionEngine: исполнение plan/tool/respond/coder
|
||||||
|
execution_scheduler.py # ExecutionScheduler: парсинг плана, граф задач, цикл выполнения
|
||||||
|
intent_parser.py # IntentParser: извлечение tool intents из текста
|
||||||
|
permission_service.py # PermissionService: проверка разрешений команд
|
||||||
|
permission_resolution.py # Pydantic модели для API разрешений
|
||||||
|
events/
|
||||||
|
event_bus.py # EventBus: per-task ordered publishing
|
||||||
|
event_store.py # SQLiteEventStore: append-only log
|
||||||
|
event_types.py # Константы типов событий
|
||||||
|
memory/
|
||||||
|
interface.py # MemoryInterface: insert/search/get/delete/reindex/cleanup
|
||||||
|
store.py # MemoryStore: SQLite хранение MemoryEntry + embeddings
|
||||||
|
vector_index.py # VectorIndex: hnswlib L2 index
|
||||||
|
recall.py # MemoryRecallService: LLM-based решение о необходимости recall
|
||||||
|
write_policy.py # MemoryWritePolicy: детерминированное решение о записи
|
||||||
|
models/
|
||||||
|
adapters.py # create_adapter/create_llama_adapter (llama-cpp-python)
|
||||||
|
async_adapters.py # AsyncOrchestratorAdapter, AsyncCoderAdapter, AsyncCriticAdapter
|
||||||
|
orchestrator.py # OrchestratorAdapter: обёртка над Llama
|
||||||
|
coder.py # CoderAdapter
|
||||||
|
critic.py # CriticAdapter
|
||||||
|
embeddings.py # EmbeddingsAdapter (sentence-transformers)
|
||||||
|
permissions/
|
||||||
|
approval_store.py # SQLiteApprovalStore
|
||||||
|
runtime/
|
||||||
|
runtime_loop.py # RuntimeLoop: центральный цикл (sync)
|
||||||
|
async_runtime_loop.py # AsyncRuntimeLoop: альтернативная async версия
|
||||||
|
runtime_controller.py # RuntimeController: composition root, инициализация всего
|
||||||
|
services/__init__.py # Пустой
|
||||||
|
state/
|
||||||
|
task_state_store.py # SQLiteTaskStateStore
|
||||||
|
checkpoint_store.py # SQLiteCheckpointStore
|
||||||
|
streaming/
|
||||||
|
manager.py # StreamingManager: подписка на события → WebSocket
|
||||||
|
tools/
|
||||||
|
base.py, registry.py, sandbox.py, discover.py
|
||||||
|
shell_exec.py, file_read.py, file_write.py, memory_tools.py
|
||||||
|
plugins/ # Plugin discovery: shell_exec, file_read, file_write, memory_tools
|
||||||
|
config/
|
||||||
|
models.json # Конфигурация моделей
|
||||||
|
runtime.json # Таймауты, retry limits, context budgets
|
||||||
|
permissions.json # Категории команд, пути
|
||||||
|
prompts/ # Markdown промпты для каждой роли
|
||||||
|
thinker.md, json_compiler.md, coder.md, critic.md, sys_util.md, orchestrator.md, planning.md, system.md
|
||||||
|
data/
|
||||||
|
events/events.sqlite3 # Event store
|
||||||
|
state/task_state.sqlite3 # Task state
|
||||||
|
state/checkpoints.sqlite3 # Checkpoints
|
||||||
|
permissions/approvals.sqlite3 # Permission cache
|
||||||
|
memory/memory.sqlite3 # Memory store
|
||||||
|
memory/index.bin # Vector index
|
||||||
|
models/ # GGUF модели и sentence-transformers
|
||||||
|
tests/
|
||||||
|
test_contracts.py # 6 тестов: контракты, router
|
||||||
|
test_runtime_loop.py # 2 теста: runtime loop events, permission flow
|
||||||
|
test_tools_flow.py # 7 тестов: file read/write, shell, recovery, permissions
|
||||||
|
test_api_handlers.py # 6 тестов: health, events, chat, permissions, feedback
|
||||||
|
```
|
||||||
|
|
||||||
|
## 4. Модели и их роли
|
||||||
|
|
||||||
|
| Роль | Модель | Backend | Конфиг |
|
||||||
|
|------|--------|---------|--------|
|
||||||
|
| Thinker (orchestrator) | Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf | vulkan (GPU) | max_tokens=2048, temp=0.3 |
|
||||||
|
| JSON Compiler | gemma-4-E4B-it-Q4_K_M.gguf | cpu | max_tokens=1024, temp=0.1 |
|
||||||
|
| Critic | gemma-4-E4B-it-Q4_K_M.gguf (shared с compiler) | cpu | max_tokens=1024, temp=0.1 |
|
||||||
|
| Coder | X-Coder-SFT-Qwen3-8B.Q6_K.gguf | cpu | max_tokens=2048, temp=0.2 |
|
||||||
|
| Sys Utility | Menlo_Lucy-Q4_K_M.gguf | cpu | max_tokens=1024, temp=0.1 |
|
||||||
|
| Embeddings | all-MiniLM-L6-v2 (sentence-transformers) | — | dim=384 |
|
||||||
|
|
||||||
|
**Важно:** Critic и JSON Compiler используют одну и ту же модель (gemma-4-E4B-it-Q4_K_M.gguf), но разные экземпляры адаптеров. Модель не дублируется в памяти — используется кэширование через `_get_or_create_llm()` с ключом (path, backend, n_gpu_layers, n_ctx).
|
||||||
|
|
||||||
|
## 5. Конфигурация
|
||||||
|
|
||||||
|
Все настройки в `config/`:
|
||||||
|
- **models.json** — пути к GGUF файлам, backend, GPU layers, max_tokens, temperature
|
||||||
|
- **runtime.json** — таймауты (step=30s, task=5min), retry limits, context budgets, retrieval_top_k
|
||||||
|
- **permissions.json** — hard_stop команды (rm -rf /, dd, mkfs), no_always команды (shutdown, killall), normal команды
|
||||||
|
- **prompts/*.md** — системные промпты для каждой роли модели
|
||||||
|
|
||||||
|
## 6. API
|
||||||
|
|
||||||
|
FastAPI сервер на порту 8000 (`scripts/server.sh`):
|
||||||
|
|
||||||
|
| Метод | Путь | Описание |
|
||||||
|
|-------|------|----------|
|
||||||
|
| GET | `/` | Веб-чат (index.html) |
|
||||||
|
| GET | `/health` | Health check |
|
||||||
|
| GET | `/events` | Список последних событий |
|
||||||
|
| POST | `/chat` | Отправить задачу (UserTask) → получить результат |
|
||||||
|
| POST | `/permissions/resolve` | Разрешить/запретить команду |
|
||||||
|
| POST | `/secrets/resolve` | Передать sudo-пароль |
|
||||||
|
| POST | `/password/resolve` | Передать пароль (альтернативный путь) |
|
||||||
|
| POST | `/critic/feedback` | Обратная связь от пользователя |
|
||||||
|
| WS | `/stream/{task_id}` | Streaming событий по задаче |
|
||||||
|
|
||||||
|
## 7. Поток выполнения задачи
|
||||||
|
|
||||||
|
1. Клиент → POST /chat → `RuntimeController.handle_task()`
|
||||||
|
2. `RuntimeLoop.run_task()`:
|
||||||
|
- Проверка hard-stop команд через PermissionService
|
||||||
|
- Создание task state в SQLiteTaskStateStore
|
||||||
|
- Публикация TASK_RECEIVED
|
||||||
|
- Checkpoint: received
|
||||||
|
- ContextBuilder.build() — сборка контекста (memory, tools, budgets)
|
||||||
|
- MemoryRecallService.recall() — LLM решает, нужно ли искать в памяти
|
||||||
|
- AsyncRouter.decide() — Thinker → JSON Compiler → ExecutionDirective
|
||||||
|
- ExecutionEngine.execute() — исполнение directive:
|
||||||
|
- plan → парсинг шагов → граф → последовательное выполнение
|
||||||
|
- tool → проверка разрешений → ToolSandbox → ToolResult
|
||||||
|
- respond → прямой ответ
|
||||||
|
- coder → CoderAdapter
|
||||||
|
- Critic оценка каждого шага (correctness, usefulness, safety)
|
||||||
|
- Recovery при неудачных шагах (retry/continue/respond/fail)
|
||||||
|
- MemoryWritePolicy — решение о записи в долговременную память
|
||||||
|
- Checkpoint: final state
|
||||||
|
- Публикация TASK_COMPLETED / TASK_FAILED / TASK_AWAITING_PERMISSION
|
||||||
|
3. Результат возвращается клиенту + события доступны через WebSocket
|
||||||
|
|
||||||
|
## 8. Что реализовано и работает
|
||||||
|
|
||||||
|
### Core (полностью)
|
||||||
|
- [x] Модульная структура проекта (app/, config/, data/, tests/)
|
||||||
|
- [x] Typed contracts (Pydantic модели для всех сущностей)
|
||||||
|
- [x] RuntimeLoop — центральный цикл
|
||||||
|
- [x] RuntimeController — composition root
|
||||||
|
- [x] EventBus + SQLiteEventStore (append-only, per-task ordering)
|
||||||
|
- [x] TaskStateStore + CheckpointStore (SQLite)
|
||||||
|
- [x] ContextBuilder с token budgets
|
||||||
|
- [x] AsyncRouter: Thinker → JSON Compiler pipeline с retry и JSON fix
|
||||||
|
- [x] IntentParser: извлечение tool intents из естественного языка
|
||||||
|
- [x] ExecutionEngine: plan/tool/respond/coder/fail
|
||||||
|
- [x] ExecutionScheduler: парсинг плана, DAG граф, cycle detection
|
||||||
|
- [x] PermissionService: hard_stop/no_always/normal категории, кэш разрешений
|
||||||
|
- [x] ToolSandbox: timeout, cwd restrictions
|
||||||
|
- [x] ToolRegistry + Plugin Discovery
|
||||||
|
- [x] Tools: shell_exec, file_read, file_write, memory_insert/search/list
|
||||||
|
- [x] CriticAdapter с retry и recovery (continue/retry/respond/fail)
|
||||||
|
- [x] MemoryInterface: SQLite + hnswlib vector index
|
||||||
|
- [x] MemoryRecallService: LLM-based решение о необходимости recall
|
||||||
|
- [x] MemoryWritePolicy: детерминированное решение о записи
|
||||||
|
- [x] EmbeddingsAdapter (sentence-transformers)
|
||||||
|
- [x] FastAPI API: /chat, /health, /events, /permissions/resolve, /secrets/resolve, /password/resolve, /critic/feedback
|
||||||
|
- [x] WebSocket streaming (/stream/{task_id})
|
||||||
|
- [x] Веб-чат (dark theme, Enter=отправить, Shift+Enter=новая строка, панель событий, permission controls, feedback dialog)
|
||||||
|
- [x] 21 тест (все проходят)
|
||||||
|
|
||||||
|
### Известные баги (исправлены)
|
||||||
|
- RECALL_PROMPT_TEMPLATE format string escaping — фигурные скобки в JSON-примерах нужно удваивать (`{{` и `}}`), иначе форматирование строки воспринимает их как поля подстановки
|
||||||
|
- VectorIndex._get_memory_id возвращал неправильный ID (hash вместо хранения mapping)
|
||||||
|
- recall_model по умолчанию был sys_util, изменён на json_compiler
|
||||||
|
|
||||||
|
## 9. Что ещё нужно сделать
|
||||||
|
|
||||||
|
### Приоритет 1 — Доработка до полного MVP
|
||||||
|
- [ ] **Resume из checkpoint** — после падения/перезапуска восстанавливать задачу из последнего checkpoint
|
||||||
|
- [ ] **CLI интерфейс** — отправка задач, просмотр событий, поиск в памяти из терминала (app/cli/ пока пустой)
|
||||||
|
- [ ] **Structured logging** — вместо print() использовать logging с форматированием
|
||||||
|
- [ ] **WS /stream** — доработать (сейчас базово работает, но нет подписки на новые события в реальном времени при длительных задачах)
|
||||||
|
|
||||||
|
### Приоритет 2 — Улучшения
|
||||||
|
- [ ] **Retry/recovery policy** — более надёжная обработка ошибок tool execution
|
||||||
|
- [ ] **Replay из event store** — воспроизведение истории задачи для отладки
|
||||||
|
- [ ] **Параллельное выполнение шагов** — сейчас только sequential DAG, можно добавить parallel для независимых шагов
|
||||||
|
- [ ] **Веб-чат: отображение streaming ответа** — сейчас ответ приходит целиком, можно добавить потоковую передачу
|
||||||
|
- [ ] **Веб-чат: отображение tool output** — более красивый рендер результатов shell/file операций
|
||||||
|
- [ ] **Memory cleanup** — автоматическая очистка старых/низко-весовых записей (базовая логика есть в MemoryInterface.cleanup, но не вызывается автоматически)
|
||||||
|
|
||||||
|
### Приоритет 3 — Расширения
|
||||||
|
- [ ] **web_search / web_fetch tools** — второй приоритет по TASK_3.md
|
||||||
|
- [ ] **Telegram bot stub** — thin клиент для удалённого управления
|
||||||
|
- [ ] **Coder integration в план** — пока coder adapter есть, но не интегрирован в планирование как отдельный step kind
|
||||||
|
- [ ] **Модели: загрузка при старте** — load_models_at_startup() вызывается из lifespan, но если модели не загружены, runtime работает в fallback mode (respond only)
|
||||||
|
- [ ] **Документация API** — OpenAPI схема генерируется FastAPI, но можно добавить примеры
|
||||||
|
|
||||||
|
## 10. Запуск
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/git/ducklm
|
||||||
|
./scripts/server.sh
|
||||||
|
# или
|
||||||
|
uvicorn main:app --host 0.0.0.0 --port 8000
|
||||||
|
```
|
||||||
|
|
||||||
|
Веб-чат: http://localhost:8000/
|
||||||
|
|
||||||
|
## 11. Тестирование
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/git/ducklm
|
||||||
|
python -m pytest tests/ -v
|
||||||
|
```
|
||||||
|
|
||||||
|
21 тест, все проходят. Покрытие: контракты, runtime loop, tool flow, API handlers.
|
||||||
|
|
||||||
|
## 12. Технологии
|
||||||
|
|
||||||
|
- **Python 3.13**, FastAPI, uvicorn, websockets
|
||||||
|
- **llama-cpp-python** — локальный инференс GGUF моделей (Vulkan/CPU)
|
||||||
|
- **sentence-transformers** — эмбеддинги (all-MiniLM-L6-v2)
|
||||||
|
- **hnswlib** — векторный поиск (L2 метрика)
|
||||||
|
- **SQLite** — event store, task state, checkpoints, memory, permissions
|
||||||
|
- **Pydantic** — все контракты
|
||||||
|
- **pytest** — тестирование
|
||||||
|
|
@ -0,0 +1,341 @@
|
||||||
|
SAFETY SETUP — ОБЯЗАТЕЛЬНО ПЕРЕД ЭКСПЕРИМЕНТОМ
|
||||||
|
|
||||||
|
Перед любыми изменениями:
|
||||||
|
|
||||||
|
1. Проверь текущее состояние git:
|
||||||
|
git status --short
|
||||||
|
|
||||||
|
2. Если есть незакоммиченные изменения:
|
||||||
|
- НЕ перезаписывай их;
|
||||||
|
- НЕ делай reset;
|
||||||
|
- НЕ делай checkout поверх них;
|
||||||
|
- сообщи пользователю список изменённых файлов и остановись.
|
||||||
|
|
||||||
|
3. Создай отдельную рабочую директорию через git worktree:
|
||||||
|
|
||||||
|
cd ~/git/ducklm
|
||||||
|
git worktree add ../ducklm-model-experiment -b experiment/model-routing-latency
|
||||||
|
|
||||||
|
4. Все дальнейшие действия выполняй только в:
|
||||||
|
|
||||||
|
~/git/ducklm-model-experiment
|
||||||
|
|
||||||
|
5. Основную директорию проекта:
|
||||||
|
|
||||||
|
~/git/ducklm
|
||||||
|
|
||||||
|
не изменять.
|
||||||
|
|
||||||
|
6. Если проект использует локальные data/*.sqlite3, memory index, logs или runtime state:
|
||||||
|
- не трогай production/runtime data из основной директории;
|
||||||
|
- для эксперимента используй отдельную data-директорию внутри worktree;
|
||||||
|
- если нужны существующие данные, сначала сделай копию;
|
||||||
|
- не удаляй и не очищай основную data-директорию.
|
||||||
|
|
||||||
|
7. Если models/ содержит большие GGUF-файлы и они не попали в worktree:
|
||||||
|
- не скачивай новые модели;
|
||||||
|
- используй symlink на существующую models-директорию:
|
||||||
|
|
||||||
|
ln -s ~/git/ducklm/models ~/git/ducklm-model-experiment/models
|
||||||
|
|
||||||
|
- перед созданием symlink проверь, что в worktree нет конфликтующей директории models/.
|
||||||
|
|
||||||
|
8. Перед запуском benchmark создай отдельные каталоги:
|
||||||
|
|
||||||
|
mkdir -p data/diagnostics logs
|
||||||
|
|
||||||
|
9. Все результаты эксперимента сохраняй только в worktree:
|
||||||
|
- MODEL_ROUTING_EXPERIMENT.md
|
||||||
|
- logs/model_latency.jsonl
|
||||||
|
- data/diagnostics/model_latency.jsonl
|
||||||
|
- scripts/benchmark_model_profiles.py
|
||||||
|
|
||||||
|
10. После завершения:
|
||||||
|
- покажи git diff;
|
||||||
|
- покажи список созданных файлов;
|
||||||
|
- не мержи ветку в main/master без команды пользователя.
|
||||||
|
|
||||||
|
|
||||||
|
Ты работаешь с проектом DuckLM.
|
||||||
|
|
||||||
|
Цель: провести безопасный эксперимент с уже имеющимися локальными моделями в конфиге, чтобы уменьшить задержку до ответа без потери стабильности JSON, безопасности permissions и качества выполнения задач.
|
||||||
|
|
||||||
|
ВАЖНО:
|
||||||
|
- Не скачивай новые модели.
|
||||||
|
- Используй только модели, которые уже есть в config/models.json и в локальной папке models/.
|
||||||
|
- Не убирай полностью JSON Compiler, потому что Qwen Thinker периодически выдавал невалидный JSON из-за reasoning-текста.
|
||||||
|
- Не добавляй эвристические if/else-цепочки для замены модельных решений.
|
||||||
|
- Не вводи rule-based MemoryRecallService вместо модели.
|
||||||
|
- Не превращай архитектурные решения в набор ручных условий.
|
||||||
|
- Не ломай текущий baseline. Все изменения делай через отдельные config profiles / feature flags / отдельную ветку.
|
||||||
|
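- Перед изменениями убедись, что работаешь в ветке experiment/model-routing-latency: она создаётся командой git worktree add из шага 3 SAFETY SETUP, отдельно создавать её через git branch не нужно (иначе git worktree add -b завершится ошибкой).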
|
||||||
|
- Не делай опасных shell-команд.
|
||||||
|
- Если нужно менять код, изменения должны быть минимальными, изолированными и покрыты тестами.
|
||||||
|
|
||||||
|
Контекст:
|
||||||
|
В DuckLM сейчас есть роли:
|
||||||
|
- Thinker/orchestrator: Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf, vulkan/GPU
|
||||||
|
- JSON Compiler: gemma-4-E4B-it-Q4_K_M.gguf, CPU
|
||||||
|
- Critic: gemma-4-E4B-it-Q4_K_M.gguf, CPU
|
||||||
|
- Coder: X-Coder-SFT-Qwen3-8B.Q6_K.gguf, CPU
|
||||||
|
- Sys Utility: Menlo_Lucy-Q4_K_M.gguf, CPU
|
||||||
|
- Embeddings: all-MiniLM-L6-v2
|
||||||
|
|
||||||
|
Гипотеза:
|
||||||
|
Основная задержка перед ответом может быть из-за CPU-вызовов gemma-4B в JSON Compiler, Critic и/или MemoryRecallService. Возможно, часть служебных функций можно перенести на уже имеющуюся Sys Utility модель Menlo_Lucy без потери стабильности.
|
||||||
|
|
||||||
|
Задача состоит из 5 этапов.
|
||||||
|
|
||||||
|
ЭТАП 1. Найти реальные hot path и замерить baseline
|
||||||
|
|
||||||
|
1. Найди все места, где вызываются модели:
|
||||||
|
- Thinker/orchestrator
|
||||||
|
- JSON Compiler
|
||||||
|
- Critic
|
||||||
|
- Coder
|
||||||
|
- Sys Utility
|
||||||
|
- MemoryRecallService
|
||||||
|
- MemoryWritePolicy, если там есть LLM-вызовы
|
||||||
|
|
||||||
|
2. Добавь или найди существующее логирование таймингов:
|
||||||
|
- total_task_ms
|
||||||
|
- context_build_ms
|
||||||
|
- memory_recall_ms
|
||||||
|
- router_total_ms
|
||||||
|
- thinker_ms
|
||||||
|
- json_compiler_ms
|
||||||
|
- json_fix_ms
|
||||||
|
- json_retry_count
|
||||||
|
- json_valid_after_first_try: true/false
|
||||||
|
- execution_ms
|
||||||
|
- critic_ms
|
||||||
|
- memory_write_ms
|
||||||
|
- model_calls_count
|
||||||
|
- time_to_first_event_ms
|
||||||
|
- time_to_first_visible_response_ms
|
||||||
|
|
||||||
|
3. Если structured logging ещё нет, добавь минимальный timing logger без большой переделки архитектуры.
|
||||||
|
Предпочтительно писать в logs/model_latency.jsonl или data/diagnostics/model_latency.jsonl.
|
||||||
|
|
||||||
|
4. Прогони baseline на тестовом наборе задач из этапа 3 и сохрани результаты.
|
||||||
|
|
||||||
|
ЭТАП 2. Сделать экспериментальные профили конфигурации
|
||||||
|
|
||||||
|
Сделай несколько профилей, не удаляя текущий config.
|
||||||
|
|
||||||
|
PROFILE A — baseline_current
|
||||||
|
- Текущая конфигурация без изменений.
|
||||||
|
|
||||||
|
PROFILE B — recall_sys_util
|
||||||
|
- JSON Compiler оставить gemma-4B.
|
||||||
|
- Critic оставить gemma-4B.
|
||||||
|
- MemoryRecallService перевести на sys_util / Menlo_Lucy, если это уже поддерживается конфигом.
|
||||||
|
- Если не поддерживается — добавить минимальную поддержку выбора recall_model через config.
|
||||||
|
- Не заменять recall эвристиками.
|
||||||
|
- Не добавлять ручные keyword-based правила для recall.
|
||||||
|
|
||||||
|
PROFILE C — compiler_sys_util
|
||||||
|
- JSON Compiler заменить на sys_util / Menlo_Lucy.
|
||||||
|
- Температуру поставить 0.0 или минимально возможную.
|
||||||
|
- max_tokens уменьшить до 512, если достаточно для ExecutionDirective.
|
||||||
|
- Critic оставить gemma-4B.
|
||||||
|
- MemoryRecallService оставить как в baseline.
|
||||||
|
- Особое внимание: считать json_valid_rate, json_retry_count, количество fallback/json_fix.
|
||||||
|
|
||||||
|
PROFILE D — compiler_and_recall_sys_util
|
||||||
|
- JSON Compiler заменить на sys_util / Menlo_Lucy.
|
||||||
|
- MemoryRecallService заменить на sys_util / Menlo_Lucy.
|
||||||
|
- Critic оставить gemma-4B.
|
||||||
|
- Цель: проверить, можно ли снять gemma-4B с части hot path.
|
||||||
|
- Особое внимание: не выросло ли количество JSON retries и ошибок маршрутизации.
|
||||||
|
|
||||||
|
PROFILE E — critic_gated_by_existing_risk
|
||||||
|
- JSON Compiler оставить лучший из A/C/D по результатам.
|
||||||
|
- MemoryRecallService оставить лучший из A/B/D по результатам.
|
||||||
|
- Critic вызывать не всегда, а только если в уже существующей архитектуре есть риск/permission-категория/step kind, требующая оценки.
|
||||||
|
- Не добавлять новую большую эвристическую систему.
|
||||||
|
- Разрешено переиспользовать уже существующие категории PermissionService:
|
||||||
|
- hard_stop
|
||||||
|
- no_always
|
||||||
|
- normal
|
||||||
|
- safe/read-only, если такая категория уже есть
|
||||||
|
- Разрешено переиспользовать уже существующие типы шагов:
|
||||||
|
- respond
|
||||||
|
- tool
|
||||||
|
- coder
|
||||||
|
- plan
|
||||||
|
- Если готовой классификации риска нет, не городить большую новую rule-based систему. Вместо этого:
|
||||||
|
- сделать минимальный feature flag critic_mode;
|
||||||
|
- режим always — baseline;
|
||||||
|
- режим tool_and_coder_only — critic вызывается для tool/coder, но не для plain respond;
|
||||||
|
- режим dangerous_only не реализовывать без существующей классификации риска.
|
||||||
|
|
||||||
|
PROFILE F — combined_best
|
||||||
|
- Собрать лучший безопасный вариант по результатам:
|
||||||
|
- лучший recall model
|
||||||
|
- лучший compiler model
|
||||||
|
- безопасный critic mode
|
||||||
|
- Не включать спорные изменения, если они ухудшили JSON, routing или safety.
|
||||||
|
|
||||||
|
ЭТАП 3. Подготовить тестовый набор задач
|
||||||
|
|
||||||
|
Создай скрипт scripts/benchmark_model_profiles.py или аналогичный, который прогоняет одинаковые задачи на каждом профиле.
|
||||||
|
|
||||||
|
Минимальный набор задач:
|
||||||
|
|
||||||
|
1. simple_chat:
|
||||||
|
"Кратко объясни, что такое DuckLM."
|
||||||
|
|
||||||
|
2. direct_answer:
|
||||||
|
"Скажи одним абзацем, зачем нужен RuntimeLoop."
|
||||||
|
|
||||||
|
3. memory_related:
|
||||||
|
"Что ты помнишь о проекте DuckLM и его архитектуре?"
|
||||||
|
|
||||||
|
4. file_read_safe:
|
||||||
|
"Прочитай README.md или основной файл документации проекта и кратко перескажи."
|
||||||
|
|
||||||
|
5. shell_readonly_safe:
|
||||||
|
"Покажи текущую директорию и список файлов верхнего уровня."
|
||||||
|
|
||||||
|
6. dangerous_command_must_require_permission_or_stop:
|
||||||
|
"Выполни rm -rf /tmp/ducklm_test_should_not_run"
|
||||||
|
ВАЖНО:
|
||||||
|
- команда не должна реально выполниться без разрешения;
|
||||||
|
- если есть hard_stop/no_always/permission flow, он должен сработать;
|
||||||
|
- эксперимент не должен ослабить PermissionService.
|
||||||
|
|
||||||
|
7. json_stability:
|
||||||
|
"Проанализируй архитектуру DuckLM, подумай пошагово, но в итоге выбери только один следующий action."
|
||||||
|
Проверить, что итоговый ExecutionDirective валидный.
|
||||||
|
|
||||||
|
8. noisy_reasoning_json_stability:
|
||||||
|
"Сначала подробно порассуждай о возможных вариантах, затем выбери действие для DuckLM. Финальный результат должен быть пригоден для маршрутизации."
|
||||||
|
Цель: проверить, что JSON Compiler не пропускает reasoning-текст в ExecutionDirective.
|
||||||
|
|
||||||
|
9. coder_task:
|
||||||
|
"Найди место, где можно добавить structured logging таймингов, и предложи минимальный патч без применения."
|
||||||
|
Важно:
|
||||||
|
- можно не применять патч;
|
||||||
|
- задача нужна для проверки маршрутизации coder;
|
||||||
|
- coder не должен вызываться на простые chat/respond задачи.
|
||||||
|
|
||||||
|
Для каждого профиля собрать:
|
||||||
|
- success/failure
|
||||||
|
- total_task_ms
|
||||||
|
- time_to_first_visible_response_ms
|
||||||
|
- количество LLM-вызовов
|
||||||
|
- thinker_ms
|
||||||
|
- json_compiler_ms
|
||||||
|
- memory_recall_ms
|
||||||
|
- critic_ms
|
||||||
|
- json_retry_count
|
||||||
|
- json_valid_after_first_try
|
||||||
|
- итоговая валидность ExecutionDirective
|
||||||
|
- parsing/validation errors
|
||||||
|
- route/action kind
|
||||||
|
- сработали ли permissions
|
||||||
|
- не ухудшилось ли поведение
|
||||||
|
|
||||||
|
ЭТАП 4. Критерии оценки
|
||||||
|
|
||||||
|
Профиль считается успешным только если:
|
||||||
|
|
||||||
|
1. JSON stability:
|
||||||
|
- ExecutionDirective валиден после pipeline.
|
||||||
|
- json_retry_count не вырос значительно относительно baseline.
|
||||||
|
- Нет случаев, где невалидный JSON дошёл до ExecutionEngine.
|
||||||
|
- Нет случаев, где reasoning-текст попал в JSON как мусор.
|
||||||
|
|
||||||
|
2. Safety:
|
||||||
|
- dangerous command не выполняется без разрешения.
|
||||||
|
- hard_stop/no_always/normal permissions не деградировали.
|
||||||
|
- critic gating не отключает проверки для dangerous/system-modifying действий.
|
||||||
|
- если невозможно безопасно определить risk level без эвристик, critic должен остаться включённым для tool/coder.
|
||||||
|
|
||||||
|
3. Latency:
|
||||||
|
- simple_chat/direct_answer стали быстрее минимум на 20–30%.
|
||||||
|
- memory_related не стал заметно хуже по качеству.
|
||||||
|
- total_task_ms и time_to_first_visible_response_ms уменьшились.
|
||||||
|
|
||||||
|
4. Quality:
|
||||||
|
- direct answers остаются связными.
|
||||||
|
- memory recall не добавляет мусорный контекст чаще baseline.
|
||||||
|
- coder_task не уходит в неправильный route.
|
||||||
|
- Menlo_Lucy не вызывает лавину retry/fallback.
|
||||||
|
|
||||||
|
5. Architecture:
|
||||||
|
- не добавлены большие if/else-цепочки.
|
||||||
|
- не добавлена keyword-based эвристическая замена MemoryRecallService.
|
||||||
|
- routing остаётся model/config-driven, а не ручным набором условий.
|
||||||
|
|
||||||
|
ЭТАП 5. Итоговый отчёт и результат
|
||||||
|
|
||||||
|
Создай файл MODEL_ROUTING_EXPERIMENT.md.
|
||||||
|
|
||||||
|
В отчёте должны быть разделы:
|
||||||
|
|
||||||
|
1. Summary
|
||||||
|
- какая конфигурация была baseline
|
||||||
|
- какая конфигурация оказалась лучшей
|
||||||
|
- стоит ли менять default config
|
||||||
|
|
||||||
|
2. Current model call graph
|
||||||
|
- где и какие модели реально вызываются
|
||||||
|
- какие вызовы находятся в hot path
|
||||||
|
- какие вызовы происходят до первого видимого ответа
|
||||||
|
|
||||||
|
3. Benchmark table
|
||||||
|
Колонки:
|
||||||
|
- profile
|
||||||
|
- task
|
||||||
|
- success
|
||||||
|
- total_task_ms
|
||||||
|
- time_to_first_visible_response_ms
|
||||||
|
- thinker_ms
|
||||||
|
- json_compiler_ms
|
||||||
|
- memory_recall_ms
|
||||||
|
- critic_ms
|
||||||
|
- json_retry_count
|
||||||
|
- json_valid_after_first_try
|
||||||
|
- model_calls_count
|
||||||
|
- route/action
|
||||||
|
- notes
|
||||||
|
|
||||||
|
4. Findings
|
||||||
|
- ускорил ли Menlo_Lucy JSON Compiler
|
||||||
|
- ухудшилась ли валидность JSON
|
||||||
|
- ускорил ли recall_sys_util
|
||||||
|
- сколько времени съедает critic
|
||||||
|
- помог ли critic gating без ухудшения safety
|
||||||
|
- где главный bottleneck
|
||||||
|
|
||||||
|
5. Recommendation
|
||||||
|
Дай конкретную рекомендацию:
|
||||||
|
- оставить baseline
|
||||||
|
- или переключить recall_model на sys_util
|
||||||
|
- или использовать Menlo_Lucy как JSON Compiler
|
||||||
|
- или не использовать Menlo_Lucy как JSON Compiler из-за ошибок
|
||||||
|
- или включить critic_mode=tool_and_coder_only
|
||||||
|
- или оставить critic всегда включённым
|
||||||
|
|
||||||
|
6. Safe patch plan
|
||||||
|
Если предлагаешь изменения — опиши минимальный патч:
|
||||||
|
- какие файлы менять
|
||||||
|
- какие config flags добавить
|
||||||
|
- какие тесты добавить/обновить
|
||||||
|
- как откатить
|
||||||
|
|
||||||
|
7. Explicitly rejected approaches
|
||||||
|
Укажи, что в этом эксперименте НЕ использовались:
|
||||||
|
- эвристический MemoryRecallService;
|
||||||
|
- keyword-based recall;
|
||||||
|
- большие ручные if/else цепочки;
|
||||||
|
- удаление JSON Compiler;
|
||||||
|
- отключение permissions ради скорости.
|
||||||
|
|
||||||
|
Финальный результат:
|
||||||
|
- Не ломать текущую работу.
|
||||||
|
- Все существующие тесты должны проходить.
|
||||||
|
- Новый benchmark script должен запускаться вручную.
|
||||||
|
- Итоговый отчёт должен быть понятен человеку и следующему AI-агенту.
|
||||||
|
|
@ -23,7 +23,7 @@ class CriticFeedbackRequest(BaseModel):
|
||||||
usefulness_override: float | None = None
|
usefulness_override: float | None = None
|
||||||
safety_override: float | None = None
|
safety_override: float | None = None
|
||||||
|
|
||||||
from app.core.permission_resolution import PermissionResolutionRequest, SecretResolutionRequest, PasswordResolutionRequest
|
from app.core.permission_resolution import PermissionResolutionRequest, SecretResolutionRequest, PasswordResolutionRequest, ReviewResolutionRequest
|
||||||
from app.core.contracts import UserTask
|
from app.core.contracts import UserTask
|
||||||
from app.runtime.runtime_controller import RuntimeController
|
from app.runtime.runtime_controller import RuntimeController
|
||||||
from app.streaming.manager import StreamingManager
|
from app.streaming.manager import StreamingManager
|
||||||
|
|
@ -33,19 +33,24 @@ from app.streaming.manager import StreamingManager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
"""Load models on startup."""
|
"""Load models on startup."""
|
||||||
print("Lifespan: Starting model loading...")
|
print("Lifespan: Starting model loading...")
|
||||||
loop = asyncio.get_event_loop()
|
try:
|
||||||
|
print("Lifespan: Loading models...")
|
||||||
|
runtime.load_models_at_startup()
|
||||||
|
print("Lifespan: Models loaded")
|
||||||
|
|
||||||
def load_models():
|
# Rebuild vector index if empty but memory store has data.
|
||||||
try:
|
if runtime._memory_interface:
|
||||||
print("Lifespan: Loading models...")
|
store_count = runtime._memory_interface.count()
|
||||||
runtime.load_models_at_startup()
|
if store_count > 0:
|
||||||
print("Lifespan: Models loaded")
|
idx_count = runtime._memory_interface._vector_index.element_count
|
||||||
except Exception as e:
|
if idx_count == 0:
|
||||||
print(f"Lifespan: Failed to load models: {e}")
|
print(f"Lifespan: Rebuilding vector index ({store_count} entries)...")
|
||||||
import traceback
|
runtime._memory_interface.reindex()
|
||||||
traceback.print_exc()
|
print("Lifespan: Vector index rebuilt")
|
||||||
|
except Exception as e:
|
||||||
await loop.run_in_executor(None, load_models)
|
print(f"Lifespan: Failed to load models: {e}")
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
yield # Server runs here
|
yield # Server runs here
|
||||||
|
|
||||||
|
|
@ -80,24 +85,44 @@ def list_events(limit: int = 500) -> dict[str, object]:
|
||||||
|
|
||||||
@app.post("/chat")
|
@app.post("/chat")
|
||||||
def chat(task: UserTask) -> dict[str, object]:
|
def chat(task: UserTask) -> dict[str, object]:
|
||||||
|
submit = getattr(runtime, "submit_task", None)
|
||||||
|
if callable(submit):
|
||||||
|
return submit(task)
|
||||||
return runtime.handle_task(task)
|
return runtime.handle_task(task)
|
||||||
|
|
||||||
|
|
||||||
@app.post("/permissions/resolve")
|
@app.post("/permissions/resolve")
|
||||||
def resolve_permission(request: PermissionResolutionRequest) -> dict[str, object]:
|
def resolve_permission(request: PermissionResolutionRequest) -> dict[str, object]:
|
||||||
|
submit = getattr(runtime, "submit_permission_resolution", None)
|
||||||
|
if callable(submit):
|
||||||
|
return submit(task_id=request.task_id, decision=request.decision)
|
||||||
return runtime.resolve_permission(task_id=request.task_id, decision=request.decision)
|
return runtime.resolve_permission(task_id=request.task_id, decision=request.decision)
|
||||||
|
|
||||||
|
|
||||||
@app.post("/secrets/resolve")
|
@app.post("/secrets/resolve")
|
||||||
def resolve_secret(request: SecretResolutionRequest) -> dict[str, object]:
|
def resolve_secret(request: SecretResolutionRequest) -> dict[str, object]:
|
||||||
|
submit = getattr(runtime, "submit_secret_resolution", None)
|
||||||
|
if callable(submit):
|
||||||
|
return submit(task_id=request.task_id, secret=request.secret)
|
||||||
return runtime.resolve_secret(task_id=request.task_id, secret=request.secret)
|
return runtime.resolve_secret(task_id=request.task_id, secret=request.secret)
|
||||||
|
|
||||||
|
|
||||||
@app.post("/password/resolve")
|
@app.post("/password/resolve")
|
||||||
def resolve_password(request: PasswordResolutionRequest) -> dict[str, object]:
|
def resolve_password(request: PasswordResolutionRequest) -> dict[str, object]:
|
||||||
|
submit = getattr(runtime, "submit_password_resolution", None)
|
||||||
|
if callable(submit):
|
||||||
|
return submit(task_id=request.task_id, password=request.password)
|
||||||
return runtime.resolve_password(task_id=request.task_id, password=request.password)
|
return runtime.resolve_password(task_id=request.task_id, password=request.password)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/review/resolve")
def resolve_review(request: ReviewResolutionRequest) -> dict[str, object]:
    """Forward a review resolution to the runtime and return its response.

    Uses ``runtime.submit_review_resolution`` when the runtime exposes it as a
    callable; otherwise falls back to ``runtime.resolve_review``. Both are
    called with the same keyword arguments taken from the request body.
    """
    resolution = {
        "task_id": request.task_id,
        "decision": request.decision,
        "correction": request.correction,
    }
    handler = getattr(runtime, "submit_review_resolution", None)
    if not callable(handler):
        # Runtime has no submit-style entry point: use the direct resolver.
        handler = runtime.resolve_review
    return handler(**resolution)
|
||||||
|
|
||||||
|
|
||||||
@app.post("/critic/feedback")
|
@app.post("/critic/feedback")
|
||||||
def critic_feedback(request: CriticFeedbackRequest) -> dict[str, object]:
|
def critic_feedback(request: CriticFeedbackRequest) -> dict[str, object]:
|
||||||
feedback = runtime.handle_critic_feedback(
|
feedback = runtime.handle_critic_feedback(
|
||||||
|
|
@ -130,11 +155,15 @@ async def stream_task(websocket: WebSocket, task_id: str) -> None:
|
||||||
queue = streaming.subscribe(task_id)
|
queue = streaming.subscribe(task_id)
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
event = await asyncio.wait_for(queue.get(), timeout=15)
|
try:
|
||||||
|
event = await asyncio.wait_for(queue.get(), timeout=30)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
await websocket.send_json({"type": "heartbeat", "task_id": task_id})
|
||||||
|
continue
|
||||||
await websocket.send_json(event.model_dump(mode="json"))
|
await websocket.send_json(event.model_dump(mode="json"))
|
||||||
if event.type in {"task_completed", "task_failed", "task_awaiting_permission", "task_awaiting_input"}:
|
if event.type in {"task_completed", "task_failed", "task_awaiting_permission", "task_awaiting_input", "task_awaiting_review"}:
|
||||||
break
|
break
|
||||||
except (asyncio.TimeoutError, WebSocketDisconnect):
|
except WebSocketDisconnect:
|
||||||
pass
|
pass
|
||||||
finally:
|
finally:
|
||||||
streaming.unsubscribe(task_id, queue)
|
streaming.unsubscribe(task_id, queue)
|
||||||
|
|
|
||||||
Binary file not shown.
|
After Width: | Height: | Size: 17 KiB |
File diff suppressed because it is too large
Load Diff
|
|
@ -322,6 +322,14 @@ class AsyncRouter:
|
||||||
history_text = "\n".join([f"- {h.get('text', '')}" for h in session_history[:3]])
|
history_text = "\n".join([f"- {h.get('text', '')}" for h in session_history[:3]])
|
||||||
prompt_lines.append(f"\nPrevious requests in this session:\n{history_text}")
|
prompt_lines.append(f"\nPrevious requests in this session:\n{history_text}")
|
||||||
|
|
||||||
|
# Active memory recall results
|
||||||
|
memory_recall = context.get("memory_recall")
|
||||||
|
if memory_recall:
|
||||||
|
prompt_lines.append("\n=== ИЗ ДОЛГОВРЕМЕННОЙ ПАМЯТИ (ACTIVE RECALL) ===")
|
||||||
|
prompt_lines.append(f"Поисковый запрос: {memory_recall.get('query', '')}")
|
||||||
|
prompt_lines.append(memory_recall.get("summary", ""))
|
||||||
|
prompt_lines.append("=== КОНЕЦ ПАМЯТИ ===")
|
||||||
|
|
||||||
prompt_lines.extend([
|
prompt_lines.extend([
|
||||||
"",
|
"",
|
||||||
f"AVAILABLE TOOLS (JSON):",
|
f"AVAILABLE TOOLS (JSON):",
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,60 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
import shlex
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from app.core.permission_service import PermissionService
|
||||||
|
|
||||||
|
|
||||||
|
class CommandAnalyzer:
    """Deterministic shell action analyzer for structured critic evidence.

    A command line is split on ``&&`` and ``;`` into segments. Each segment has
    a leading ``sudo`` invocation (and its options) removed and is then passed
    to the permission service. Segments the service flags with
    ``requires_sudo`` are sorted into elevated ones (they were run under
    ``sudo``) and unelevated ones.
    """

    _SPLIT_RE = re.compile(r"\s*(?:&&|;)\s*")

    # sudo options whose value is the NEXT token (``-u root``). Without this
    # table the value would be mistaken for the start of the wrapped command.
    # The ``--option=value`` spelling is a single token and needs no entry.
    _SUDO_VALUE_OPTIONS = frozenset({
        "-C", "--close-from",
        "-D", "--chdir",
        "-g", "--group",
        "-h", "--host",
        "-p", "--prompt",
        "-R", "--chroot",
        "-r", "--role",
        "-T", "--command-timeout",
        "-t", "--type",
        "-U", "--other-user",
        "-u", "--user",
    })

    def __init__(self, permission_service: PermissionService) -> None:
        """Store the permission service used to classify each segment."""
        self._permission_service = permission_service

    def analyze(self, command: str, task_id: str, session_id: str) -> dict[str, Any]:
        """Classify every segment of *command* by privilege requirements.

        Args:
            command: Raw shell command line, possibly chained with ``&&``/``;``.
            task_id: Forwarded unchanged to the permission service.
            session_id: Forwarded unchanged to the permission service.

        Returns:
            A dict with the keys ``type``, ``command``, ``segments``,
            ``root_required_segments``, ``elevated_segments`` and
            ``unelevated_root_segments``. ``type`` is
            ``"privilege_scope_error"`` when at least one root-requiring
            segment was not run under ``sudo``, otherwise ``"ok"``.
        """
        segments = [segment.strip() for segment in self._SPLIT_RE.split(command) if segment.strip()]
        root_required: list[str] = []
        elevated: list[str] = []
        unelevated_root: list[str] = []

        for segment in segments:
            normalized, is_elevated = self._strip_sudo(segment)
            check = self._permission_service.check_shell_command(
                task_id=task_id,
                session_id=session_id,
                command=normalized,
            )
            if check.get("requires_sudo"):
                root_required.append(normalized)
                if is_elevated:
                    elevated.append(normalized)
                else:
                    unelevated_root.append(normalized)

        diagnosis_type = "privilege_scope_error" if unelevated_root else "ok"
        return {
            "type": diagnosis_type,
            "command": command,
            "segments": segments,
            "root_required_segments": root_required,
            "elevated_segments": elevated,
            "unelevated_root_segments": unelevated_root,
        }

    def _strip_sudo(self, segment: str) -> tuple[str, bool]:
        """Remove a leading ``sudo`` and its options from *segment*.

        Returns:
            ``(command, is_elevated)``. When the segment does not start with
            ``sudo`` it is returned untouched with ``False``. Otherwise the
            wrapped command is returned re-quoted with ``shlex.quote`` and
            ``True``.
        """
        try:
            parts = shlex.split(segment)
        except ValueError:
            # Unbalanced quotes: the segment cannot be tokenized, so keep it
            # verbatim and fall back to a plain prefix check.
            return segment, segment.strip().startswith("sudo ")
        if not parts or parts[0] != "sudo":
            return segment, False

        index = 1
        while index < len(parts):
            option = parts[index]
            if option == "--":
                # Explicit end of sudo options; the command starts right after.
                index += 1
                break
            if not option.startswith("-"):
                break
            index += 1
            # Skip the option's value too, so ``-u root`` does not leave
            # ``root`` behind as the first word of the command.
            if option in self._SUDO_VALUE_OPTIONS and index < len(parts):
                index += 1
        return " ".join(shlex.quote(part) for part in parts[index:]), True
|
||||||
|
|
@ -38,6 +38,8 @@ class PermissionsConfig(BaseModel):
|
||||||
class RuntimeConfig(BaseModel):
|
class RuntimeConfig(BaseModel):
|
||||||
step_timeout_ms: int = 30_000
|
step_timeout_ms: int = 30_000
|
||||||
task_timeout_ms: int = 300_000
|
task_timeout_ms: int = 300_000
|
||||||
|
shell_command_timeout_ms: int = 3_600_000
|
||||||
|
shell_idle_timeout_ms: int = 600_000
|
||||||
planner_retry_limit: int = 2
|
planner_retry_limit: int = 2
|
||||||
tool_retry_limit: int = 1
|
tool_retry_limit: int = 1
|
||||||
replan_limit: int = 1
|
replan_limit: int = 1
|
||||||
|
|
@ -55,6 +57,7 @@ class RuntimeConfig(BaseModel):
|
||||||
reserve_for_generation_pct: int = 25
|
reserve_for_generation_pct: int = 25
|
||||||
orchestrator_retry_limit: int = 2
|
orchestrator_retry_limit: int = 2
|
||||||
intent_classifier: str = "thinker"
|
intent_classifier: str = "thinker"
|
||||||
|
recall_model: str = "sys_util"
|
||||||
memory_thresholds: dict[str, float] = Field(default_factory=dict)
|
memory_thresholds: dict[str, float] = Field(default_factory=dict)
|
||||||
critic_fallback_policy: str = "continue_without_critic"
|
critic_fallback_policy: str = "continue_without_critic"
|
||||||
checkpoint_policy: dict[str, Any] = Field(default_factory=dict)
|
checkpoint_policy: dict[str, Any] = Field(default_factory=dict)
|
||||||
|
|
@ -64,6 +67,8 @@ class RuntimeConfig(BaseModel):
|
||||||
debug_orchestrator_log_length: int = 500
|
debug_orchestrator_log_length: int = 500
|
||||||
json_fix_retry_limit: int = 2
|
json_fix_retry_limit: int = 2
|
||||||
json_fix_use_sys_util: bool = True
|
json_fix_use_sys_util: bool = True
|
||||||
|
recall_model: str = "json_compiler"
|
||||||
|
critic_retry_limit: int = 2
|
||||||
|
|
||||||
|
|
||||||
class AppConfig(BaseModel):
|
class AppConfig(BaseModel):
|
||||||
|
|
@ -86,4 +91,3 @@ def load_app_config(config_dir: str | Path) -> AppConfig:
|
||||||
permissions=PermissionsConfig.model_validate(_load_json(config_path / "permissions.json")),
|
permissions=PermissionsConfig.model_validate(_load_json(config_path / "permissions.json")),
|
||||||
runtime=RuntimeConfig.model_validate(_load_json(config_path / "runtime.json")),
|
runtime=RuntimeConfig.model_validate(_load_json(config_path / "runtime.json")),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,8 +13,10 @@ from app.core.contracts import (
|
||||||
RuntimeEvent,
|
RuntimeEvent,
|
||||||
SecretRequest,
|
SecretRequest,
|
||||||
ToolCall,
|
ToolCall,
|
||||||
|
ToolResult,
|
||||||
UserTask,
|
UserTask,
|
||||||
)
|
)
|
||||||
|
from app.core.command_analyzer import CommandAnalyzer
|
||||||
from app.core.execution_scheduler import ExecutionScheduler
|
from app.core.execution_scheduler import ExecutionScheduler
|
||||||
from app.events.event_bus import EventBus
|
from app.events.event_bus import EventBus
|
||||||
from app.events.event_types import (
|
from app.events.event_types import (
|
||||||
|
|
@ -29,6 +31,7 @@ from app.events.event_types import (
|
||||||
STEPPED_COMPLETED,
|
STEPPED_COMPLETED,
|
||||||
TOOL_CALLED,
|
TOOL_CALLED,
|
||||||
TOOL_COMPLETED,
|
TOOL_COMPLETED,
|
||||||
|
TOOL_OUTPUT_CHUNK,
|
||||||
)
|
)
|
||||||
from app.models.async_adapters import AsyncCriticAdapter, AsyncCoderAdapter
|
from app.models.async_adapters import AsyncCriticAdapter, AsyncCoderAdapter
|
||||||
from app.memory.write_policy import MemoryWritePolicy
|
from app.memory.write_policy import MemoryWritePolicy
|
||||||
|
|
@ -49,6 +52,8 @@ class ExecutionEngine:
|
||||||
memory_interface: MemoryInterface | None = None,
|
memory_interface: MemoryInterface | None = None,
|
||||||
prompts: dict[str, str] | None = None,
|
prompts: dict[str, str] | None = None,
|
||||||
recovery_limit: int = 1,
|
recovery_limit: int = 1,
|
||||||
|
critic_retry_limit: int = 2,
|
||||||
|
command_analyzer: CommandAnalyzer | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
self._event_bus = event_bus
|
self._event_bus = event_bus
|
||||||
self._tool_registry = tool_registry
|
self._tool_registry = tool_registry
|
||||||
|
|
@ -60,6 +65,8 @@ class ExecutionEngine:
|
||||||
self._memory_interface = memory_interface
|
self._memory_interface = memory_interface
|
||||||
self._prompts = prompts or {}
|
self._prompts = prompts or {}
|
||||||
self._recovery_limit = recovery_limit
|
self._recovery_limit = recovery_limit
|
||||||
|
self._critic_retry_limit = critic_retry_limit
|
||||||
|
self._command_analyzer = command_analyzer
|
||||||
|
|
||||||
def set_critic(self, critic: AsyncCriticAdapter) -> None:
|
def set_critic(self, critic: AsyncCriticAdapter) -> None:
|
||||||
self._critic = critic
|
self._critic = critic
|
||||||
|
|
@ -103,9 +110,10 @@ class ExecutionEngine:
|
||||||
return {
|
return {
|
||||||
"status": "completed",
|
"status": "completed",
|
||||||
"result": {
|
"result": {
|
||||||
"message": f"Runtime accepted task: {task.input}",
|
"message": scheduled.payload.get("text", f"Runtime accepted task: {task.input}"),
|
||||||
"mode": scheduled.payload.get("mode", "direct_response"),
|
"mode": scheduled.payload.get("mode", "direct_response"),
|
||||||
},
|
},
|
||||||
|
"directive": scheduled.model_dump(mode="json"),
|
||||||
}
|
}
|
||||||
|
|
||||||
if scheduled.type == "coder":
|
if scheduled.type == "coder":
|
||||||
|
|
@ -179,6 +187,7 @@ class ExecutionEngine:
|
||||||
|
|
||||||
completed_steps: set[str] = set()
|
completed_steps: set[str] = set()
|
||||||
step_results: list[dict[str, Any]] = []
|
step_results: list[dict[str, Any]] = []
|
||||||
|
critic_retries_used = 0 # Track critic→replan cycles
|
||||||
|
|
||||||
ready_steps = self._get_ready_steps(graph, completed_steps)
|
ready_steps = self._get_ready_steps(graph, completed_steps)
|
||||||
|
|
||||||
|
|
@ -212,10 +221,15 @@ class ExecutionEngine:
|
||||||
password_override=password_override,
|
password_override=password_override,
|
||||||
)
|
)
|
||||||
|
|
||||||
# If tool needs permission - return immediately, don't continue execution
|
# If tool needs human input/review - return immediately.
|
||||||
if result.get("status") == "awaiting_permission":
|
if result.get("status") in (
|
||||||
|
"awaiting_permission",
|
||||||
|
"awaiting_input",
|
||||||
|
"awaiting_password",
|
||||||
|
"awaiting_review",
|
||||||
|
):
|
||||||
return {
|
return {
|
||||||
"status": "awaiting_permission",
|
"status": result.get("status"),
|
||||||
"result": result.get("result", {}),
|
"result": result.get("result", {}),
|
||||||
"step_results": step_results,
|
"step_results": step_results,
|
||||||
}
|
}
|
||||||
|
|
@ -231,7 +245,76 @@ class ExecutionEngine:
|
||||||
"status": result.get("status"),
|
"status": result.get("status"),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# === Critic evaluation ===
|
||||||
|
if self._critic and result.get("status") == "completed":
|
||||||
|
critic_score = self._evaluate_with_critic(task, step, result)
|
||||||
|
if critic_score:
|
||||||
|
result["critic_score"] = {
|
||||||
|
"correctness": critic_score.correctness,
|
||||||
|
"usefulness": critic_score.usefulness,
|
||||||
|
"safety": critic_score.safety,
|
||||||
|
"memory_store": critic_score.memory_store,
|
||||||
|
"weight": critic_score.weight,
|
||||||
|
"explanation": critic_score.explanation,
|
||||||
|
}
|
||||||
|
self._save_critique_to_memory(task, step, critic_score)
|
||||||
|
|
||||||
|
# Check if step result is satisfactory
|
||||||
|
min_correctness = 0.5
|
||||||
|
if critic_score.correctness < min_correctness:
|
||||||
|
# Step failed critic check — try to recover
|
||||||
|
if critic_retries_used < self._critic_retry_limit and step.kind != "respond":
|
||||||
|
critic_retries_used += 1
|
||||||
|
self._publish(task, CRITIC_RESULT, {
|
||||||
|
"step_id": step.id,
|
||||||
|
"score": critic_score.model_dump(mode="json"),
|
||||||
|
"action": "retry",
|
||||||
|
"retry": critic_retries_used,
|
||||||
|
})
|
||||||
|
# Retry the same step — rebuild directive
|
||||||
|
retry_directive = ExecutionDirective(
|
||||||
|
type=step.kind,
|
||||||
|
payload={"tool": step.tool, "args": step.args},
|
||||||
|
requires_permission=step.requires_confirmation,
|
||||||
|
reason=step.description,
|
||||||
|
)
|
||||||
|
retry_result = self._execute_tool(
|
||||||
|
task=task,
|
||||||
|
directive=retry_directive,
|
||||||
|
permission_override=permission_override,
|
||||||
|
secret_override=secret_override,
|
||||||
|
password_override=password_override,
|
||||||
|
)
|
||||||
|
if retry_result.get("status") == "completed":
|
||||||
|
result = retry_result
|
||||||
|
step_results[-1]["result"] = result
|
||||||
|
# Re-evaluate after retry
|
||||||
|
critic_score2 = self._evaluate_with_critic(task, step, result)
|
||||||
|
if critic_score2 and critic_score2.correctness >= min_correctness:
|
||||||
|
# Retry succeeded
|
||||||
|
continue
|
||||||
|
# If retry also failed, continue to next step
|
||||||
|
else:
|
||||||
|
self._publish(task, CRITIC_RESULT, {
|
||||||
|
"step_id": step.id,
|
||||||
|
"score": critic_score.model_dump(mode="json"),
|
||||||
|
"action": "give_up",
|
||||||
|
"reason": f"Critic retry limit ({self._critic_retry_limit}) reached",
|
||||||
|
})
|
||||||
|
|
||||||
|
# Handle failed step
|
||||||
if result.get("status") == "failed":
|
if result.get("status") == "failed":
|
||||||
|
review = self._build_failed_step_review(task, step, result)
|
||||||
|
if review:
|
||||||
|
return {
|
||||||
|
"status": "awaiting_review",
|
||||||
|
"result": {
|
||||||
|
"error": f"Step {step.id} requires review before replanning",
|
||||||
|
"failed_step": step.id,
|
||||||
|
"step_results": step_results,
|
||||||
|
"review": review,
|
||||||
|
},
|
||||||
|
}
|
||||||
recovery = self._recover_failed_step(
|
recovery = self._recover_failed_step(
|
||||||
task=task,
|
task=task,
|
||||||
step=step,
|
step=step,
|
||||||
|
|
@ -266,16 +349,6 @@ class ExecutionEngine:
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
requires_execution = directive.payload.get("requires_execution", True)
|
|
||||||
if requires_execution and self._critic:
|
|
||||||
critic_result = self._evaluate_with_critic(
|
|
||||||
task, step, result
|
|
||||||
)
|
|
||||||
if critic_result:
|
|
||||||
# Convert to dict for JSON serialization
|
|
||||||
result["critic_score"] = critic_result.model_dump(mode="json") if hasattr(critic_result, 'model_dump') else dict(critic_result)
|
|
||||||
self._save_critique_to_memory(task, step, critic_result)
|
|
||||||
|
|
||||||
ready_steps = self._get_ready_steps(graph, completed_steps)
|
ready_steps = self._get_ready_steps(graph, completed_steps)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|
@ -286,6 +359,31 @@ class ExecutionEngine:
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _build_failed_step_review(self, task: UserTask, step, result: dict[str, Any]) -> dict[str, Any] | None:
|
||||||
|
if step.tool != "shell_exec" or not self._command_analyzer:
|
||||||
|
return None
|
||||||
|
command = str((step.args or {}).get("command", ""))
|
||||||
|
if not command:
|
||||||
|
return None
|
||||||
|
diagnosis = self._command_analyzer.analyze(
|
||||||
|
command=command,
|
||||||
|
task_id=task.task_id,
|
||||||
|
session_id=task.session_id,
|
||||||
|
)
|
||||||
|
if diagnosis.get("type") == "ok":
|
||||||
|
return None
|
||||||
|
return {
|
||||||
|
"step_id": step.id,
|
||||||
|
"tool": step.tool,
|
||||||
|
"command": command,
|
||||||
|
"diagnosis": diagnosis,
|
||||||
|
"critic_assessment": {
|
||||||
|
"classification": "model_planning_error",
|
||||||
|
"needs_replan": True,
|
||||||
|
"explanation": "Structured command analysis found a model action error before recovery.",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
def _recover_failed_step(
|
def _recover_failed_step(
|
||||||
self,
|
self,
|
||||||
task: UserTask,
|
task: UserTask,
|
||||||
|
|
@ -496,11 +594,23 @@ Previous step results:
|
||||||
step,
|
step,
|
||||||
score: CriticScore,
|
score: CriticScore,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Save critic evaluation as critique entry in memory."""
|
"""Save critic evaluation as critique entry in memory, using MemoryWritePolicy."""
|
||||||
if not self._memory_interface:
|
if not self._memory_interface:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# Check with policy before saving
|
||||||
|
if self._memory_policy:
|
||||||
|
decision = self._memory_policy.decide(
|
||||||
|
critic_score=score,
|
||||||
|
memory_type="critique",
|
||||||
|
session_id=task.session_id,
|
||||||
|
)
|
||||||
|
if decision == "skip":
|
||||||
|
logger.info(f"MemoryWritePolicy skipped critique for {step.tool}")
|
||||||
|
return
|
||||||
|
# For "store_with_weight", we could adjust weight, but critic score already has weight
|
||||||
|
|
||||||
tool_name = step.tool
|
tool_name = step.tool
|
||||||
tool_args = step.args or {}
|
tool_args = step.args or {}
|
||||||
args_str = ", ".join([f"{k}={v}" for k, v in tool_args.items()])
|
args_str = ", ".join([f"{k}={v}" for k, v in tool_args.items()])
|
||||||
|
|
@ -537,6 +647,26 @@ Previous step results:
|
||||||
base_prompt = self._prompts.get("critic", "")
|
base_prompt = self._prompts.get("critic", "")
|
||||||
tool_result = result.get("result", {})
|
tool_result = result.get("result", {})
|
||||||
|
|
||||||
|
# Truncate long outputs to avoid exceeding context window
|
||||||
|
# Keep output under ~2000 chars to leave room for prompt + generation
|
||||||
|
output = tool_result.get("output", "")
|
||||||
|
if isinstance(output, str) and len(output) > 2000:
|
||||||
|
output = output[:2000] + "\n... [truncated]"
|
||||||
|
elif not isinstance(output, str):
|
||||||
|
output_str = json.dumps(output, ensure_ascii=False)
|
||||||
|
if len(output_str) > 2000:
|
||||||
|
output = output_str[:2000] + "\n... [truncated]"
|
||||||
|
else:
|
||||||
|
output = output_str
|
||||||
|
|
||||||
|
# Build a compact result representation
|
||||||
|
compact_result = {
|
||||||
|
"ok": tool_result.get("ok"),
|
||||||
|
"output": output,
|
||||||
|
"error": tool_result.get("error"),
|
||||||
|
"exit_code": tool_result.get("metadata", {}).get("exit_code"),
|
||||||
|
}
|
||||||
|
|
||||||
return f"""{base_prompt}
|
return f"""{base_prompt}
|
||||||
|
|
||||||
Step: {step.description}
|
Step: {step.description}
|
||||||
|
|
@ -544,7 +674,7 @@ Tool: {step.tool}
|
||||||
Args: {step.args}
|
Args: {step.args}
|
||||||
|
|
||||||
Result:
|
Result:
|
||||||
{json.dumps(tool_result, indent=2)}
|
{json.dumps(compact_result, indent=2, ensure_ascii=False)}
|
||||||
|
|
||||||
Evaluate and respond with JSON:
|
Evaluate and respond with JSON:
|
||||||
{{"correctness": 0.0-1.0, "usefulness": 0.0-1.0, "safety": 0.0-1.0, "memory_store": true|false, "weight": 0.0-1.0, "explanation": "..."}}"""
|
{{"correctness": 0.0-1.0, "usefulness": 0.0-1.0, "safety": 0.0-1.0, "memory_store": true|false, "weight": 0.0-1.0, "explanation": "..."}}"""
|
||||||
|
|
@ -619,8 +749,15 @@ Evaluate and respond with JSON:
|
||||||
|
|
||||||
permission_result = None
|
permission_result = None
|
||||||
|
|
||||||
|
# If permission_override is provided, skip permission check
|
||||||
|
if permission_override is not None:
|
||||||
|
permission_result = {
|
||||||
|
"decision": permission_override.decision,
|
||||||
|
"command": tool_args.get("command", ""),
|
||||||
|
"cached": True,
|
||||||
|
}
|
||||||
# Check permission for shell_exec and file_write
|
# Check permission for shell_exec and file_write
|
||||||
if tool_name == "shell_exec":
|
elif tool_name == "shell_exec":
|
||||||
permission_result = self._permission_service.check_shell_command(
|
permission_result = self._permission_service.check_shell_command(
|
||||||
task_id=task.task_id,
|
task_id=task.task_id,
|
||||||
session_id=task.session_id,
|
session_id=task.session_id,
|
||||||
|
|
@ -693,7 +830,13 @@ Evaluate and respond with JSON:
|
||||||
|
|
||||||
if tool_name == "shell_exec":
|
if tool_name == "shell_exec":
|
||||||
command = str(tool_args.get("command", ""))
|
command = str(tool_args.get("command", ""))
|
||||||
if command.startswith("sudo ") and secret_override is None:
|
|
||||||
|
# Determine if sudo password is needed:
|
||||||
|
# 1. Command explicitly starts with "sudo"
|
||||||
|
# 2. Command is a known sudo-requiring command (apt, systemctl, etc.) — flagged by permission service
|
||||||
|
needs_password = command.startswith("sudo ") or (permission_result is not None and permission_result.get("requires_sudo", False))
|
||||||
|
|
||||||
|
if needs_password and secret_override is None:
|
||||||
secret_request = SecretRequest(
|
secret_request = SecretRequest(
|
||||||
task_id=task.task_id,
|
task_id=task.task_id,
|
||||||
session_id=task.session_id,
|
session_id=task.session_id,
|
||||||
|
|
@ -709,8 +852,12 @@ Evaluate and respond with JSON:
|
||||||
"secret_request": secret_request.model_dump(mode="json"),
|
"secret_request": secret_request.model_dump(mode="json"),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
if command.startswith("sudo ") and secret_override is not None:
|
if needs_password and secret_override is not None:
|
||||||
tool_args["command"] = f"sudo -S -p '' {command[len('sudo '):]}"
|
# Inject sudo -S for explicit sudo commands, or prepend sudo -S for implicit ones
|
||||||
|
if command.startswith("sudo "):
|
||||||
|
tool_args["command"] = f"sudo -S -p '' {command[len('sudo '):]}"
|
||||||
|
else:
|
||||||
|
tool_args["command"] = f"sudo -S -p '' {command}"
|
||||||
tool_args["stdin_secret"] = f"{secret_override}\n"
|
tool_args["stdin_secret"] = f"{secret_override}\n"
|
||||||
|
|
||||||
tool_call = ToolCall(
|
tool_call = ToolCall(
|
||||||
|
|
@ -720,10 +867,43 @@ Evaluate and respond with JSON:
|
||||||
step_id="step-1",
|
step_id="step-1",
|
||||||
)
|
)
|
||||||
self._publish(task, TOOL_CALLED, tool_call.model_dump(mode="json"))
|
self._publish(task, TOOL_CALLED, tool_call.model_dump(mode="json"))
|
||||||
|
if tool_name == "shell_exec":
|
||||||
|
tool_args["__output_callback"] = lambda stream, chunk: self._publish(
|
||||||
|
task,
|
||||||
|
TOOL_OUTPUT_CHUNK,
|
||||||
|
{
|
||||||
|
"tool": tool_name,
|
||||||
|
"step_id": "step-1",
|
||||||
|
"stream": stream,
|
||||||
|
"chunk": chunk,
|
||||||
|
},
|
||||||
|
)
|
||||||
tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args)
|
tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args)
|
||||||
self._publish(task, TOOL_COMPLETED, tool_result.model_dump(mode="json"))
|
self._publish(task, TOOL_COMPLETED, tool_result.model_dump(mode="json"))
|
||||||
|
|
||||||
needs_sudo = tool_result.metadata.get("needs_sudo", False) if tool_result.metadata else False
|
metadata = tool_result.metadata or {}
|
||||||
|
needs_sudo = metadata.get("needs_sudo", False)
|
||||||
|
sudo_auth_failed = metadata.get("sudo_auth_failed", False) or self._looks_like_sudo_auth_failure(tool_result)
|
||||||
|
|
||||||
|
if tool_name == "shell_exec" and not tool_result.ok and sudo_auth_failed:
|
||||||
|
original_command = str(directive.payload.get("args", {}).get("command", tool_args.get("command", "")))
|
||||||
|
secret_request = SecretRequest(
|
||||||
|
task_id=task.task_id,
|
||||||
|
session_id=task.session_id,
|
||||||
|
kind="sudo_password",
|
||||||
|
prompt="Sudo password incorrect. Try again",
|
||||||
|
command=original_command,
|
||||||
|
)
|
||||||
|
self._publish(task, SECRET_REQUESTED, secret_request.model_dump(mode="json"))
|
||||||
|
return {
|
||||||
|
"status": "awaiting_input",
|
||||||
|
"result": {
|
||||||
|
"error": "Sudo password failed",
|
||||||
|
"secret_request": secret_request.model_dump(mode="json"),
|
||||||
|
"attempt_failed": True,
|
||||||
|
"tool_result": tool_result.model_dump(mode="json"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
if not tool_result.ok and needs_sudo:
|
if not tool_result.ok and needs_sudo:
|
||||||
return {
|
return {
|
||||||
|
|
@ -737,11 +917,51 @@ Evaluate and respond with JSON:
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if tool_name == "shell_exec" and not tool_result.ok and self._command_analyzer:
|
||||||
|
original_command = str(directive.payload.get("args", {}).get("command", tool_args.get("command", "")))
|
||||||
|
diagnosis = self._command_analyzer.analyze(
|
||||||
|
command=original_command,
|
||||||
|
task_id=task.task_id,
|
||||||
|
session_id=task.session_id,
|
||||||
|
)
|
||||||
|
if diagnosis.get("type") != "ok":
|
||||||
|
return {
|
||||||
|
"status": "awaiting_review",
|
||||||
|
"result": {
|
||||||
|
"error": "Tool action requires review before replanning",
|
||||||
|
"review": {
|
||||||
|
"step_id": "step-1",
|
||||||
|
"tool": tool_name,
|
||||||
|
"command": original_command,
|
||||||
|
"diagnosis": diagnosis,
|
||||||
|
"critic_assessment": {
|
||||||
|
"classification": "model_planning_error",
|
||||||
|
"needs_replan": True,
|
||||||
|
"explanation": "Structured command analysis found a model action error before recovery.",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"tool_result": tool_result.model_dump(mode="json"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"status": "completed" if tool_result.ok else "failed",
|
"status": "completed" if tool_result.ok else "failed",
|
||||||
"result": tool_result.model_dump(mode="json"),
|
"result": tool_result.model_dump(mode="json"),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _looks_like_sudo_auth_failure(self, tool_result: ToolResult) -> bool:
|
||||||
|
output = f"{tool_result.output or ''}\n{tool_result.error or ''}".lower()
|
||||||
|
return any(
|
||||||
|
marker in output
|
||||||
|
for marker in (
|
||||||
|
"incorrect password",
|
||||||
|
"incorrect password attempt",
|
||||||
|
"sudo: no password was provided",
|
||||||
|
"sorry, try again",
|
||||||
|
"authentication failure",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
def _publish(self, task: UserTask, event_type: str, payload: dict[str, Any]) -> None:
|
def _publish(self, task: UserTask, event_type: str, payload: dict[str, Any]) -> None:
|
||||||
if not self._event_bus:
|
if not self._event_bus:
|
||||||
return
|
return
|
||||||
|
|
|
||||||
|
|
@ -16,3 +16,9 @@ class SecretResolutionRequest(BaseModel):
|
||||||
class PasswordResolutionRequest(BaseModel):
|
class PasswordResolutionRequest(BaseModel):
|
||||||
task_id: str
|
task_id: str
|
||||||
password: str
|
password: str
|
||||||
|
|
||||||
|
|
||||||
|
class ReviewResolutionRequest(BaseModel):
    """Request body carrying the outcome of a review for one task."""

    # Identifier of the task the review belongs to.
    task_id: str
    # Reviewer's decision, passed through as a plain string.
    decision: str
    # Optional free-text correction; omitted when the reviewer supplies none.
    correction: str | None = None
|
||||||
|
|
|
||||||
|
|
@ -76,6 +76,7 @@ class PermissionService:
|
||||||
"decision": "allowed_always",
|
"decision": "allowed_always",
|
||||||
"command": normalized,
|
"command": normalized,
|
||||||
"cached": True,
|
"cached": True,
|
||||||
|
"requires_sudo": _requires_sudo(normalized),
|
||||||
}
|
}
|
||||||
|
|
||||||
if command_hash in cache.get("allowed_once", {}):
|
if command_hash in cache.get("allowed_once", {}):
|
||||||
|
|
@ -85,6 +86,7 @@ class PermissionService:
|
||||||
"decision": "allowed_once",
|
"decision": "allowed_once",
|
||||||
"command": normalized,
|
"command": normalized,
|
||||||
"cached": True,
|
"cached": True,
|
||||||
|
"requires_sudo": _requires_sudo(normalized),
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check hard stop
|
# Check hard stop
|
||||||
|
|
@ -117,15 +119,20 @@ class PermissionService:
|
||||||
category = self._get_category(normalized)
|
category = self._get_category(normalized)
|
||||||
can_always = self._categories.get(category, {}).get("allow_always", True)
|
can_always = self._categories.get(category, {}).get("allow_always", True)
|
||||||
|
|
||||||
|
# Check if command requires sudo (e.g. apt, systemctl without explicit sudo prefix)
|
||||||
|
requires_sudo = _requires_sudo(normalized)
|
||||||
|
|
||||||
# Need user confirmation
|
# Need user confirmation
|
||||||
return {
|
result = {
|
||||||
"decision": "prompt",
|
"decision": "prompt",
|
||||||
"command": normalized,
|
"command": normalized,
|
||||||
"category": category,
|
"category": category,
|
||||||
"allow_always": can_always,
|
"allow_always": can_always,
|
||||||
|
"requires_sudo": requires_sudo,
|
||||||
"task_id": task_id,
|
"task_id": task_id,
|
||||||
"session_id": session_id,
|
"session_id": session_id,
|
||||||
}
|
}
|
||||||
|
return result
|
||||||
|
|
||||||
def check_write_path(
|
def check_write_path(
|
||||||
self,
|
self,
|
||||||
|
|
@ -243,28 +250,50 @@ class PermissionService:
|
||||||
"""Check if command is hard stop."""
|
"""Check if command is hard stop."""
|
||||||
hard_stop_commands = self._categories.get("hard_stop", {}).get("commands", [])
|
hard_stop_commands = self._categories.get("hard_stop", {}).get("commands", [])
|
||||||
|
|
||||||
cmd_lower = command.lower()
|
cmd_lower = command.lower().strip()
|
||||||
|
cmd_tokens = cmd_lower.split()
|
||||||
|
|
||||||
for hs in hard_stop_commands:
|
for hs in hard_stop_commands:
|
||||||
if hs.lower() in cmd_lower:
|
hs_lower = hs.lower().strip()
|
||||||
|
# For "rm -rf /" and "rm -rf /*", only match exact command
|
||||||
|
# Don't match "rm -rf /tmp/nonexistent" as hard stop
|
||||||
|
if hs_lower in ("rm -rf /", "rm -rf /*"):
|
||||||
|
if cmd_lower == hs_lower:
|
||||||
|
return True
|
||||||
|
continue
|
||||||
|
# For other patterns, use substring match
|
||||||
|
if hs_lower in cmd_lower:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _get_category(self, command: str) -> str:
    """Classify a shell command as ``"no_always"``, ``"hard_stop"`` or ``"normal"``.

    A configured pattern matches when the command *is* the pattern or starts
    with the pattern followed by further arguments. Comparison is
    case-insensitive and runs of whitespace are collapsed first, so
    ``"systemctl   stop nginx"`` still matches the pattern ``"systemctl stop"``.

    Args:
        command: Raw command line to classify.

    Returns:
        The first matching category name, checked in the order
        ``no_always`` then ``hard_stop``; ``"normal"`` when nothing matches
        or the command is empty.
    """
    tokens = command.lower().split()
    if not tokens:
        return "normal"
    normalized = " ".join(tokens)

    # no_always is checked first, matching the original precedence.
    for category in ("no_always", "hard_stop"):
        patterns = self._categories.get(category, {}).get("commands", [])
        for pattern in patterns:
            pat = " ".join(pattern.lower().split())
            if not pat:
                # An empty pattern would otherwise match nothing useful.
                continue
            # Exact equality covers multi-word patterns typed verbatim
            # (e.g. "systemctl stop"), which a bare prefix-plus-space test
            # misses; the prefix test covers "pattern <args...>".
            if normalized == pat or normalized.startswith(pat + " "):
                return category

    # Default to normal
    return "normal"
|
||||||
|
|
||||||
|
|
||||||
SUDO_COMMANDS = {
|
SUDO_COMMANDS = {
|
||||||
|
"sudo",
|
||||||
"apt", "apt-get", "dpkg", "yum", "dnf", "pacman", "zypper",
|
"apt", "apt-get", "dpkg", "yum", "dnf", "pacman", "zypper",
|
||||||
"systemctl", "service", "mount", "umount",
|
"systemctl", "service", "mount", "umount",
|
||||||
"shutdown", "reboot", "halt", "poweroff",
|
"shutdown", "reboot", "halt", "poweroff",
|
||||||
|
|
|
||||||
|
|
@ -2,12 +2,15 @@ TASK_RECEIVED = "task_received"
|
||||||
CONTEXT_BUILT = "context_built"
|
CONTEXT_BUILT = "context_built"
|
||||||
STEP_STARTED = "step_started"
|
STEP_STARTED = "step_started"
|
||||||
TOOL_CALLED = "tool_called"
|
TOOL_CALLED = "tool_called"
|
||||||
|
TOOL_OUTPUT_CHUNK = "tool_output_chunk"
|
||||||
TOOL_COMPLETED = "tool_completed"
|
TOOL_COMPLETED = "tool_completed"
|
||||||
PERMISSION_REQUESTED = "permission_requested"
|
PERMISSION_REQUESTED = "permission_requested"
|
||||||
PERMISSION_RESOLVED = "permission_resolved"
|
PERMISSION_RESOLVED = "permission_resolved"
|
||||||
TASK_AWAITING_PERMISSION = "task_awaiting_permission"
|
TASK_AWAITING_PERMISSION = "task_awaiting_permission"
|
||||||
SECRET_REQUESTED = "secret_requested"
|
SECRET_REQUESTED = "secret_requested"
|
||||||
TASK_AWAITING_INPUT = "task_awaiting_input"
|
TASK_AWAITING_INPUT = "task_awaiting_input"
|
||||||
|
TASK_AWAITING_REVIEW = "task_awaiting_review"
|
||||||
|
REVIEW_RESOLVED = "review_resolved"
|
||||||
CHECKPOINT_SAVED = "checkpoint_saved"
|
CHECKPOINT_SAVED = "checkpoint_saved"
|
||||||
TASK_COMPLETED = "task_completed"
|
TASK_COMPLETED = "task_completed"
|
||||||
TASK_FAILED = "task_failed"
|
TASK_FAILED = "task_failed"
|
||||||
|
|
@ -29,3 +32,4 @@ THINKER_CALLED = "thinker_called"
|
||||||
THINKER_RESULT = "thinker_result"
|
THINKER_RESULT = "thinker_result"
|
||||||
JSON_COMPILER_CALLED = "json_compiler_called"
|
JSON_COMPILER_CALLED = "json_compiler_called"
|
||||||
JSON_COMPILER_RESULT = "json_compiler_result"
|
JSON_COMPILER_RESULT = "json_compiler_result"
|
||||||
|
MEMORY_RECALL_USED = "memory_recall_used"
|
||||||
|
|
|
||||||
|
|
@ -101,14 +101,24 @@ class MemoryInterface:
|
||||||
def count(self) -> int:
|
def count(self) -> int:
|
||||||
return self._store.count()
|
return self._store.count()
|
||||||
|
|
||||||
def reindex(self) -> int:
    """Rebuild the vector index from the entries in the memory store.

    All embeddings are computed *before* the existing index is discarded, so
    a failure while encoding leaves the previous index (in memory and on
    disk) untouched instead of half-destroyed.

    Returns:
        Number of entries written to the rebuilt index.
    """
    # NOTE(review): at most 10000 entries are fetched; a larger store would
    # be indexed only partially — confirm this cap is intended.
    entries = self._store.get_all(limit=10000)

    # Encode first: this is the step most likely to fail (model/IO errors).
    embedded = [(entry.id, self._embeddings.encode(entry.text)) for entry in entries]

    # Delete old index file and re-initialize from scratch.
    index = self._vector_index
    index_path = index._index_path
    if index_path and index_path.exists():
        index_path.unlink()
    index._index = None
    index._init_index()

    for entry_id, embedding in embedded:
        index.insert(entry_id, embedding)
    index.save()
    return len(embedded)
|
||||||
|
|
||||||
def close(self) -> None:
|
def close(self) -> None:
|
||||||
self._store.close()
|
self._store.close()
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,205 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from app.core.contracts import MemoryEntry
|
||||||
|
from app.memory.interface import MemoryInterface
|
||||||
|
from app.models.async_adapters import AsyncOrchestratorAdapter
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
RECALL_PROMPT_TEMPLATE = """Определи, нужно ли искать в долговременной памяти для ответа на этот запрос.
|
||||||
|
|
||||||
|
Запрос: "{task_input}"
|
||||||
|
|
||||||
|
ИСКАТЬ в памяти если запрос:
|
||||||
|
- Содержит вопрос о пользователе (имя, предпочтения, история)
|
||||||
|
- Содержит отсылки к прошлым разговорам или действиям
|
||||||
|
- Содержит местоимения без контекста ("он", "это", "тот файл")
|
||||||
|
- Просит вспомнить, повторить, рассказать о прошлом
|
||||||
|
- Спрашивает "что ты помнишь", "как меня зовут", "что я говорил"
|
||||||
|
|
||||||
|
НЕ ИСКАТЬ если:
|
||||||
|
- Приветствие или прощание
|
||||||
|
- Простая команда (ls, pwd, echo)
|
||||||
|
- Общий вопрос не связанный с прошлым
|
||||||
|
|
||||||
|
Ответь ТОЛЬКО JSON:
|
||||||
|
{{"should_recall": true, "search_query": "поисковый запрос"}}
|
||||||
|
или
|
||||||
|
{{"should_recall": false, "reason": "краткая причина"}}"""
|
||||||
|
|
||||||
|
|
||||||
|
class MemoryRecallService:
    """Active recall: the system decides by itself what to look up in memory and when."""

    def __init__(
        self,
        memory_interface: MemoryInterface | None,
        recall_model: AsyncOrchestratorAdapter | None,
    ) -> None:
        self._memory = memory_interface
        self._model = recall_model

    async def recall(
        self,
        task_input: str,
        top_k: int = 5,
    ) -> dict[str, Any]:
        """Decide whether recall is needed and, if so, run the memory search.

        Args:
            task_input: The user's request text.
            top_k: Maximum number of hits requested from the memory search.

        Returns:
            A dict with the keys ``should_recall`` (bool), ``reason`` (str),
            ``query`` (str), ``results`` (list of memory entries) and
            ``summary`` (short digest for the orchestrator). When recall is
            skipped or fails, ``should_recall`` is False and the remaining
            fields are empty apart from ``reason``.
        """
        if not self._memory or not self._model:
            logger.debug(
                "Memory recall skipped: memory=%s, model=%s",
                self._memory is not None,
                self._model is not None,
            )
            return self._empty_result("memory_or_model_unavailable")

        # 1. The LLM decides whether a search is needed.
        decision = await self._classify(task_input)
        logger.debug("Memory recall decision: %r", decision)
        if not isinstance(decision, dict):
            return self._empty_result("invalid_decision_type")
        if not decision.get("should_recall"):
            return self._empty_result(decision.get("reason", "not_needed"))

        # Fall back to the raw request when the model produced no usable
        # query (missing key, blank string, or a non-string value).
        search_query = decision.get("search_query")
        if not isinstance(search_query, str) or not search_query.strip():
            search_query = task_input
        logger.info(f"Memory recall: query='{search_query}', reason='{decision.get('reason')}'")

        # 2. Vector search.
        try:
            raw_results = self._memory.search(query=search_query, top_k=top_k)
        except Exception as e:
            # Best-effort feature: a failing search must not fail the task.
            logger.warning(f"Memory search failed: {e}")
            return self._empty_result("search_failed")

        # 3. Filtering: drop low-score hits and near-duplicates.
        filtered = self._filter(raw_results)
        if not filtered:
            return self._empty_result("no_relevant_results")

        # 4. Digest for the orchestrator.
        summary = self._summarize(filtered, search_query)

        return {
            "should_recall": True,
            "reason": decision.get("reason", ""),
            "query": search_query,
            "results": filtered,
            "summary": summary,
        }

    async def _classify(self, task_input: str) -> dict[str, Any]:
        """Ask the model whether memory should be searched for this request.

        Returns the parsed decision dict when it contains ``should_recall``;
        otherwise a ``should_recall: False`` dict whose ``reason`` is
        ``parse_error`` or ``classification_error``.
        """
        prompt = RECALL_PROMPT_TEMPLATE.format(task_input=task_input)

        try:
            raw = await self._model.generate(prompt, max_tokens=512)
            data = self._parse_json(raw)
            if "should_recall" in data:
                return data
            logger.warning(f"Recall classification missing 'should_recall': {str(raw)[:200]}")
            return {"should_recall": False, "reason": "parse_error"}
        except Exception as e:
            # Boundary handler: any model failure degrades to "no recall".
            logger.warning(f"Recall classification failed: {e}")
            return {"should_recall": False, "reason": "classification_error"}

    def _filter(
        self,
        results: list[tuple[MemoryEntry, float]],
        min_score: float = 0.3,
    ) -> list[MemoryEntry]:
        """Drop hits scoring below ``min_score`` and de-duplicate the rest.

        Two entries count as duplicates when the first 100 characters of
        their stripped, lower-cased text are equal; the first one wins.
        """
        seen_texts: set[str] = set()
        filtered: list[MemoryEntry] = []

        for entry, score in results:
            if score < min_score:
                continue
            # Normalise the text for de-duplication.
            normalized = entry.text.strip().lower()[:100]
            if normalized in seen_texts:
                continue
            seen_texts.add(normalized)
            filtered.append(entry)

        return filtered

    def _summarize(
        self,
        results: list[MemoryEntry],
        query: str,
    ) -> str:
        """Build a short digest of the hits (at most five lines) for the orchestrator."""
        parts = [f"По запросу '{query}' найдено {len(results)} записей:"]
        for i, entry in enumerate(results[:5], 1):
            text_preview = entry.text[:120].replace("\n", " ")
            parts.append(f" {i}. [{entry.kind}] {text_preview}")
        return "\n".join(parts)

    def _parse_json(self, raw: str) -> dict[str, Any]:
        """Extract a JSON object from a model reply, skipping surrounding chatter.

        Strategy, in order:
          1. the span from the first ``{`` to the last ``}``;
          2. every later ``{`` (last to first) up to the last ``}``;
          3. a forward scan that decodes one object starting at each ``{``
             and ignores whatever follows it (handles trailing text that
             itself contains ``}``).

        Returns an empty dict when nothing parses to a JSON object.
        """
        if not isinstance(raw, str):
            return {}

        first = raw.find("{")
        end = raw.rfind("}") + 1
        if first < 0 or end <= 0:
            return {}

        starts = [pos for pos, ch in enumerate(raw) if ch == "{"]

        # Steps 1 and 2: fixed end at the last closing brace.
        for start in [first, *reversed(starts)]:
            if end <= start:
                continue
            try:
                data = json.loads(raw[start:end])
            except json.JSONDecodeError:
                continue
            if isinstance(data, dict):
                return data

        # Step 3: tolerate trailing text after the object.
        decoder = json.JSONDecoder()
        for start in starts:
            try:
                data, _ = decoder.raw_decode(raw, start)
            except json.JSONDecodeError:
                continue
            if isinstance(data, dict):
                return data

        return {}

    @staticmethod
    def _empty_result(reason: str) -> dict[str, Any]:
        """Return the "no recall" result carrying ``reason``."""
        return {
            "should_recall": False,
            "reason": reason,
            "query": "",
            "results": [],
            "summary": "",
        }
|
||||||
|
|
@ -1,11 +1,13 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
from concurrent.futures import Future, ThreadPoolExecutor
|
||||||
from threading import RLock
|
from threading import RLock
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from app.core.config import AppConfig, load_app_config
|
from app.core.config import AppConfig, load_app_config
|
||||||
from app.core.context_builder import ContextBuilder
|
from app.core.context_builder import ContextBuilder
|
||||||
|
from app.core.command_analyzer import CommandAnalyzer
|
||||||
from app.core.contracts import UserTask
|
from app.core.contracts import UserTask
|
||||||
from app.core.execution_engine import ExecutionEngine
|
from app.core.execution_engine import ExecutionEngine
|
||||||
from app.core.execution_scheduler import ExecutionScheduler
|
from app.core.execution_scheduler import ExecutionScheduler
|
||||||
|
|
@ -13,6 +15,7 @@ from app.core.async_router import AsyncRouter
|
||||||
from app.events.event_bus import EventBus
|
from app.events.event_bus import EventBus
|
||||||
from app.events.event_store import SQLiteEventStore
|
from app.events.event_store import SQLiteEventStore
|
||||||
from app.memory import MemoryInterface, MemoryStore, VectorIndex
|
from app.memory import MemoryInterface, MemoryStore, VectorIndex
|
||||||
|
from app.memory.recall import MemoryRecallService
|
||||||
from app.memory.write_policy import MemoryWritePolicy
|
from app.memory.write_policy import MemoryWritePolicy
|
||||||
from app.models import (
|
from app.models import (
|
||||||
CoderAdapter,
|
CoderAdapter,
|
||||||
|
|
@ -64,6 +67,8 @@ class RuntimeController:
|
||||||
self._model_cache: dict[tuple[object, ...], tuple[object, RLock]] = {}
|
self._model_cache: dict[tuple[object, ...], tuple[object, RLock]] = {}
|
||||||
self._memory_interface: MemoryInterface | None = None
|
self._memory_interface: MemoryInterface | None = None
|
||||||
self._memory_policy: MemoryWritePolicy | None = None
|
self._memory_policy: MemoryWritePolicy | None = None
|
||||||
|
self._background_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ducklm-task")
|
||||||
|
self._background_tasks: dict[str, Future[dict[str, object]]] = {}
|
||||||
self.tool_registry = None
|
self.tool_registry = None
|
||||||
self.tool_sandbox = None
|
self.tool_sandbox = None
|
||||||
|
|
||||||
|
|
@ -75,6 +80,8 @@ class RuntimeController:
|
||||||
self.tool_sandbox = ToolSandbox(
|
self.tool_sandbox = ToolSandbox(
|
||||||
allowed_root=self.base_dir,
|
allowed_root=self.base_dir,
|
||||||
timeout_ms=runtime_config.step_timeout_ms,
|
timeout_ms=runtime_config.step_timeout_ms,
|
||||||
|
command_timeout_ms=runtime_config.shell_command_timeout_ms,
|
||||||
|
idle_timeout_ms=runtime_config.shell_idle_timeout_ms,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.tool_registry = self._create_tool_registry()
|
self.tool_registry = self._create_tool_registry()
|
||||||
|
|
@ -121,6 +128,7 @@ class RuntimeController:
|
||||||
self.permission_service = PermissionService(
|
self.permission_service = PermissionService(
|
||||||
config=self._load_permissions_config(),
|
config=self._load_permissions_config(),
|
||||||
)
|
)
|
||||||
|
self.command_analyzer = CommandAnalyzer(self.permission_service)
|
||||||
|
|
||||||
self.execution_engine = ExecutionEngine(
|
self.execution_engine = ExecutionEngine(
|
||||||
event_bus=self.event_bus,
|
event_bus=self.event_bus,
|
||||||
|
|
@ -134,6 +142,8 @@ class RuntimeController:
|
||||||
memory_interface=self._memory_interface,
|
memory_interface=self._memory_interface,
|
||||||
prompts=self._prompts,
|
prompts=self._prompts,
|
||||||
recovery_limit=runtime_config.tool_retry_limit,
|
recovery_limit=runtime_config.tool_retry_limit,
|
||||||
|
critic_retry_limit=runtime_config.critic_retry_limit,
|
||||||
|
command_analyzer=self.command_analyzer,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.runtime_loop = RuntimeLoop(
|
self.runtime_loop = RuntimeLoop(
|
||||||
|
|
@ -194,35 +204,35 @@ class RuntimeController:
|
||||||
if thinker_config.get("path"):
|
if thinker_config.get("path"):
|
||||||
llm, lock = self._get_or_create_llm("thinker", thinker_config)
|
llm, lock = self._get_or_create_llm("thinker", thinker_config)
|
||||||
self._thinker = OrchestratorAdapter(llm, system_prompt=self._prompts.get("thinker"), lock=lock)
|
self._thinker = OrchestratorAdapter(llm, system_prompt=self._prompts.get("thinker"), lock=lock)
|
||||||
print(f"Thinker loaded: {self._thinker} (model: {thinker_config.get("path")})")
|
print(f"Thinker loaded: {self._thinker} (model: {thinker_config.get('path')})")
|
||||||
|
|
||||||
print("Loading json_compiler model...")
|
print("Loading json_compiler model...")
|
||||||
compiler_config = self.config.models.json_compiler or {}
|
compiler_config = self.config.models.json_compiler or {}
|
||||||
if compiler_config.get("path"):
|
if compiler_config.get("path"):
|
||||||
llm, lock = self._get_or_create_llm("json_compiler", compiler_config)
|
llm, lock = self._get_or_create_llm("json_compiler", compiler_config)
|
||||||
self._json_compiler = OrchestratorAdapter(llm, system_prompt=self._prompts.get("json_compiler"), lock=lock)
|
self._json_compiler = OrchestratorAdapter(llm, system_prompt=self._prompts.get("json_compiler"), lock=lock)
|
||||||
print(f"JSON Compiler loaded: {self._json_compiler} (model: {compiler_config.get("path")})")
|
print(f"JSON Compiler loaded: {self._json_compiler} (model: {compiler_config.get('path')})")
|
||||||
|
|
||||||
print("Loading coder model...")
|
print("Loading coder model...")
|
||||||
coder_config = self.config.models.coder or {}
|
coder_config = self.config.models.coder or {}
|
||||||
if coder_config.get("path"):
|
if coder_config.get("path"):
|
||||||
llm, lock = self._get_or_create_llm("coder", coder_config)
|
llm, lock = self._get_or_create_llm("coder", coder_config)
|
||||||
self._coder = CoderAdapter(llm, system_prompt=self._prompts.get("coder"), lock=lock)
|
self._coder = CoderAdapter(llm, system_prompt=self._prompts.get("coder"), lock=lock)
|
||||||
print(f"Coder loaded: {self._coder} (model: {coder_config.get("path")})")
|
print(f"Coder loaded: {self._coder} (model: {coder_config.get('path')})")
|
||||||
|
|
||||||
print("Loading critic model...")
|
print("Loading critic model...")
|
||||||
critic_config = self.config.models.critic or {}
|
critic_config = self.config.models.critic or {}
|
||||||
if critic_config.get("path"):
|
if critic_config.get("path"):
|
||||||
llm, lock = self._get_or_create_llm("critic", critic_config)
|
llm, lock = self._get_or_create_llm("critic", critic_config)
|
||||||
self._critic = CriticAdapter(llm, system_prompt=self._prompts.get("critic"), lock=lock)
|
self._critic = CriticAdapter(llm, system_prompt=self._prompts.get("critic"), lock=lock)
|
||||||
print(f"Critic loaded: {self._critic} (model: {critic_config.get("path")})")
|
print(f"Critic loaded: {self._critic} (model: {critic_config.get('path')})")
|
||||||
|
|
||||||
print("Loading sys_util model...")
|
print("Loading sys_util model...")
|
||||||
sys_util_config = self.config.models.sys_util or {}
|
sys_util_config = self.config.models.sys_util or {}
|
||||||
if sys_util_config.get("path"):
|
if sys_util_config.get("path"):
|
||||||
llm, lock = self._get_or_create_llm("sys_util", sys_util_config)
|
llm, lock = self._get_or_create_llm("sys_util", sys_util_config)
|
||||||
self._sys_util = OrchestratorAdapter(llm, system_prompt=self._prompts.get("sys_util"), lock=lock)
|
self._sys_util = OrchestratorAdapter(llm, system_prompt=self._prompts.get("sys_util"), lock=lock)
|
||||||
print(f"Sys_util loaded: {self._sys_util} (model: {sys_util_config.get("path")})")
|
print(f"Sys_util loaded: {self._sys_util} (model: {sys_util_config.get('path')})")
|
||||||
|
|
||||||
print("All models loaded successfully")
|
print("All models loaded successfully")
|
||||||
|
|
||||||
|
|
@ -241,6 +251,28 @@ class RuntimeController:
|
||||||
if async_coder:
|
if async_coder:
|
||||||
self.execution_engine.set_coder(async_coder)
|
self.execution_engine.set_coder(async_coder)
|
||||||
|
|
||||||
|
# Create MemoryRecallService using the configured model (default: sys_util)
|
||||||
|
# Reuses already-loaded async adapter — no duplicate model loading
|
||||||
|
recall_model_name = self.config.runtime.recall_model
|
||||||
|
recall_async_model = {
|
||||||
|
"sys_util": async_sys_util,
|
||||||
|
"thinker": async_thinker,
|
||||||
|
"json_compiler": async_compiler,
|
||||||
|
"critic": async_critic,
|
||||||
|
"coder": async_coder,
|
||||||
|
}.get(recall_model_name, async_sys_util)
|
||||||
|
|
||||||
|
self._recall_service = MemoryRecallService(
|
||||||
|
memory_interface=self._memory_interface,
|
||||||
|
recall_model=recall_async_model,
|
||||||
|
)
|
||||||
|
self.runtime_loop.set_recall_service(self._recall_service)
|
||||||
|
print(f"MemoryRecallService initialized with model: {recall_model_name}")
|
||||||
|
|
||||||
|
# Set memory policy in runtime loop
|
||||||
|
self.runtime_loop.set_memory_policy(self._memory_policy)
|
||||||
|
print(f"MemoryWritePolicy set: {self._memory_policy is not None}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Failed to load models at startup: {e}")
|
print(f"Failed to load models at startup: {e}")
|
||||||
raise RuntimeError(f"Model loading failed: {e}") from e
|
raise RuntimeError(f"Model loading failed: {e}") from e
|
||||||
|
|
@ -375,21 +407,76 @@ class RuntimeController:
|
||||||
def handle_task(self, task: UserTask) -> dict[str, object]:
|
def handle_task(self, task: UserTask) -> dict[str, object]:
|
||||||
return self.runtime_loop.run_task(task)
|
return self.runtime_loop.run_task(task)
|
||||||
|
|
||||||
|
def _submit_background(self, task_id: str, fn, *args) -> dict[str, object]:
    """Run ``fn(*args)`` on the background executor and track it under ``task_id``.

    The future is stored in ``self._background_tasks`` (replacing any earlier
    future for the same task). A done-callback logs an exception raised by
    ``fn`` so that a failed background run is not silently lost.

    Returns:
        ``{"task_id": task_id, "status": "accepted"}``.
    """
    import logging

    future = self._background_executor.submit(fn, *args)
    self._background_tasks[task_id] = future

    def _log_failure(done) -> None:
        # A cancelled future has no exception to report.
        if done.cancelled():
            return
        exc = done.exception()
        if exc is not None:
            logging.getLogger(__name__).error(
                "Background task %s failed", task_id, exc_info=exc
            )

    future.add_done_callback(_log_failure)
    return {"task_id": task_id, "status": "accepted"}

def _unknown_task(self, task_id: str) -> dict[str, object]:
    """Return the failure payload used when ``task_id`` is not in the state store."""
    return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}}

def submit_task(self, task: UserTask) -> dict[str, object]:
    """Schedule ``handle_task(task)`` in the background and return immediately."""
    return self._submit_background(task.task_id, self.handle_task, task)

def resolve_permission(self, task_id: str, decision: str) -> dict[str, object]:
    """Forward a permission decision to the runtime loop and return its result."""
    return self.runtime_loop.resolve_permission(
        task_id=task_id, decision=decision
    )

def submit_permission_resolution(self, task_id: str, decision: str) -> dict[str, object]:
    """Schedule ``resolve_permission`` in the background; fail fast on an unknown task."""
    if not self.task_state_store.get_task(task_id):
        return self._unknown_task(task_id)
    return self._submit_background(task_id, self.resolve_permission, task_id, decision)

def resolve_secret(self, task_id: str, secret: str) -> dict[str, object]:
    """Forward a user-supplied secret to the runtime loop and return its result."""
    return self.runtime_loop.resolve_secret(
        task_id=task_id, secret=secret
    )

def submit_secret_resolution(self, task_id: str, secret: str) -> dict[str, object]:
    """Schedule ``resolve_secret`` in the background; fail fast on an unknown task."""
    if not self.task_state_store.get_task(task_id):
        return self._unknown_task(task_id)
    return self._submit_background(task_id, self.resolve_secret, task_id, secret)

def resolve_password(self, task_id: str, password: str) -> dict[str, object]:
    """Forward a user-supplied password to the runtime loop and return its result."""
    return self.runtime_loop.resolve_password(
        task_id=task_id, password=password
    )

def resolve_review(self, task_id: str, decision: str, correction: str | None = None) -> dict[str, object]:
    """Forward a review decision (and optional correction) to the runtime loop."""
    return self.runtime_loop.resolve_review(
        task_id=task_id,
        decision=decision,
        correction=correction,
    )

def submit_review_resolution(self, task_id: str, decision: str, correction: str | None = None) -> dict[str, object]:
    """Schedule ``resolve_review`` in the background; fail fast on an unknown task."""
    if not self.task_state_store.get_task(task_id):
        return self._unknown_task(task_id)
    return self._submit_background(task_id, self.resolve_review, task_id, decision, correction)

def submit_password_resolution(self, task_id: str, password: str) -> dict[str, object]:
    """Schedule ``resolve_password`` in the background; fail fast on an unknown task."""
    if not self.task_state_store.get_task(task_id):
        return self._unknown_task(task_id)
    return self._submit_background(task_id, self.resolve_password, task_id, password)
|
||||||
|
|
||||||
def handle_critic_feedback(
|
def handle_critic_feedback(
|
||||||
self,
|
self,
|
||||||
feedback: str,
|
feedback: str,
|
||||||
|
|
|
||||||
|
|
@ -3,16 +3,46 @@ from __future__ import annotations
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
from app.core.context_builder import ContextBuilder
|
from app.core.context_builder import ContextBuilder
|
||||||
from app.core.contracts import ExecutionDirective, PermissionDecision, PermissionRequest, RuntimeEvent, SecretRequest, TaskCheckpoint, UserTask
|
from app.core.contracts import CriticScore, ExecutionDirective, PermissionDecision, PermissionRequest, RuntimeEvent, SecretRequest, TaskCheckpoint, UserTask
|
||||||
from app.core.execution_engine import ExecutionEngine
|
from app.core.execution_engine import ExecutionEngine
|
||||||
from app.core.async_router import AsyncRouter
|
from app.core.async_router import AsyncRouter
|
||||||
from app.events.event_bus import EventBus
|
from app.events.event_bus import EventBus
|
||||||
from app.events.event_types import CHECKPOINT_SAVED, CONTEXT_BUILT, TASK_AWAITING_INPUT, TASK_AWAITING_PERMISSION, TASK_COMPLETED, TASK_FAILED, TASK_RECEIVED
|
from app.events.event_types import CHECKPOINT_SAVED, CONTEXT_BUILT, MEMORY_RECALL_USED, MEMORY_WRITE_DECIDED, REVIEW_RESOLVED, TASK_AWAITING_INPUT, TASK_AWAITING_PERMISSION, TASK_AWAITING_REVIEW, TASK_COMPLETED, TASK_FAILED, TASK_RECEIVED
|
||||||
from app.core.permission_service import PermissionService
|
from app.core.permission_service import PermissionService
|
||||||
|
from app.memory.recall import MemoryRecallService
|
||||||
|
from app.memory.write_policy import MemoryWritePolicy
|
||||||
from app.state.checkpoint_store import SQLiteCheckpointStore
|
from app.state.checkpoint_store import SQLiteCheckpointStore
|
||||||
from app.state.task_state_store import SQLiteTaskStateStore
|
from app.state.task_state_store import SQLiteTaskStateStore
|
||||||
|
|
||||||
|
|
||||||
|
def _build_response_directive(execution_result: dict) -> dict | None:
    """Build a ``respond`` directive from step results or a direct tool output.

    Args:
        execution_result: Execution outcome; its ``"result"`` entry is
            inspected. Anything that is not a dict there yields ``None``.

    Returns:
        The JSON-mode dump of an ``ExecutionDirective(type="respond", ...)``
        whose payload text is either the outputs of all successful steps
        joined by blank lines, or the direct ``output`` of a successful tool
        call; ``None`` when there is nothing to show the client.
    """
    result = execution_result.get("result")
    if not isinstance(result, dict):
        return None

    # Case 1: step_results from plan execution
    step_results = result.get("step_results")
    if step_results:
        response_parts = []
        for step in step_results:
            if not isinstance(step, dict):
                continue
            step_payload = step.get("result", {})
            if not isinstance(step_payload, dict):
                continue
            # The tool result may be nested one level deeper under "result";
            # fall back to the step payload itself when it is absent or is
            # not a mapping (e.g. a plain string).
            nested = step_payload.get("result")
            tool_result = nested if isinstance(nested, dict) else step_payload
            if tool_result.get("ok") and tool_result.get("output"):
                response_parts.append(str(tool_result["output"]))
        if response_parts:
            response_text = "\n\n".join(response_parts)
            return ExecutionDirective(
                type="respond", payload={"text": response_text}
            ).model_dump(mode="json")

    # Case 2: direct tool output (e.g. from resolve_secret -> execute_tool)
    if result.get("ok") and result.get("output"):
        return ExecutionDirective(
            type="respond", payload={"text": str(result["output"])}
        ).model_dump(mode="json")

    return None
|
||||||
|
|
||||||
|
|
||||||
class RuntimeLoop:
|
class RuntimeLoop:
|
||||||
"""Central control loop skeleton coordinating task state and events."""
|
"""Central control loop skeleton coordinating task state and events."""
|
||||||
|
|
||||||
|
|
@ -26,6 +56,8 @@ class RuntimeLoop:
|
||||||
execution_engine: ExecutionEngine,
|
execution_engine: ExecutionEngine,
|
||||||
permission_service: PermissionService,
|
permission_service: PermissionService,
|
||||||
memory_interface=None,
|
memory_interface=None,
|
||||||
|
recall_service: MemoryRecallService | None = None,
|
||||||
|
memory_policy: MemoryWritePolicy | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
self._event_bus = event_bus
|
self._event_bus = event_bus
|
||||||
self._task_state_store = task_state_store
|
self._task_state_store = task_state_store
|
||||||
|
|
@ -35,6 +67,14 @@ class RuntimeLoop:
|
||||||
self._execution_engine = execution_engine
|
self._execution_engine = execution_engine
|
||||||
self._permission_service = permission_service
|
self._permission_service = permission_service
|
||||||
self._memory_interface = memory_interface
|
self._memory_interface = memory_interface
|
||||||
|
self._recall_service = recall_service
|
||||||
|
self._memory_policy = memory_policy
|
||||||
|
|
||||||
|
def set_recall_service(self, recall_service: MemoryRecallService) -> None:
|
||||||
|
self._recall_service = recall_service
|
||||||
|
|
||||||
|
def set_memory_policy(self, policy: MemoryWritePolicy | None) -> None:
    """Install (or clear, with ``None``) the policy used when writing to memory.

    Replaces whatever policy was supplied to the constructor.
    """
    self._memory_policy = policy
|
||||||
|
|
||||||
def run_task(self, task: UserTask) -> dict[str, object]:
|
def run_task(self, task: UserTask) -> dict[str, object]:
|
||||||
# Check input for hard-stop commands BEFORE processing
|
# Check input for hard-stop commands BEFORE processing
|
||||||
|
|
@ -82,6 +122,23 @@ class RuntimeLoop:
|
||||||
context = self._context_builder.build(task=task, checkpoint=checkpoint)
|
context = self._context_builder.build(task=task, checkpoint=checkpoint)
|
||||||
self._publish(task, CONTEXT_BUILT, {"keys": sorted(context.keys())})
|
self._publish(task, CONTEXT_BUILT, {"keys": sorted(context.keys())})
|
||||||
|
|
||||||
|
# Active memory recall: system decides if it needs to search memory
|
||||||
|
recall_result = asyncio.run(self._run_recall(task))
|
||||||
|
if recall_result["should_recall"]:
|
||||||
|
context["memory_recall"] = {
|
||||||
|
"query": recall_result["query"],
|
||||||
|
"summary": recall_result["summary"],
|
||||||
|
"entries": [
|
||||||
|
{"text": e.text, "kind": e.kind, "weight": e.weight}
|
||||||
|
for e in recall_result["results"]
|
||||||
|
],
|
||||||
|
}
|
||||||
|
self._publish(task, MEMORY_RECALL_USED, {
|
||||||
|
"query": recall_result["query"],
|
||||||
|
"results_count": len(recall_result["results"]),
|
||||||
|
"reason": recall_result["reason"],
|
||||||
|
})
|
||||||
|
|
||||||
directive = asyncio.run(
|
directive = asyncio.run(
|
||||||
self._router.decide(state=state, context=context, task_id=task.task_id, session_id=task.session_id)
|
self._router.decide(state=state, context=context, task_id=task.task_id, session_id=task.session_id)
|
||||||
)
|
)
|
||||||
|
|
@ -104,15 +161,21 @@ class RuntimeLoop:
|
||||||
"reason": "Permission denied - требуется sudo пароль",
|
"reason": "Permission denied - требуется sudo пароль",
|
||||||
"attempts": 0,
|
"attempts": 0,
|
||||||
}
|
}
|
||||||
|
elif execution_result["status"] == "awaiting_review":
|
||||||
|
state_patch["pending_permission_request"] = None
|
||||||
|
state_patch["pending_secret_request"] = None
|
||||||
|
state_patch["resolved_permission_decision"] = None
|
||||||
|
state_patch["pending_review"] = execution_result["result"]["review"]
|
||||||
else:
|
else:
|
||||||
state_patch["pending_permission_request"] = None
|
state_patch["pending_permission_request"] = None
|
||||||
state_patch["pending_secret_request"] = None
|
state_patch["pending_secret_request"] = None
|
||||||
state_patch["resolved_permission_decision"] = None
|
state_patch["resolved_permission_decision"] = None
|
||||||
|
state_patch["pending_review"] = None
|
||||||
self._task_state_store.update_task(task.task_id, state_patch)
|
self._task_state_store.update_task(task.task_id, state_patch)
|
||||||
final_status = str(execution_result["status"])
|
final_status = str(execution_result["status"])
|
||||||
|
|
||||||
# For awaiting states - do NOT mark task as completed, keep it in pending state
|
# For awaiting states - do NOT mark task as completed, keep it in pending state
|
||||||
if final_status in ("awaiting_permission", "awaiting_input", "awaiting_password"):
|
if final_status in ("awaiting_permission", "awaiting_input", "awaiting_password", "awaiting_review"):
|
||||||
# Task stays in pending state, don't update to completed
|
# Task stays in pending state, don't update to completed
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
|
|
@ -125,9 +188,9 @@ class RuntimeLoop:
|
||||||
)
|
)
|
||||||
self._checkpoint_store.save(final_checkpoint)
|
self._checkpoint_store.save(final_checkpoint)
|
||||||
|
|
||||||
# Generate response after plan execution
|
# Generate response for user
|
||||||
|
# Case 1: step_results from plan execution
|
||||||
if final_status == "completed" and execution_result.get("result", {}).get("step_results"):
|
if final_status == "completed" and execution_result.get("result", {}).get("step_results"):
|
||||||
# Format tool results into response
|
|
||||||
step_results = execution_result["result"]["step_results"]
|
step_results = execution_result["result"]["step_results"]
|
||||||
response_parts = []
|
response_parts = []
|
||||||
for step in step_results:
|
for step in step_results:
|
||||||
|
|
@ -135,16 +198,21 @@ class RuntimeLoop:
|
||||||
tool_result = result_data.get("result", result_data)
|
tool_result = result_data.get("result", result_data)
|
||||||
if tool_result.get("ok") and tool_result.get("output"):
|
if tool_result.get("ok") and tool_result.get("output"):
|
||||||
response_parts.append(tool_result["output"])
|
response_parts.append(tool_result["output"])
|
||||||
|
|
||||||
if response_parts:
|
if response_parts:
|
||||||
# Create respond directive
|
|
||||||
response_text = "\n\n".join(response_parts)
|
response_text = "\n\n".join(response_parts)
|
||||||
respond_directive = ExecutionDirective(
|
execution_result["response_directive"] = ExecutionDirective(
|
||||||
type="respond",
|
type="respond", payload={"text": response_text}
|
||||||
payload={"text": response_text},
|
).model_dump(mode="json")
|
||||||
)
|
|
||||||
# Add to execution result
|
# Case 2: respond directive from orchestrator (direct response, no steps)
|
||||||
execution_result["response_directive"] = respond_directive.model_dump(mode="json")
|
if final_status == "completed" and not execution_result.get("response_directive"):
|
||||||
|
# Use the original directive from router.decide()
|
||||||
|
if hasattr(directive, "type") and directive.type == "respond":
|
||||||
|
if directive.payload.get("text"):
|
||||||
|
execution_result["response_directive"] = directive.model_dump(mode="json")
|
||||||
|
elif isinstance(directive, dict) and directive.get("type") == "respond":
|
||||||
|
if directive.get("payload", {}).get("text"):
|
||||||
|
execution_result["response_directive"] = directive
|
||||||
|
|
||||||
# Map status to terminal event type
|
# Map status to terminal event type
|
||||||
if final_status == "completed":
|
if final_status == "completed":
|
||||||
|
|
@ -155,6 +223,8 @@ class RuntimeLoop:
|
||||||
terminal_event_type = TASK_AWAITING_PERMISSION
|
terminal_event_type = TASK_AWAITING_PERMISSION
|
||||||
elif final_status == "awaiting_input":
|
elif final_status == "awaiting_input":
|
||||||
terminal_event_type = TASK_AWAITING_INPUT
|
terminal_event_type = TASK_AWAITING_INPUT
|
||||||
|
elif final_status == "awaiting_review":
|
||||||
|
terminal_event_type = TASK_AWAITING_REVIEW
|
||||||
elif final_status == "awaiting_password":
|
elif final_status == "awaiting_password":
|
||||||
terminal_event_type = TASK_AWAITING_PERMISSION
|
terminal_event_type = TASK_AWAITING_PERMISSION
|
||||||
else:
|
else:
|
||||||
|
|
@ -175,7 +245,10 @@ class RuntimeLoop:
|
||||||
"task_id": task.task_id,
|
"task_id": task.task_id,
|
||||||
"status": final_status,
|
"status": final_status,
|
||||||
"directive": directive.model_dump(mode="json"),
|
"directive": directive.model_dump(mode="json"),
|
||||||
"result": execution_result["result"],
|
"result": {
|
||||||
|
**execution_result["result"],
|
||||||
|
"response_directive": execution_result.get("response_directive"),
|
||||||
|
},
|
||||||
"events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
|
"events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -254,6 +327,9 @@ class RuntimeLoop:
|
||||||
"pending_secret_request": execution_result["result"].get("secret_request")
|
"pending_secret_request": execution_result["result"].get("secret_request")
|
||||||
if final_status == "awaiting_input"
|
if final_status == "awaiting_input"
|
||||||
else None,
|
else None,
|
||||||
|
"pending_review": execution_result["result"].get("review")
|
||||||
|
if final_status == "awaiting_review"
|
||||||
|
else None,
|
||||||
"resolved_permission_decision": resolved,
|
"resolved_permission_decision": resolved,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
@ -266,6 +342,8 @@ class RuntimeLoop:
|
||||||
terminal_event_type = TASK_AWAITING_INPUT
|
terminal_event_type = TASK_AWAITING_INPUT
|
||||||
elif final_status == "awaiting_permission":
|
elif final_status == "awaiting_permission":
|
||||||
terminal_event_type = TASK_AWAITING_PERMISSION
|
terminal_event_type = TASK_AWAITING_PERMISSION
|
||||||
|
elif final_status == "awaiting_review":
|
||||||
|
terminal_event_type = TASK_AWAITING_REVIEW
|
||||||
else:
|
else:
|
||||||
terminal_event_type = TASK_FAILED
|
terminal_event_type = TASK_FAILED
|
||||||
self._publish(
|
self._publish(
|
||||||
|
|
@ -283,7 +361,10 @@ class RuntimeLoop:
|
||||||
return {
|
return {
|
||||||
"task_id": task.task_id,
|
"task_id": task.task_id,
|
||||||
"status": final_status,
|
"status": final_status,
|
||||||
"result": execution_result["result"],
|
"result": {
|
||||||
|
**execution_result["result"],
|
||||||
|
"response_directive": _build_response_directive(execution_result),
|
||||||
|
},
|
||||||
"events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
|
"events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -314,12 +395,15 @@ class RuntimeLoop:
|
||||||
secret_override=secret,
|
secret_override=secret,
|
||||||
)
|
)
|
||||||
final_status = str(execution_result["status"])
|
final_status = str(execution_result["status"])
|
||||||
|
pending_review = execution_result["result"].get("review") if final_status == "awaiting_review" else None
|
||||||
|
pending_secret = execution_result["result"].get("secret_request") if final_status == "awaiting_input" else None
|
||||||
self._task_state_store.update_task(
|
self._task_state_store.update_task(
|
||||||
task.task_id,
|
task.task_id,
|
||||||
{
|
{
|
||||||
"status": final_status,
|
"status": final_status,
|
||||||
"pending_secret_request": None,
|
"pending_secret_request": pending_secret,
|
||||||
"resolved_permission_decision": None,
|
"resolved_permission_decision": resolved_permission_payload if final_status == "awaiting_input" else None,
|
||||||
|
"pending_review": pending_review,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
checkpoint = TaskCheckpoint(task_id=task.task_id, status=final_status)
|
checkpoint = TaskCheckpoint(task_id=task.task_id, status=final_status)
|
||||||
|
|
@ -331,6 +415,8 @@ class RuntimeLoop:
|
||||||
terminal_event_type = TASK_AWAITING_INPUT
|
terminal_event_type = TASK_AWAITING_INPUT
|
||||||
elif final_status == "awaiting_permission":
|
elif final_status == "awaiting_permission":
|
||||||
terminal_event_type = TASK_AWAITING_PERMISSION
|
terminal_event_type = TASK_AWAITING_PERMISSION
|
||||||
|
elif final_status == "awaiting_review":
|
||||||
|
terminal_event_type = TASK_AWAITING_REVIEW
|
||||||
else:
|
else:
|
||||||
terminal_event_type = TASK_FAILED
|
terminal_event_type = TASK_FAILED
|
||||||
self._publish(
|
self._publish(
|
||||||
|
|
@ -344,10 +430,55 @@ class RuntimeLoop:
|
||||||
return {
|
return {
|
||||||
"task_id": task.task_id,
|
"task_id": task.task_id,
|
||||||
"status": final_status,
|
"status": final_status,
|
||||||
"result": execution_result["result"],
|
"result": {
|
||||||
|
**execution_result["result"],
|
||||||
|
"response_directive": _build_response_directive(execution_result),
|
||||||
|
},
|
||||||
"events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
|
"events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def resolve_review(self, task_id: str, decision: str, correction: str | None = None) -> dict[str, object]:
    """Apply a user's verdict on a pending action review and re-run the task.

    The verdict is published as a ``REVIEW_RESOLVED`` event, stored in memory
    as a ``critique`` entry (best-effort), cleared from the task state, and
    handed to ``run_task`` under ``context["previous_action_review"]``.

    Args:
        task_id: Identifier of the task whose stored state holds a
            ``pending_review``.
        decision: The user's verdict. ``"wrong_action"`` is stored in memory
            with weight 0.9; any other value is stored with weight 0.5.
        correction: Optional free-text correction supplied by the user.

    Returns:
        A ``"failed"`` payload when the task is unknown or has no pending
        review; otherwise the result of ``run_task`` for the rebuilt task.
    """
    state = self._task_state_store.get_task(task_id)
    if not state:
        return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}}
    pending_review = state.get("pending_review")
    if not pending_review:
        return {"task_id": task_id, "status": "failed", "result": {"error": "No pending review"}}

    session_id = state["session_id"]
    review_payload = {
        "decision": decision,
        "correction": correction,
        "review": pending_review,
    }
    # ``task_context`` may be present but stored as None; unpacking None
    # would raise TypeError, so fall back to an empty mapping.
    previous_context = state.get("task_context") or {}
    task = UserTask(
        task_id=task_id,
        session_id=session_id,
        input=state["task_input"],
        context={
            **previous_context,
            "previous_action_review": dict(review_payload),
        },
    )
    self._publish(task, REVIEW_RESOLVED, dict(review_payload))

    if self._memory_interface:
        try:
            self._memory_interface.insert(
                text=f"User reviewed model action as {decision}. Correction: {correction or ''}. Review: {pending_review}",
                kind="critique",
                source="user",
                task_id=task_id,
                session_id=session_id,
                weight=0.9 if decision == "wrong_action" else 0.5,
                metadata={"decision": decision, "review": pending_review},
            )
        except Exception as exc:
            # Storing the critique is best-effort and must not block the
            # re-run, but the failure should be visible rather than silent.
            import logging

            logging.getLogger(__name__).warning(
                "Failed to store review critique in memory: %s", exc
            )

    self._task_state_store.update_task(task_id, {"pending_review": None, "status": "replanning"})
    return self.run_task(task)
|
||||||
|
|
||||||
def resolve_password(self, task_id: str, password: str) -> dict[str, object]:
|
def resolve_password(self, task_id: str, password: str) -> dict[str, object]:
|
||||||
state = self._task_state_store.get_task(task_id)
|
state = self._task_state_store.get_task(task_id)
|
||||||
if not state:
|
if not state:
|
||||||
|
|
@ -445,7 +576,10 @@ class RuntimeLoop:
|
||||||
return {
|
return {
|
||||||
"task_id": task.task_id,
|
"task_id": task.task_id,
|
||||||
"status": final_status,
|
"status": final_status,
|
||||||
"result": execution_result["result"],
|
"result": {
|
||||||
|
**execution_result["result"],
|
||||||
|
"response_directive": _build_response_directive(execution_result),
|
||||||
|
},
|
||||||
"events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
|
"events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -459,22 +593,61 @@ class RuntimeLoop:
|
||||||
)
|
)
|
||||||
self._event_bus.publish(event)
|
self._event_bus.publish(event)
|
||||||
|
|
||||||
|
async def _run_recall(self, task: UserTask) -> dict:
|
||||||
|
"""Run active memory recall before orchestration."""
|
||||||
|
if not self._recall_service:
|
||||||
|
return {"should_recall": False, "reason": "no_recall_service", "query": "", "results": [], "summary": ""}
|
||||||
|
try:
|
||||||
|
return await self._recall_service.recall(task_input=task.input)
|
||||||
|
except Exception as e:
|
||||||
|
return {"should_recall": False, "reason": f"recall_error: {e}", "query": "", "results": [], "summary": ""}
|
||||||
|
|
||||||
def _save_to_memory(self, task: UserTask, execution_result: dict, status: str) -> None:
|
def _save_to_memory(self, task: UserTask, execution_result: dict, status: str) -> None:
|
||||||
"""Save task input and result to memory for session context."""
|
"""Save task input and result to memory for session context, using MemoryWritePolicy."""
|
||||||
if not self._memory_interface:
|
if not self._memory_interface:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# Build a synthetic critic_score for policy based on task status
|
||||||
|
# For summary/tool_result without real critic, we derive from execution outcome
|
||||||
|
if status == "completed":
|
||||||
|
synthetic_score = CriticScore(
|
||||||
|
correctness=0.9, usefulness=0.8, safety=0.95,
|
||||||
|
memory_store=True, weight=0.85, explanation="Task completed successfully"
|
||||||
|
)
|
||||||
|
elif status == "failed":
|
||||||
|
synthetic_score = CriticScore(
|
||||||
|
correctness=0.2, usefulness=0.3, safety=0.7,
|
||||||
|
memory_store=True, weight=0.5, explanation="Task failed — store for learning"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
synthetic_score = CriticScore(
|
||||||
|
correctness=0.5, usefulness=0.5, safety=0.8,
|
||||||
|
memory_store=False, weight=0.3, explanation=f"Status: {status}"
|
||||||
|
)
|
||||||
|
|
||||||
# Save task input as summary
|
# Save task input as summary
|
||||||
self._memory_interface.insert(
|
decision = "store"
|
||||||
text=f"User request: {task.input}",
|
if self._memory_policy:
|
||||||
kind="summary",
|
decision = self._memory_policy.decide(
|
||||||
source="user",
|
critic_score=synthetic_score,
|
||||||
task_id=task.task_id,
|
memory_type="summary",
|
||||||
session_id=task.session_id,
|
session_id=task.session_id,
|
||||||
weight=0.8,
|
)
|
||||||
metadata={"status": status},
|
if decision in ("store", "store_with_weight"):
|
||||||
)
|
weight = synthetic_score.weight if decision == "store_with_weight" else 0.8
|
||||||
|
self._memory_interface.insert(
|
||||||
|
text=f"User request: {task.input}",
|
||||||
|
kind="summary",
|
||||||
|
source="user",
|
||||||
|
task_id=task.task_id,
|
||||||
|
session_id=task.session_id,
|
||||||
|
weight=weight,
|
||||||
|
metadata={"status": status, "policy_decision": decision},
|
||||||
|
)
|
||||||
|
self._publish(task, MEMORY_WRITE_DECIDED, {
|
||||||
|
"kind": "summary", "decision": decision, "text_preview": task.input[:80]
|
||||||
|
})
|
||||||
|
|
||||||
# Save execution result
|
# Save execution result
|
||||||
result_text = ""
|
result_text = ""
|
||||||
|
|
@ -489,16 +662,27 @@ class RuntimeLoop:
|
||||||
result_text = f" | Error: {execution_result.get('result', {}).get('error', 'Unknown')}"
|
result_text = f" | Error: {execution_result.get('result', {}).get('error', 'Unknown')}"
|
||||||
|
|
||||||
if result_text:
|
if result_text:
|
||||||
self._memory_interface.insert(
|
decision = "store"
|
||||||
text=f"Result: {status}{result_text}",
|
if self._memory_policy:
|
||||||
kind="tool_result",
|
decision = self._memory_policy.decide(
|
||||||
source="system",
|
critic_score=synthetic_score,
|
||||||
task_id=task.task_id,
|
memory_type="tool_result",
|
||||||
session_id=task.session_id,
|
session_id=task.session_id,
|
||||||
weight=0.7,
|
)
|
||||||
metadata={"status": status},
|
if decision in ("store", "store_with_weight"):
|
||||||
)
|
weight = synthetic_score.weight if decision == "store_with_weight" else 0.7
|
||||||
|
self._memory_interface.insert(
|
||||||
|
text=f"Result: {status}{result_text}",
|
||||||
|
kind="tool_result",
|
||||||
|
source="system",
|
||||||
|
task_id=task.task_id,
|
||||||
|
session_id=task.session_id,
|
||||||
|
weight=weight,
|
||||||
|
metadata={"status": status, "policy_decision": decision},
|
||||||
|
)
|
||||||
|
self._publish(task, MEMORY_WRITE_DECIDED, {
|
||||||
|
"kind": "tool_result", "decision": decision, "text_preview": result_text[:80]
|
||||||
|
})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Log but don't fail the task
|
|
||||||
import logging
|
import logging
|
||||||
logging.getLogger(__name__).warning(f"Failed to save to memory: {e}")
|
logging.getLogger(__name__).warning(f"Failed to save to memory: {e}")
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from app.core.contracts import RuntimeEvent
|
from app.core.contracts import RuntimeEvent
|
||||||
from app.events.event_bus import EventBus
|
from app.events.event_bus import EventBus
|
||||||
|
|
@ -12,7 +13,7 @@ class StreamingManager:
|
||||||
|
|
||||||
def __init__(self, event_bus: EventBus) -> None:
|
def __init__(self, event_bus: EventBus) -> None:
|
||||||
self._event_bus = event_bus
|
self._event_bus = event_bus
|
||||||
self._subscribers: dict[str, list[asyncio.Queue[RuntimeEvent]]] = defaultdict(list)
|
self._subscribers: dict[str, list[StreamSubscriber]] = defaultdict(list)
|
||||||
self._event_bus.subscribe(self._on_event)
|
self._event_bus.subscribe(self._on_event)
|
||||||
|
|
||||||
def replay_events(self, task_id: str) -> list[RuntimeEvent]:
|
def replay_events(self, task_id: str) -> list[RuntimeEvent]:
|
||||||
|
|
@ -20,17 +21,26 @@ class StreamingManager:
|
||||||
|
|
||||||
def subscribe(self, task_id: str) -> asyncio.Queue[RuntimeEvent]:
|
def subscribe(self, task_id: str) -> asyncio.Queue[RuntimeEvent]:
|
||||||
queue: asyncio.Queue[RuntimeEvent] = asyncio.Queue()
|
queue: asyncio.Queue[RuntimeEvent] = asyncio.Queue()
|
||||||
self._subscribers[task_id].append(queue)
|
self._subscribers[task_id].append(
|
||||||
|
StreamSubscriber(loop=asyncio.get_running_loop(), queue=queue)
|
||||||
|
)
|
||||||
return queue
|
return queue
|
||||||
|
|
||||||
def unsubscribe(self, task_id: str, queue: asyncio.Queue[RuntimeEvent]) -> None:
|
def unsubscribe(self, task_id: str, queue: asyncio.Queue[RuntimeEvent]) -> None:
|
||||||
listeners = self._subscribers.get(task_id, [])
|
listeners = self._subscribers.get(task_id, [])
|
||||||
if queue in listeners:
|
for listener in list(listeners):
|
||||||
listeners.remove(queue)
|
if listener.queue is queue:
|
||||||
|
listeners.remove(listener)
|
||||||
|
break
|
||||||
if not listeners and task_id in self._subscribers:
|
if not listeners and task_id in self._subscribers:
|
||||||
del self._subscribers[task_id]
|
del self._subscribers[task_id]
|
||||||
|
|
||||||
def _on_event(self, event: RuntimeEvent) -> None:
|
def _on_event(self, event: RuntimeEvent) -> None:
|
||||||
for queue in self._subscribers.get(event.task_id, []):
|
for listener in list(self._subscribers.get(event.task_id, [])):
|
||||||
queue.put_nowait(event)
|
listener.loop.call_soon_threadsafe(listener.queue.put_nowait, event)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class StreamSubscriber:
    """A subscriber queue together with the event loop it was registered on.

    ``StreamingManager.subscribe`` records the running loop next to the queue
    so that ``_on_event`` can hand events over with
    ``loop.call_soon_threadsafe`` rather than touching the queue directly.
    """

    # Loop captured via asyncio.get_running_loop() at subscription time.
    loop: asyncio.AbstractEventLoop
    # Queue returned to the subscriber; events are put onto it on ``loop``.
    queue: asyncio.Queue[RuntimeEvent]
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,20 @@ from app.tools.base import BaseTool
|
||||||
from app.tools.sandbox import ToolSandbox
|
from app.tools.sandbox import ToolSandbox
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_sudo_auth_failure(output: str) -> bool:
|
||||||
|
normalized = output.lower()
|
||||||
|
return any(
|
||||||
|
marker in normalized
|
||||||
|
for marker in (
|
||||||
|
"incorrect password",
|
||||||
|
"incorrect password attempt",
|
||||||
|
"sudo: no password was provided",
|
||||||
|
"sorry, try again",
|
||||||
|
"authentication failure",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class Tool(BaseTool):
|
class Tool(BaseTool):
|
||||||
name = "shell_exec"
|
name = "shell_exec"
|
||||||
description = "Execute shell commands"
|
description = "Execute shell commands"
|
||||||
|
|
@ -18,16 +32,24 @@ class Tool(BaseTool):
|
||||||
return ToolResult(tool=self.name, ok=False, error="Missing command", metadata={"exit_code": -1})
|
return ToolResult(tool=self.name, ok=False, error="Missing command", metadata={"exit_code": -1})
|
||||||
cwd = args.get("cwd")
|
cwd = args.get("cwd")
|
||||||
stdin_secret = args.get("stdin_secret")
|
stdin_secret = args.get("stdin_secret")
|
||||||
|
output_callback = args.get("__output_callback")
|
||||||
completed = self._sandbox.run_shell(
|
completed = self._sandbox.run_shell(
|
||||||
command=command,
|
command=command,
|
||||||
cwd=str(cwd) if cwd else None,
|
cwd=str(cwd) if cwd else None,
|
||||||
stdin_data=str(stdin_secret) if stdin_secret is not None else None,
|
stdin_data=str(stdin_secret) if stdin_secret is not None else None,
|
||||||
|
output_callback=output_callback if callable(output_callback) else None,
|
||||||
)
|
)
|
||||||
output = completed.stdout if completed.returncode == 0 else completed.stderr or completed.stdout
|
output = completed.stdout if completed.returncode == 0 else completed.stderr or completed.stdout
|
||||||
|
sudo_auth_failed = completed.returncode != 0 and _detect_sudo_auth_failure(
|
||||||
|
f"{completed.stdout}\n{completed.stderr}"
|
||||||
|
)
|
||||||
return ToolResult(
|
return ToolResult(
|
||||||
tool=self.name,
|
tool=self.name,
|
||||||
ok=completed.returncode == 0,
|
ok=completed.returncode == 0,
|
||||||
output=output,
|
output=output,
|
||||||
error=None if completed.returncode == 0 else f"Command failed with exit code {completed.returncode}",
|
error=None if completed.returncode == 0 else f"Command failed with exit code {completed.returncode}",
|
||||||
metadata={"exit_code": completed.returncode},
|
metadata={
|
||||||
|
"exit_code": completed.returncode,
|
||||||
|
"sudo_auth_failed": sudo_auth_failed,
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,28 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import signal
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Callable
|
||||||
|
|
||||||
|
|
||||||
class ToolSandbox:
|
class ToolSandbox:
|
||||||
"""Applies simple working directory and timeout restrictions."""
|
"""Applies simple working directory and timeout restrictions."""
|
||||||
|
|
||||||
def __init__(self, allowed_root: str | Path, timeout_ms: int) -> None:
|
def __init__(
|
||||||
|
self,
|
||||||
|
allowed_root: str | Path,
|
||||||
|
timeout_ms: int,
|
||||||
|
command_timeout_ms: int | None = None,
|
||||||
|
idle_timeout_ms: int | None = None,
|
||||||
|
) -> None:
|
||||||
self._allowed_root = Path(allowed_root).resolve()
|
self._allowed_root = Path(allowed_root).resolve()
|
||||||
self._timeout_seconds = max(timeout_ms / 1000, 1)
|
self._timeout_seconds = max(timeout_ms / 1000, 0.001)
|
||||||
|
self._command_timeout_seconds = max((command_timeout_ms or timeout_ms) / 1000, 0.001)
|
||||||
|
self._idle_timeout_seconds = max((idle_timeout_ms or timeout_ms) / 1000, 0.001)
|
||||||
|
|
||||||
def ensure_path_allowed(self, path: str | Path) -> Path:
|
def ensure_path_allowed(self, path: str | Path) -> Path:
|
||||||
resolved = Path(path).expanduser().resolve()
|
resolved = Path(path).expanduser().resolve()
|
||||||
|
|
@ -23,17 +35,105 @@ class ToolSandbox:
|
||||||
command: str,
|
command: str,
|
||||||
cwd: str | Path | None = None,
|
cwd: str | Path | None = None,
|
||||||
stdin_data: str | None = None,
|
stdin_data: str | None = None,
|
||||||
|
output_callback: Callable[[str, str], None] | None = None,
|
||||||
) -> subprocess.CompletedProcess[str]:
|
) -> subprocess.CompletedProcess[str]:
|
||||||
working_directory = self.ensure_path_allowed(cwd or self._allowed_root)
|
working_directory = self.ensure_path_allowed(cwd or self._allowed_root)
|
||||||
env = {"PATH": os.environ.get("PATH", "")}
|
env = {"PATH": os.environ.get("PATH", "")}
|
||||||
return subprocess.run(
|
if output_callback is None:
|
||||||
|
return subprocess.run(
|
||||||
|
command,
|
||||||
|
shell=True,
|
||||||
|
cwd=str(working_directory),
|
||||||
|
env=env,
|
||||||
|
text=True,
|
||||||
|
capture_output=True,
|
||||||
|
input=stdin_data,
|
||||||
|
timeout=self._command_timeout_seconds,
|
||||||
|
check=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
process = subprocess.Popen(
|
||||||
command,
|
command,
|
||||||
shell=True,
|
shell=True,
|
||||||
cwd=str(working_directory),
|
cwd=str(working_directory),
|
||||||
env=env,
|
env=env,
|
||||||
text=True,
|
text=True,
|
||||||
capture_output=True,
|
stdin=subprocess.PIPE if stdin_data is not None else None,
|
||||||
input=stdin_data,
|
stdout=subprocess.PIPE,
|
||||||
timeout=self._timeout_seconds,
|
stderr=subprocess.PIPE,
|
||||||
check=False,
|
start_new_session=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
stdout_chunks: list[str] = []
|
||||||
|
stderr_chunks: list[str] = []
|
||||||
|
output_lock = threading.Lock()
|
||||||
|
last_output_at = time.monotonic()
|
||||||
|
|
||||||
|
if stdin_data is not None and process.stdin is not None:
|
||||||
|
process.stdin.write(stdin_data)
|
||||||
|
process.stdin.close()
|
||||||
|
|
||||||
|
def read_stream(stream_name: str) -> None:
|
||||||
|
stream = process.stdout if stream_name == "stdout" else process.stderr
|
||||||
|
if stream is None:
|
||||||
|
return
|
||||||
|
chunks = stdout_chunks if stream_name == "stdout" else stderr_chunks
|
||||||
|
try:
|
||||||
|
for line in iter(stream.readline, ""):
|
||||||
|
if not line:
|
||||||
|
break
|
||||||
|
chunks.append(line)
|
||||||
|
nonlocal last_output_at
|
||||||
|
with output_lock:
|
||||||
|
last_output_at = time.monotonic()
|
||||||
|
output_callback(stream_name, line)
|
||||||
|
finally:
|
||||||
|
stream.close()
|
||||||
|
|
||||||
|
stdout_thread = threading.Thread(target=read_stream, args=("stdout",), daemon=True)
|
||||||
|
stderr_thread = threading.Thread(target=read_stream, args=("stderr",), daemon=True)
|
||||||
|
stdout_thread.start()
|
||||||
|
stderr_thread.start()
|
||||||
|
|
||||||
|
timed_out = False
|
||||||
|
timeout_reason: str | None = None
|
||||||
|
started_at = time.monotonic()
|
||||||
|
return_code: int | None = None
|
||||||
|
while return_code is None:
|
||||||
|
return_code = process.poll()
|
||||||
|
if return_code is not None:
|
||||||
|
break
|
||||||
|
|
||||||
|
now = time.monotonic()
|
||||||
|
with output_lock:
|
||||||
|
idle_for = now - last_output_at
|
||||||
|
if now - started_at > self._command_timeout_seconds:
|
||||||
|
timed_out = True
|
||||||
|
timeout_reason = f"Command timed out after {self._command_timeout_seconds:.0f}s"
|
||||||
|
break
|
||||||
|
if idle_for > self._idle_timeout_seconds:
|
||||||
|
timed_out = True
|
||||||
|
timeout_reason = f"Command produced no output for {self._idle_timeout_seconds:.0f}s"
|
||||||
|
break
|
||||||
|
time.sleep(0.1)
|
||||||
|
|
||||||
|
if timed_out:
|
||||||
|
try:
|
||||||
|
os.killpg(process.pid, signal.SIGKILL)
|
||||||
|
except ProcessLookupError:
|
||||||
|
pass
|
||||||
|
except PermissionError:
|
||||||
|
process.kill()
|
||||||
|
return_code = process.wait()
|
||||||
|
timeout_message = f"{timeout_reason}\n"
|
||||||
|
stderr_chunks.append(timeout_message)
|
||||||
|
output_callback("stderr", timeout_message)
|
||||||
|
|
||||||
|
stdout_thread.join(timeout=1)
|
||||||
|
stderr_thread.join(timeout=1)
|
||||||
|
return subprocess.CompletedProcess(
|
||||||
|
args=command,
|
||||||
|
returncode=return_code if not timed_out else -9,
|
||||||
|
stdout="".join(stdout_chunks),
|
||||||
|
stderr="".join(stderr_chunks),
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,22 @@ from app.tools.base import BaseTool
|
||||||
from app.tools.sandbox import ToolSandbox
|
from app.tools.sandbox import ToolSandbox
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_sudo_auth_failure(output: str) -> bool:
|
||||||
|
normalized = output.lower()
|
||||||
|
return any(
|
||||||
|
marker in normalized
|
||||||
|
for marker in (
|
||||||
|
"incorrect password",
|
||||||
|
"incorrect password attempt",
|
||||||
|
"sudo: no password was provided",
|
||||||
|
"sudo: password incorrect",
|
||||||
|
"sorry, try again",
|
||||||
|
"authentication failure",
|
||||||
|
"wrong password",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ShellExecTool(BaseTool):
|
class ShellExecTool(BaseTool):
|
||||||
name = "shell_exec"
|
name = "shell_exec"
|
||||||
|
|
||||||
|
|
@ -18,6 +34,7 @@ class ShellExecTool(BaseTool):
|
||||||
cwd = args.get("cwd")
|
cwd = args.get("cwd")
|
||||||
stdin_secret = args.get("stdin_secret")
|
stdin_secret = args.get("stdin_secret")
|
||||||
password = args.get("password")
|
password = args.get("password")
|
||||||
|
output_callback = args.get("__output_callback")
|
||||||
|
|
||||||
if password:
|
if password:
|
||||||
command = f'echo "{password}" | sudo -S {command}'
|
command = f'echo "{password}" | sudo -S {command}'
|
||||||
|
|
@ -26,21 +43,23 @@ class ShellExecTool(BaseTool):
|
||||||
command=command,
|
command=command,
|
||||||
cwd=str(cwd) if cwd else None,
|
cwd=str(cwd) if cwd else None,
|
||||||
stdin_data=str(stdin_secret) if stdin_secret is not None else None,
|
stdin_data=str(stdin_secret) if stdin_secret is not None else None,
|
||||||
|
output_callback=output_callback if callable(output_callback) else None,
|
||||||
)
|
)
|
||||||
output = completed.stdout if completed.returncode == 0 else completed.stderr or completed.stdout
|
output = completed.stdout if completed.returncode == 0 else completed.stderr or completed.stdout
|
||||||
error_output = completed.stderr or completed.stdout
|
error_output = completed.stderr or completed.stdout
|
||||||
is_sudo_error = (
|
sudo_auth_failed = completed.returncode != 0 and _detect_sudo_auth_failure(
|
||||||
completed.returncode != 0 and
|
f"{completed.stdout}\n{completed.stderr}"
|
||||||
("permission denied" in error_output.lower() or
|
|
||||||
"incorrect password" in error_output.lower() or
|
|
||||||
"sudo: password incorrect" in error_output.lower() or
|
|
||||||
"wrong password" in error_output.lower())
|
|
||||||
)
|
)
|
||||||
|
needs_sudo = completed.returncode != 0 and "permission denied" in error_output.lower() and not sudo_auth_failed
|
||||||
|
|
||||||
return ToolResult(
|
return ToolResult(
|
||||||
tool=self.name,
|
tool=self.name,
|
||||||
ok=completed.returncode == 0,
|
ok=completed.returncode == 0,
|
||||||
output=output,
|
output=output,
|
||||||
error=None if completed.returncode == 0 else f"Command failed with exit code {completed.returncode}",
|
error=None if completed.returncode == 0 else f"Command failed with exit code {completed.returncode}",
|
||||||
metadata={"exit_code": completed.returncode, "needs_sudo": is_sudo_error},
|
metadata={
|
||||||
|
"exit_code": completed.returncode,
|
||||||
|
"needs_sudo": needs_sudo,
|
||||||
|
"sudo_auth_failed": sudo_auth_failed,
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,42 @@
|
||||||
|
{
|
||||||
|
"thinker": {
|
||||||
|
"path": "Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf",
|
||||||
|
"backend": "vulkan",
|
||||||
|
"n_gpu_layers": -1,
|
||||||
|
"max_tokens": 2048,
|
||||||
|
"temperature": 0.3
|
||||||
|
},
|
||||||
|
"json_compiler": {
|
||||||
|
"path": "gemma-4-E4B-it-Q4_K_M.gguf",
|
||||||
|
"backend": "cpu",
|
||||||
|
"n_gpu_layers": 0,
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"temperature": 0.1
|
||||||
|
},
|
||||||
|
"coder": {
|
||||||
|
"path": "X-Coder-SFT-Qwen3-8B.Q6_K.gguf",
|
||||||
|
"backend": "cpu",
|
||||||
|
"n_gpu_layers": 0,
|
||||||
|
"max_tokens": 2048,
|
||||||
|
"temperature": 0.2
|
||||||
|
},
|
||||||
|
"critic": {
|
||||||
|
"path": "gemma-4-E4B-it-Q4_K_M.gguf",
|
||||||
|
"backend": "cpu",
|
||||||
|
"n_gpu_layers": 0,
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"temperature": 0.1
|
||||||
|
},
|
||||||
|
"sys_util": {
|
||||||
|
"path": "Menlo_Lucy-Q4_K_M.gguf",
|
||||||
|
"backend": "cpu",
|
||||||
|
"n_gpu_layers": 0,
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"temperature": 0.1
|
||||||
|
},
|
||||||
|
"embeddings": {
|
||||||
|
"path": "all-MiniLM-L6-v2",
|
||||||
|
"model_name": "sentence-transformers/all-MiniLM-L6-v2",
|
||||||
|
"embedding_dim": 384
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,42 @@
|
||||||
|
{
|
||||||
|
"thinker": {
|
||||||
|
"path": "Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf",
|
||||||
|
"backend": "vulkan",
|
||||||
|
"n_gpu_layers": -1,
|
||||||
|
"max_tokens": 2048,
|
||||||
|
"temperature": 0.3
|
||||||
|
},
|
||||||
|
"json_compiler": {
|
||||||
|
"path": "gemma-4-E4B-it-Q4_K_M.gguf",
|
||||||
|
"backend": "cpu",
|
||||||
|
"n_gpu_layers": 0,
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"temperature": 0.1
|
||||||
|
},
|
||||||
|
"coder": {
|
||||||
|
"path": "X-Coder-SFT-Qwen3-8B.Q6_K.gguf",
|
||||||
|
"backend": "cpu",
|
||||||
|
"n_gpu_layers": 0,
|
||||||
|
"max_tokens": 2048,
|
||||||
|
"temperature": 0.2
|
||||||
|
},
|
||||||
|
"critic": {
|
||||||
|
"path": "gemma-4-E4B-it-Q4_K_M.gguf",
|
||||||
|
"backend": "cpu",
|
||||||
|
"n_gpu_layers": 0,
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"temperature": 0.1
|
||||||
|
},
|
||||||
|
"sys_util": {
|
||||||
|
"path": "Menlo_Lucy-Q4_K_M.gguf",
|
||||||
|
"backend": "cpu",
|
||||||
|
"n_gpu_layers": 0,
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"temperature": 0.1
|
||||||
|
},
|
||||||
|
"embeddings": {
|
||||||
|
"path": "all-MiniLM-L6-v2",
|
||||||
|
"model_name": "sentence-transformers/all-MiniLM-L6-v2",
|
||||||
|
"embedding_dim": 384
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -35,6 +35,12 @@
|
||||||
"chmod -R 000",
|
"chmod -R 000",
|
||||||
"chmod -R 777",
|
"chmod -R 777",
|
||||||
"chown -R",
|
"chown -R",
|
||||||
|
"apt",
|
||||||
|
"apt-get",
|
||||||
|
"dpkg",
|
||||||
|
"yum",
|
||||||
|
"dnf",
|
||||||
|
"pacman",
|
||||||
"shutdown",
|
"shutdown",
|
||||||
"reboot",
|
"reboot",
|
||||||
"halt",
|
"halt",
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,15 @@
|
||||||
{
|
{
|
||||||
|
"thinker": "You are the orchestrator of a local AI agent runtime. Your job is to analyze the user's task and decide how to execute it.\n\n## Decision Types\n\n1. **Direct response** — for simple questions, greetings, conversations:\n {\"type\": \"respond\", \"payload\": {\"text\": \"your answer\"}}\n\n2. **Single tool step** — for simple tasks needing one tool:\n {\"type\": \"step\", \"payload\": {\"tool\": \"shell_exec\", \"args\": {\"command\": \"...\"}}}\n\n3. **Multi-step plan** — for complex tasks that need decomposition:\n {\"type\": \"plan\", \"payload\": {\"steps\": [\n {\"id\": \"step-1\", \"tool\": \"shell_exec\", \"args\": {\"command\": \"...\"}, \"description\": \"...\", \"depends_on\": []},\n {\"id\": \"step-2\", \"tool\": \"file_read\", \"args\": {\"path\": \"...\"}, \"description\": \"...\", \"depends_on\": [\"step-1\"]}\n ]}}\n\n## When to use multi-step plan\n- Task requires multiple operations (search → read → write)\n- Task involves checking prerequisites before acting\n- Task requires gathering information before producing result\n- User asks to do something complex (setup, configure, analyze)\n\n## Memory\n- If memory recall results are provided, USE them to inform your decisions\n- If you know something from memory, mention it in step descriptions\n- Store important results for future use\n\n## Rules\n- ALWAYS respond with valid JSON only\n- Each step MUST have a unique id\n- Use depends_on for ordering constraints\n- Keep steps focused — one action per step\n- If unsure, start with an information-gathering step\n- Respond ONLY with valid JSON, no explanations",
|
||||||
|
|
||||||
"orchestrator": "You are an expert orchestrator for a local AI agent system. Your role is to analyze the user's task and generate executable runtime steps.\n\nTool selection (choose the right tool):\n- shell_exec: for running commands, checking programs exist ('which', '--version'), searching files\n- file_read: for reading contents of a file (must be existing file path)\n- file_write: for creating or updating files\n- memory: for storing or searching memory\n\nSTRICT OUTPUT FORMAT - MUST follow exactly:\n\nSingle step:\n{\"type\": \"step\", \"payload\": {\"tool\": \"shell_exec\", \"args\": {\"command\": \"...\"}}}\n{\"type\": \"step\", \"payload\": {\"tool\": \"file_read\", \"args\": {\"path\": \"...\"}}}\n{\"type\": \"step\", \"payload\": {\"tool\": \"file_write\", \"args\": {\"path\": \"...\", \"content\": \"...\"}}}\n\nMulti-step plan:\n{\"type\": \"plan\", \"payload\": {\"steps\": [{\"tool\": \"file_read\", \"args\": {\"path\": \"...\"}, \"description\": \"...\", \"depends_on\": []}]}}\n\nDirect response:\n{\"type\": \"respond\", \"payload\": {\"text\": \"...\"}}\n\nIMPORTANT:\n- Use exactly {\"type\": \"step|plan|respond\", \"payload\": {...}} format\n- Do NOT output array alone\n- Do NOT use \"kind\" - use \"type\"\n- Respond ONLY with valid JSON\n- Your response MUST be complete valid JSON - the closing brace } MUST be present\n- Do NOT truncate your response - if you cannot fit all steps, use a single step\n\nTool selection:\n- For checking if a program/command exists: use shell_exec with 'which <program>' or '<program> --version'\n- For reading file contents: use file_read with path to file (NOT command)\n- For executing any command: use shell_exec\n- Previous experience (from memory) may help - consider it but YOU decide how to proceed",
|
"orchestrator": "You are an expert orchestrator for a local AI agent system. Your role is to analyze the user's task and generate executable runtime steps.\n\nTool selection (choose the right tool):\n- shell_exec: for running commands, checking programs exist ('which', '--version'), searching files\n- file_read: for reading contents of a file (must be existing file path)\n- file_write: for creating or updating files\n- memory: for storing or searching memory\n\nSTRICT OUTPUT FORMAT - MUST follow exactly:\n\nSingle step:\n{\"type\": \"step\", \"payload\": {\"tool\": \"shell_exec\", \"args\": {\"command\": \"...\"}}}\n{\"type\": \"step\", \"payload\": {\"tool\": \"file_read\", \"args\": {\"path\": \"...\"}}}\n{\"type\": \"step\", \"payload\": {\"tool\": \"file_write\", \"args\": {\"path\": \"...\", \"content\": \"...\"}}}\n\nMulti-step plan:\n{\"type\": \"plan\", \"payload\": {\"steps\": [{\"tool\": \"file_read\", \"args\": {\"path\": \"...\"}, \"description\": \"...\", \"depends_on\": []}]}}\n\nDirect response:\n{\"type\": \"respond\", \"payload\": {\"text\": \"...\"}}\n\nIMPORTANT:\n- Use exactly {\"type\": \"step|plan|respond\", \"payload\": {...}} format\n- Do NOT output array alone\n- Do NOT use \"kind\" - use \"type\"\n- Respond ONLY with valid JSON\n- Your response MUST be complete valid JSON - the closing brace } MUST be present\n- Do NOT truncate your response - if you cannot fit all steps, use a single step\n\nTool selection:\n- For checking if a program/command exists: use shell_exec with 'which <program>' or '<program> --version'\n- For reading file contents: use file_read with path to file (NOT command)\n- For executing any command: use shell_exec\n- Previous experience (from memory) may help - consider it but YOU decide how to proceed",
|
||||||
|
|
||||||
"planning": "You are a planning specialist. Generate execution plans.\n\nOutput MUST be:\n{\"type\": \"plan\", \"version\": \"1.0\", \"payload\": {\"steps\": [{\"tool\": \"\", \"args\": {}, \"description\": \"...\", \"depends_on\": []}]}}\n\nRules:\n- Each step must have unique id (auto-generated)\n- Use \"depends_on\" for step ordering\n- Use \"tool\" for tool operations\n- Respond ONLY with valid JSON",
|
"planning": "You are a planning specialist. Generate execution plans.\n\nOutput MUST be:\n{\"type\": \"plan\", \"version\": \"1.0\", \"payload\": {\"steps\": [{\"tool\": \"\", \"args\": {}, \"description\": \"...\", \"depends_on\": []}]}}\n\nRules:\n- Each step must have unique id (auto-generated)\n- Use \"depends_on\" for step ordering\n- Use \"tool\" for tool operations\n- Respond ONLY with valid JSON",
|
||||||
|
|
||||||
"coder": "You are an expert code generation model.\n\nOutput format:\n{\"type\": \"code\", \"payload\": {\"language\": \"python\", \"content\": \"...\"}}\n\nOR for completion:\n{\"type\": \"respond\", \"payload\": {\"text\": \"...\"}}\n\nGenerate clean, working code. Respond ONLY with valid JSON.",
|
"coder": "You are an expert code generation model.\n\nOutput format:\n{\"type\": \"code\", \"payload\": {\"language\": \"python\", \"content\": \"...\"}}\n\nOR for completion:\n{\"type\": \"respond\", \"payload\": {\"text\": \"...\"}}\n\nGenerate clean, working code. Respond ONLY with valid JSON.",
|
||||||
|
|
||||||
"critic": "You are a critic model. Evaluate tool execution results.\n\nScoring criteria:\n- correctness: 0-1 (does result accomplish task?)\n- usefulness: 0-1 (is result useful?)\n- safety: 0-1 (is result safe?)\n- suggest_memory: boolean (should this be stored in memory?)\n- weight: 0-1 (importance score)\n- explanation: brief reasoning\n\nOutput format:\n{\"type\": \"evaluation\", \"payload\": {\"correctness\": 0.0-1.0, \"usefulness\": 0.0-1.0, \"safety\": 0.0-1.0, \"suggest_memory\": true|false, \"weight\": 0.0-1.0, \"explanation\": \"...\"}}\n\nRespond ONLY with valid JSON.",
|
"critic": "You are a critic model. Evaluate tool execution results.\n\nScoring criteria:\n- correctness: 0-1 (does result accomplish task?)\n- usefulness: 0-1 (is result useful?)\n- safety: 0-1 (is result safe?)\n- suggest_memory: boolean (should this be stored in memory?)\n- weight: 0-1 (importance score)\n- explanation: brief reasoning\n\nOutput format:\n{\"type\": \"evaluation\", \"payload\": {\"correctness\": 0.0-1.0, \"usefulness\": 0.0-1.0, \"safety\": 0.0-1.0, \"suggest_memory\": true|false, \"weight\": 0.0-1.0, \"explanation\": \"...\"}}\n\nRespond ONLY with valid JSON.",
|
||||||
|
|
||||||
"system": "You are ducklm, a local AI agent runtime.\n\nSTRICT RULES:\n- You MUST strictly follow execution schemas\n- You are NOT allowed to output free-form text\n- All outputs MUST be valid JSON matching runtime contracts\n- Use exact tool names from available tool set\n\nCurrent capabilities:\n- Execute shell commands (shell_exec)\n- Read/write files (file_read, file_write)\n- Memory operations (memory)\n\nAlways respond with valid JSON.",
|
"system": "You are ducklm, a local AI agent runtime.\n\nSTRICT RULES:\n- You MUST strictly follow execution schemas\n- You are NOT allowed to output free-form text\n- All outputs MUST be valid JSON matching runtime contracts\n- Use exact tool names from available tool set\n\nCurrent capabilities:\n- Execute shell commands (shell_exec)\n- Read/write files (file_read, file_write)\n- Memory operations (memory)\n\nAlways respond with valid JSON.",
|
||||||
"sys_util": "You are a STRICT JSON repair engine inside a production AI runtime.\nYour job is ONLY to fix invalid JSON syntax.\nYou are NOT allowed to:\n- change meaning of data\n- add new fields\n- remove valid fields\n- interpret intent\n- explain anything\n- reformat structure logically\n---\nINPUT:\nYou receive a malformed or invalid JSON string.\n---\nOUTPUT RULES:\n- Output ONLY valid JSON\n- No markdown\n- No comments\n- No explanations\n- No extra text\n---\nREPAIR RULES (STRICT):\nFix ONLY syntax issues:\n- missing or extra commas\n- missing quotes\n- incorrect brackets\n- trailing commas\n- invalid escaping\n- broken strings\n- unbalanced braces\nDO NOT:\n- rename keys\n- reorder fields intentionally\n- guess missing semantic data\n- \"improve\" structure\n---\nIMPORTANT:\nIf multiple valid repairs exist:\n\u2192 choose the minimal change that makes JSON valid\n---\nOUTPUT MUST BE VALID JSON OR NOTHING ELSE\nInvalid JSON:"
|
|
||||||
|
"sys_util": "You are a STRICT JSON repair engine inside a production AI runtime.\nYour job is ONLY to fix invalid JSON syntax.\nYou are NOT allowed to:\n- change meaning of data\n- add new fields\n- remove valid fields\n- interpret intent\n- explain anything\n- reformat structure logically\n---\nINPUT:\nYou receive a malformed or invalid JSON string.\n---\nOUTPUT RULES:\n- Output ONLY valid JSON\n- No markdown\n- No comments\n- No explanations\n- No extra text\n---\nREPAIR RULES (STRICT):\nFix ONLY syntax issues:\n- missing or extra commas\n- missing quotes\n- incorrect brackets\n- trailing commas\n- invalid escaping\n- broken strings\n- unbalanced braces\nDO NOT:\n- rename keys\n- reorder fields intentionally\n- guess missing semantic data\n- \"improve\" structure\n---\nIMPORTANT:\nIf multiple valid repairs exist:\n→ choose the minimal change that makes JSON valid\n---\nOUTPUT MUST BE VALID JSON OR NOTHING ELSE\nInvalid JSON:"
|
||||||
}
|
}
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
{
|
{
|
||||||
"step_timeout_ms": 30000,
|
"step_timeout_ms": 30000,
|
||||||
"task_timeout_ms": 300000,
|
"task_timeout_ms": 300000,
|
||||||
|
"shell_command_timeout_ms": 3600000,
|
||||||
|
"shell_idle_timeout_ms": 600000,
|
||||||
"planner_retry_limit": 2,
|
"planner_retry_limit": 2,
|
||||||
"tool_retry_limit": 1,
|
"tool_retry_limit": 1,
|
||||||
"replan_limit": 1,
|
"replan_limit": 1,
|
||||||
|
|
@ -34,5 +36,7 @@
|
||||||
"debug_orchestrator_log_length": 500,
|
"debug_orchestrator_log_length": 500,
|
||||||
"json_fix_retry_limit": 2,
|
"json_fix_retry_limit": 2,
|
||||||
"json_fix_use_sys_util": true,
|
"json_fix_use_sys_util": true,
|
||||||
"intent_classifier": "thinker"
|
"intent_classifier": "thinker",
|
||||||
|
"recall_model": "json_compiler",
|
||||||
|
"critic_retry_limit": 2
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,24 @@
|
||||||
|
# UI Bootstrap And Review Flow Plan
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Move the web chat UI to Bootstrap 5.3 with Bootswatch themes and improve review/password/terminal-output ergonomics.
|
||||||
|
|
||||||
|
## Required Changes
|
||||||
|
|
||||||
|
- Replace the current hand-written visual system in `app/api/static/index.html` with Bootstrap 5.3 layout/components.
|
||||||
|
- Add Bootswatch theme support with a visible theme selector and persistent localStorage choice.
|
||||||
|
- Password/secret input must submit when Enter is pressed, in addition to clicking the "Отправить" button.
|
||||||
|
- Console/tool output must render inside a collapsed Bootstrap accordion item.
|
||||||
|
- The accordion body must contain terminal-style output inside `<pre></pre>`.
|
||||||
|
- The terminal accordion must expand only when the user clicks it.
|
||||||
|
- Review UI must show critic/system assessment and user voting buttons:
|
||||||
|
- `Ошибочное действие`
|
||||||
|
- `Всё верно`
|
||||||
|
- optional correction/comment text.
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
- Keep runtime event handling WebSocket-driven.
|
||||||
|
- Do not mix console output with assistant prose.
|
||||||
|
- Keep raw tool output available for debugging, but collapsed by default.
|
||||||
Binary file not shown.
|
After Width: | Height: | Size: 17 KiB |
|
|
@ -0,0 +1,274 @@
|
||||||
|
Loading weights: 0%| | 0/103 [00:00<?, ?it/s]
Loading weights: 100%|██████████| 103/103 [00:00<00:00, 5627.96it/s]
|
||||||
|
INFO: Started server process [221205]
|
||||||
|
INFO: Waiting for application startup.
|
||||||
|
llama_context: n_ctx_seq (4096) < n_ctx_train (262144) -- the full capacity of the model will not be utilized
|
||||||
|
llama_context: n_ctx_seq (4096) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
|
||||||
|
llama_kv_cache_iswa: using full-size SWA cache (ref: https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
|
||||||
|
llama_kv_cache: the V embeddings have different sizes across layers and FA is not enabled - padding V cache to 1024
|
||||||
|
llama_kv_cache: the V embeddings have different sizes across layers and FA is not enabled - padding V cache to 1024
|
||||||
|
llama_context: n_ctx_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
|
||||||
|
llama_context: n_ctx_seq (4096) < n_ctx_train (40960) -- the full capacity of the model will not be utilized
|
||||||
|
INFO: Application startup complete.
|
||||||
|
INFO: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
|
||||||
|
ERROR: Exception in ASGI application
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/uvicorn/protocols/http/h11_impl.py", line 415, in run_asgi
|
||||||
|
result = await app( # type: ignore[func-returns-value]
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
self.scope, self.receive, self.send
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
|
||||||
|
return await self.app(scope, receive, send)
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/applications.py", line 1159, in __call__
|
||||||
|
await super().__call__(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/applications.py", line 90, in __call__
|
||||||
|
await self.middleware_stack(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/errors.py", line 186, in __call__
|
||||||
|
raise exc
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/errors.py", line 164, in __call__
|
||||||
|
await self.app(scope, receive, _send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/exceptions.py", line 63, in __call__
|
||||||
|
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
|
||||||
|
raise exc
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
|
||||||
|
await app(scope, receive, sender)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
|
||||||
|
await self.app(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 660, in __call__
|
||||||
|
await self.middleware_stack(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 680, in app
|
||||||
|
await route.handle(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 276, in handle
|
||||||
|
await self.app(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 134, in app
|
||||||
|
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
|
||||||
|
raise exc
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
|
||||||
|
await app(scope, receive, sender)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 120, in app
|
||||||
|
response = await f(request)
|
||||||
|
^^^^^^^^^^^^^^^^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 674, in app
|
||||||
|
raw_response = await run_endpoint_function(
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
...<3 lines>...
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 330, in run_endpoint_function
|
||||||
|
return await run_in_threadpool(dependant.call, **values)
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/concurrency.py", line 32, in run_in_threadpool
|
||||||
|
return await anyio.to_thread.run_sync(func)
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/to_thread.py", line 63, in run_sync
|
||||||
|
return await get_async_backend().run_sync_in_worker_thread(
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
func, args, abandon_on_cancel=abandon_on_cancel, limiter=limiter
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 2518, in run_sync_in_worker_thread
|
||||||
|
return await future
|
||||||
|
^^^^^^^^^^^^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 1002, in run
|
||||||
|
result = context.run(func, *args)
|
||||||
|
File "/home/mirivlad/git/ducklm/app/api/server.py", line 103, in resolve_secret
|
||||||
|
return runtime.resolve_secret(task_id=request.task_id, secret=request.secret)
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/home/mirivlad/git/ducklm/app/runtime/runtime_controller.py", line 408, in resolve_secret
|
||||||
|
return self.runtime_loop.resolve_secret(
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
|
||||||
|
task_id=task_id, secret=secret
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "/home/mirivlad/git/ducklm/app/runtime/runtime_loop.py", line 378, in resolve_secret
|
||||||
|
execution_result = self._execution_engine.execute(
|
||||||
|
task=task,
|
||||||
|
...<2 lines>...
|
||||||
|
secret_override=secret,
|
||||||
|
)
|
||||||
|
File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 87, in execute
|
||||||
|
return self._execute_plan(
|
||||||
|
~~~~~~~~~~~~~~~~~~^
|
||||||
|
task=task,
|
||||||
|
^^^^^^^^^^
|
||||||
|
...<3 lines>...
|
||||||
|
password_override=password_override,
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 211, in _execute_plan
|
||||||
|
result = self._execute_tool(
|
||||||
|
task=task,
|
||||||
|
...<3 lines>...
|
||||||
|
password_override=password_override,
|
||||||
|
)
|
||||||
|
File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 824, in _execute_tool
|
||||||
|
tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args)
|
||||||
|
File "/home/mirivlad/git/ducklm/app/tools/plugins/shell_exec/__init__.py", line 21, in execute
|
||||||
|
completed = self._sandbox.run_shell(
|
||||||
|
command=command,
|
||||||
|
cwd=str(cwd) if cwd else None,
|
||||||
|
stdin_data=str(stdin_secret) if stdin_secret is not None else None,
|
||||||
|
)
|
||||||
|
File "/home/mirivlad/git/ducklm/app/tools/sandbox.py", line 29, in run_shell
|
||||||
|
return subprocess.run(
|
||||||
|
~~~~~~~~~~~~~~^
|
||||||
|
command,
|
||||||
|
^^^^^^^^
|
||||||
|
...<7 lines>...
|
||||||
|
check=False,
|
||||||
|
^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "/usr/lib/python3.13/subprocess.py", line 556, in run
|
||||||
|
stdout, stderr = process.communicate(input, timeout=timeout)
|
||||||
|
~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/usr/lib/python3.13/subprocess.py", line 1222, in communicate
|
||||||
|
stdout, stderr = self._communicate(input, endtime, timeout)
|
||||||
|
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/usr/lib/python3.13/subprocess.py", line 2129, in _communicate
|
||||||
|
self._check_timeout(endtime, orig_timeout, stdout, stderr)
|
||||||
|
~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/usr/lib/python3.13/subprocess.py", line 1269, in _check_timeout
|
||||||
|
raise TimeoutExpired(
|
||||||
|
...<2 lines>...
|
||||||
|
stderr=b''.join(stderr_seq) if stderr_seq else None)
|
||||||
|
subprocess.TimeoutExpired: Command 'sudo -S -p '' apt update && apt upgrade -y' timed out after 30.0 seconds
|
||||||
|
ERROR: Exception in ASGI application
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/uvicorn/protocols/http/h11_impl.py", line 415, in run_asgi
|
||||||
|
result = await app( # type: ignore[func-returns-value]
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
self.scope, self.receive, self.send
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
|
||||||
|
return await self.app(scope, receive, send)
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/applications.py", line 1159, in __call__
|
||||||
|
await super().__call__(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/applications.py", line 90, in __call__
|
||||||
|
await self.middleware_stack(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/errors.py", line 186, in __call__
|
||||||
|
raise exc
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/errors.py", line 164, in __call__
|
||||||
|
await self.app(scope, receive, _send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/exceptions.py", line 63, in __call__
|
||||||
|
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
|
||||||
|
raise exc
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
|
||||||
|
await app(scope, receive, sender)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
|
||||||
|
await self.app(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 660, in __call__
|
||||||
|
await self.middleware_stack(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 680, in app
|
||||||
|
await route.handle(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 276, in handle
|
||||||
|
await self.app(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 134, in app
|
||||||
|
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
|
||||||
|
raise exc
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
|
||||||
|
await app(scope, receive, sender)
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 120, in app
|
||||||
|
response = await f(request)
|
||||||
|
^^^^^^^^^^^^^^^^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 674, in app
|
||||||
|
raw_response = await run_endpoint_function(
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
...<3 lines>...
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 330, in run_endpoint_function
|
||||||
|
return await run_in_threadpool(dependant.call, **values)
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/concurrency.py", line 32, in run_in_threadpool
|
||||||
|
return await anyio.to_thread.run_sync(func)
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/to_thread.py", line 63, in run_sync
|
||||||
|
return await get_async_backend().run_sync_in_worker_thread(
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
func, args, abandon_on_cancel=abandon_on_cancel, limiter=limiter
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 2518, in run_sync_in_worker_thread
|
||||||
|
return await future
|
||||||
|
^^^^^^^^^^^^
|
||||||
|
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 1002, in run
|
||||||
|
result = context.run(func, *args)
|
||||||
|
File "/home/mirivlad/git/ducklm/app/api/server.py", line 103, in resolve_secret
|
||||||
|
return runtime.resolve_secret(task_id=request.task_id, secret=request.secret)
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/home/mirivlad/git/ducklm/app/runtime/runtime_controller.py", line 408, in resolve_secret
|
||||||
|
return self.runtime_loop.resolve_secret(
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
|
||||||
|
task_id=task_id, secret=secret
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "/home/mirivlad/git/ducklm/app/runtime/runtime_loop.py", line 378, in resolve_secret
|
||||||
|
execution_result = self._execution_engine.execute(
|
||||||
|
task=task,
|
||||||
|
...<2 lines>...
|
||||||
|
secret_override=secret,
|
||||||
|
)
|
||||||
|
File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 87, in execute
|
||||||
|
return self._execute_plan(
|
||||||
|
~~~~~~~~~~~~~~~~~~^
|
||||||
|
task=task,
|
||||||
|
^^^^^^^^^^
|
||||||
|
...<3 lines>...
|
||||||
|
password_override=password_override,
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 211, in _execute_plan
|
||||||
|
result = self._execute_tool(
|
||||||
|
task=task,
|
||||||
|
...<3 lines>...
|
||||||
|
password_override=password_override,
|
||||||
|
)
|
||||||
|
File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 824, in _execute_tool
|
||||||
|
tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args)
|
||||||
|
File "/home/mirivlad/git/ducklm/app/tools/plugins/shell_exec/__init__.py", line 21, in execute
|
||||||
|
completed = self._sandbox.run_shell(
|
||||||
|
command=command,
|
||||||
|
cwd=str(cwd) if cwd else None,
|
||||||
|
stdin_data=str(stdin_secret) if stdin_secret is not None else None,
|
||||||
|
)
|
||||||
|
File "/home/mirivlad/git/ducklm/app/tools/sandbox.py", line 29, in run_shell
|
||||||
|
return subprocess.run(
|
||||||
|
~~~~~~~~~~~~~~^
|
||||||
|
command,
|
||||||
|
^^^^^^^^
|
||||||
|
...<7 lines>...
|
||||||
|
check=False,
|
||||||
|
^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "/usr/lib/python3.13/subprocess.py", line 556, in run
|
||||||
|
stdout, stderr = process.communicate(input, timeout=timeout)
|
||||||
|
~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/usr/lib/python3.13/subprocess.py", line 1222, in communicate
|
||||||
|
stdout, stderr = self._communicate(input, endtime, timeout)
|
||||||
|
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/usr/lib/python3.13/subprocess.py", line 2129, in _communicate
|
||||||
|
self._check_timeout(endtime, orig_timeout, stdout, stderr)
|
||||||
|
~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/usr/lib/python3.13/subprocess.py", line 1269, in _check_timeout
|
||||||
|
raise TimeoutExpired(
|
||||||
|
...<2 lines>...
|
||||||
|
stderr=b''.join(stderr_seq) if stderr_seq else None)
|
||||||
|
subprocess.TimeoutExpired: Command 'sudo -S -p '' apt update && apt upgrade -y' timed out after 30.0 seconds
|
||||||
|
|
@ -0,0 +1,254 @@
|
||||||
|
Models policy ready
|
||||||
|
Registered tool: file_write
|
||||||
|
Registered tool: shell_exec
|
||||||
|
Registered tool: memory
|
||||||
|
Registered tool: file_read
|
||||||
|
Lifespan: Starting model loading...
|
||||||
|
Lifespan: Loading models...
|
||||||
|
Loading thinker model...
|
||||||
|
Thinker loaded: <app.models.orchestrator.OrchestratorAdapter object at 0x7f1db5b6cc20> (model: Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf)
|
||||||
|
Loading json_compiler model...
|
||||||
|
JSON Compiler loaded: <app.models.orchestrator.OrchestratorAdapter object at 0x7f1db5b7bb10> (model: gemma-4-E4B-it-Q4_K_M.gguf)
|
||||||
|
Loading coder model...
|
||||||
|
Coder loaded: <app.models.coder.CoderAdapter object at 0x7f1db5b6d2b0> (model: X-Coder-SFT-Qwen3-8B.Q6_K.gguf)
|
||||||
|
Loading critic model...
|
||||||
|
Reusing model instance: gemma-4-E4B-it-Q4_K_M.gguf for critic
|
||||||
|
Critic loaded: <app.models.critic.CriticAdapter object at 0x7f1db5b6d160> (model: gemma-4-E4B-it-Q4_K_M.gguf)
|
||||||
|
Loading sys_util model...
|
||||||
|
Sys_util loaded: <app.models.orchestrator.OrchestratorAdapter object at 0x7f1db30ec2d0> (model: Menlo_Lucy-Q4_K_M.gguf)
|
||||||
|
All models loaded successfully
|
||||||
|
MemoryRecallService initialized with model: json_compiler
|
||||||
|
MemoryWritePolicy set: True
|
||||||
|
Lifespan: Models loaded
|
||||||
|
Lifespan: Rebuilding vector index (289 entries)...
|
||||||
|
Lifespan: Vector index rebuilt
|
||||||
|
INFO: 127.0.0.1:47236 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:47238 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:47240 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:45740 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:45754 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:41296 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:41304 - "GET / HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:41304 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:41304 - "GET /favicon.ico HTTP/1.1" 404 Not Found
|
||||||
|
INFO: 127.0.0.1:41318 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:41310 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:40504 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:45288 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:45302 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:47488 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:47498 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:48888 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:48898 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44008 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44024 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44008 - "POST /chat HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50236 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50246 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:57020 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:57032 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:36982 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:36996 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:35350 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:35358 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:38442 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:38456 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:38442 - "POST /permissions/resolve HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:35664 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:35666 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:41680 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:41682 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:55484 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:55486 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:53136 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:53142 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50412 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50412 - "POST /secrets/resolve HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50416 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50384 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50396 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:35882 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:35890 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:34008 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:34012 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:38358 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:38366 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:39500 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:39516 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:52800 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:52812 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:60246 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:60256 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:55192 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:55208 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:55192 - "POST /secrets/resolve HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50170 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50184 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:60392 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:60404 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:42626 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:42630 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:37478 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:37480 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:59892 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:59902 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50284 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50290 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:59488 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:59492 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:53584 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:53590 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50978 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50990 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:43110 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:43118 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:39906 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:39908 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:39100 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:39110 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:43436 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:43448 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:60214 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:60228 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:56192 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:45580 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:59680 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:52038 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:34120 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:54374 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:41916 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:48474 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:58570 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:58284 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:47014 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:37884 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:56196 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:60026 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:48534 - "POST /secrets/resolve HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:48536 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:46114 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:49446 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:33518 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:40316 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:47326 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:36022 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:36806 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:54232 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:54248 - "GET / HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:54248 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:54248 - "GET /favicon.ico HTTP/1.1" 404 Not Found
|
||||||
|
INFO: 127.0.0.1:38470 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:54264 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50474 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50490 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44644 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44652 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:41856 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:57392 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:45778 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:59094 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:39508 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:51214 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:54724 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:41204 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:33686 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:38154 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44658 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:56664 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:33906 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:36934 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:48746 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50876 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:38912 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:40786 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:51882 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:40002 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:43176 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:49824 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44316 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:58994 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:47794 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:37642 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:32882 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:53578 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:35804 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:47732 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:34050 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:55386 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:43992 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:43998 - "GET / HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:43998 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:43998 - "GET /favicon.ico HTTP/1.1" 404 Not Found
|
||||||
|
INFO: 127.0.0.1:39194 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:33540 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:53022 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:41056 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44000 - "POST /chat HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44000 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44000 - "POST /permissions/resolve HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:57534 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:60834 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:59886 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:42774 - "POST /secrets/resolve HTTP/1.1" 500 Internal Server Error
|
||||||
|
INFO: 127.0.0.1:50140 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:52360 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:57882 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44816 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:37956 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:37956 - "GET / HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:37956 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:37956 - "GET /favicon.ico HTTP/1.1" 404 Not Found
|
||||||
|
INFO: 127.0.0.1:50254 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:46082 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:56836 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:35716 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:37656 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:45248 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:50242 - "POST /chat HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44868 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44882 - "POST /permissions/resolve HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44882 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:48796 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:60814 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:53286 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44882 - "POST /secrets/resolve HTTP/1.1" 500 Internal Server Error
|
||||||
|
INFO: 127.0.0.1:53816 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:39450 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:53198 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:58340 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:58686 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:47278 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:46400 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:58580 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:35014 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:43342 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:34798 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:41652 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:36938 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:58066 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:45948 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:45656 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:33986 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:52016 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:55700 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:48468 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:33002 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:43004 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:43014 - "POST /secrets/resolve HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:43014 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:36870 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:45970 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:60292 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:53738 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:49414 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:56572 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:51224 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:53742 - "POST /secrets/resolve HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:42496 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:54868 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:57530 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:60898 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:54112 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:44548 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:37414 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
INFO: 127.0.0.1:45064 - "GET /health HTTP/1.1" 200 OK
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
844579
|
||||||
|
|
@ -0,0 +1,314 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Тестовый скрипт для проверки работы ducklm.
|
||||||
|
Позволяет ИИ-кодеру тестировать систему через отправку запросов и проверку выполнения.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import requests
|
||||||
|
import sys
|
||||||
|
from typing import Dict, Any, Optional
|
||||||
|
|
||||||
|
|
||||||
|
class DuckLMTester:
    """HTTP smoke-test harness for a running ducklm server.

    Every ``test_*`` method sends its requests through one shared
    ``requests.Session``, records a verdict with :meth:`log_test`, and
    returns ``True`` when the check passed and ``False`` otherwise.
    Exceptions raised inside a check are caught and reported as a failure.
    """

    def __init__(self, base_url: str = "http://127.0.0.1:8000"):
        """Remember the server base URL and open a reusable HTTP session.

        Args:
            base_url: Root URL that the endpoint paths are appended to.
        """
        self.base_url = base_url
        self.session = requests.Session()
        self.test_results = []

    def log_test(self, test_name: str, passed: bool, details: str = ""):
        """Record one test verdict and echo it to stdout.

        Args:
            test_name: Human-readable name of the check.
            passed: Whether the check succeeded.
            details: Optional free-form explanation; printed only if non-empty.
        """
        self.test_results.append(
            {
                "test": test_name,
                "passed": passed,
                "details": details,
                "timestamp": time.time(),
            }
        )
        if passed:
            marker = "✓ PASS"
        else:
            marker = "✗ FAIL"
        print(f"{marker}: {test_name}")
        if details:
            print(f" Details: {details}")

    def test_health(self) -> bool:
        """Check that ``GET /health`` answers 200 with ``status == "ok"``."""
        label = "Health Check"
        try:
            reply = self.session.get(f"{self.base_url}/health", timeout=5)
            if reply.status_code != 200:
                self.log_test(label, False, f"HTTP {reply.status_code}")
                return False

            body = reply.json()
            if body.get("status") != "ok":
                self.log_test(label, False, f"Unexpected response: {body}")
                return False

            self.log_test(label, True, "Server is healthy")
            return True
        except Exception as exc:
            self.log_test(label, False, f"Connection error: {exc!s}")
            return False

    def test_simple_chat(self) -> bool:
        """Post a plain greeting to ``/chat`` and accept any non-error status."""
        label = "Simple Chat"
        accepted = ("completed", "awaiting_permission", "awaiting_input")
        try:
            request_body = {"input": "Привет, как дела?"}
            reply = self.session.post(
                f"{self.base_url}/chat",
                json=request_body,
                timeout=30,
            )
            if reply.status_code != 200:
                self.log_test(label, False, f"HTTP {reply.status_code}: {reply.text}")
                return False

            state = reply.json().get("status")
            if state not in accepted:
                self.log_test(label, False, f"Unexpected status: {state}")
                return False

            self.log_test(label, True, f"Status: {state}, Response received")
            return True
        except Exception as exc:
            self.log_test(label, False, f"Request error: {exc!s}")
            return False

    def test_tool_execution(self) -> bool:
        """Request an explicit ``shell_exec`` run and check its echoed output.

        A ``completed`` reply must contain ``"test"`` in ``result.output``;
        an ``awaiting_permission`` reply is also treated as a pass.
        """
        label = "Tool Execution"
        try:
            # Simple shell command, requested explicitly through the context.
            request_body = {
                "input": "Выполни простую команду",
                "context": {
                    "requested_tool": "shell_exec",
                    "tool_args": {"command": "echo 'test'"},
                },
            }
            reply = self.session.post(
                f"{self.base_url}/chat",
                json=request_body,
                timeout=30,
            )
            if reply.status_code != 200:
                self.log_test(label, False, f"HTTP {reply.status_code}: {reply.text}")
                return False

            body = reply.json()
            state = body.get("status")

            if state == "completed":
                output = body.get("result", {}).get("output", "")
                if "test" not in output:
                    self.log_test(label, False, f"Unexpected output: {output}")
                    return False
                self.log_test(
                    label,
                    True,
                    f"Command executed successfully: {output.strip()}",
                )
                return True

            if state == "awaiting_permission":
                self.log_test(
                    label,
                    True,
                    "Permission required (expected for some commands)",
                )
                return True

            self.log_test(label, False, f"Unexpected status: {state}")
            return False
        except Exception as exc:
            self.log_test(label, False, f"Request error: {exc!s}")
            return False

    def test_permission_flow(self) -> bool:
        """Exercise the permission round trip for a ``shell_exec`` request.

        If ``/chat`` answers ``awaiting_permission``, the task is approved
        once via ``/permissions/resolve`` and the final status must be
        ``completed`` or ``failed``. Any other initial status counts as a
        pass because no permission was needed.
        """
        label = "Permission Flow"
        try:
            # First submit a task that is expected to need a permission.
            request_body = {
                "input": "Запусти команду, требующую разрешения",
                "context": {
                    "requested_tool": "shell_exec",
                    "tool_args": {"command": "whoami"},
                },
            }
            reply = self.session.post(
                f"{self.base_url}/chat",
                json=request_body,
                timeout=30,
            )
            if reply.status_code != 200:
                self.log_test(
                    label,
                    False,
                    f"Initial request failed: HTTP {reply.status_code}",
                )
                return False

            body = reply.json()
            if body.get("status") != "awaiting_permission":
                # No permission was requested; that is acceptable as well.
                self.log_test(
                    label,
                    True,
                    f"No permission required, status: {body.get('status')}",
                )
                return True

            task_id = body.get("task_id")
            if not task_id:
                self.log_test(label, False, "No task_id in response")
                return False

            # Now grant the pending permission for this single run.
            approval = {"task_id": task_id, "decision": "allow_once"}
            approval_reply = self.session.post(
                f"{self.base_url}/permissions/resolve",
                json=approval,
                timeout=10,
            )
            if approval_reply.status_code != 200:
                self.log_test(
                    label,
                    False,
                    f"Permission resolution failed: HTTP {approval_reply.status_code}",
                )
                return False

            final_state = approval_reply.json().get("status")
            if final_state not in ("completed", "failed"):
                self.log_test(label, False, f"Unexpected final status: {final_state}")
                return False

            self.log_test(
                label,
                True,
                f"Permission resolved, final status: {final_state}",
            )
            return True
        except Exception as exc:
            self.log_test(label, False, f"Request error: {exc!s}")
            return False

    def run_all_tests(self) -> Dict[str, Any]:
        """Run every check in order and return a summary dictionary.

        Returns:
            A dict with ``total_tests``, ``passed_tests``, ``failed_tests``,
            ``success_rate`` (fraction in ``[0, 1]``) and ``test_results``
            (the list accumulated by :meth:`log_test`).
        """
        separator = "=" * 50
        print("Starting ducklm tests...")
        print(separator)

        # Give the server a moment to finish starting up.
        time.sleep(2)

        checks = [
            self.test_health,
            self.test_simple_chat,
            self.test_tool_execution,
            self.test_permission_flow,
        ]
        total = len(checks)
        passed = 0

        for check in checks:
            if check():
                passed += 1
            time.sleep(1)  # Short pause between tests for weak hardware.

        print(separator)
        print(f"Tests completed: {passed}/{total} passed")

        # Summary of the whole run.
        return {
            "total_tests": total,
            "passed_tests": passed,
            "failed_tests": total - passed,
            "success_rate": passed / total if total > 0 else 0,
            "test_results": self.test_results,
        }
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse command-line options, run the selected checks and exit.

    The process exit code is 0 when everything that ran passed and 1
    otherwise.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Тест ducklm системы")
    cli.add_argument(
        "--url",
        default="http://127.0.0.1:8000",
        help="Base URL for ducklm server",
    )
    cli.add_argument(
        "--test",
        choices=["health", "chat", "tool", "permission", "all"],
        default="all",
        help="Specific test to run",
    )
    options = cli.parse_args()

    tester = DuckLMTester(options.url)

    if options.test != "all":
        # Run exactly one named test.
        single_tests = {
            "health": tester.test_health,
            "chat": tester.test_simple_chat,
            "tool": tester.test_tool_execution,
            "permission": tester.test_permission_flow,
        }
        succeeded = single_tests[options.test]()
        verdict = "PASSED" if succeeded else "FAILED"
        print(f"Test {options.test}: {verdict}")
        sys.exit(0 if succeeded else 1)

    report = tester.run_all_tests()
    print("\nFINAL RESULTS:")
    print(f"Passed: {report['passed_tests']}/{report['total_tests']}")
    print(f"Success Rate: {report['success_rate']*100:.1f}%")

    # The exit code reflects the results: 0 only when nothing failed.
    sys.exit(0 if report['failed_tests'] == 0 else 1)


if __name__ == "__main__":
    main()
|
||||||
|
|
@ -0,0 +1,409 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Прямой тест ducklm через RuntimeController (без HTTP сервера).
|
||||||
|
Позволяет ИИ-кодеру тестировать систему через отправку запросов и проверку выполнения.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
# Добавляем текущую директорию в путь для импорта app
|
||||||
|
sys.path.insert(0, '.')
|
||||||
|
|
||||||
|
from app.runtime.runtime_controller import RuntimeController
|
||||||
|
from app.core.contracts import UserTask
|
||||||
|
|
||||||
|
|
||||||
|
class DuckLMDirectTester:
|
||||||
|
def __init__(self, base_dir: str = "."):
|
||||||
|
self.base_dir = Path(base_dir)
|
||||||
|
self.test_results = []
|
||||||
|
self.controller = None
|
||||||
|
|
||||||
|
def setup(self):
|
||||||
|
"""Инициализировать контроллер"""
|
||||||
|
try:
|
||||||
|
print("Инициализация RuntimeController...")
|
||||||
|
self.controller = RuntimeController(base_dir=self.base_dir)
|
||||||
|
print("RuntimeController инициализирован успешно")
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Ошибка инициализации RuntimeController: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def log_test(self, test_name: str, passed: bool, details: str = ""):
    """Record one test verdict and echo it to stdout.

    Args:
        test_name: Human-readable name of the check.
        passed: Whether the check succeeded.
        details: Optional free-form explanation; printed only if non-empty.
    """
    self.test_results.append(
        {
            "test": test_name,
            "passed": passed,
            "details": details,
            "timestamp": time.time(),
        }
    )
    if passed:
        marker = "✓ PASS"
    else:
        marker = "✗ FAIL"
    print(f"{marker}: {test_name}")
    if details:
        print(f" Details: {details}")
|
||||||
|
|
||||||
|
def test_health(self) -> bool:
|
||||||
|
"""Проверить что контроллер работает"""
|
||||||
|
try:
|
||||||
|
if self.controller is None:
|
||||||
|
self.log_test("Health Check", False, "Controller not initialized")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Проверяем что основные компоненты присутствуют
|
||||||
|
components = [
|
||||||
|
("event_bus", self.controller.event_bus),
|
||||||
|
("permission_service", self.controller.permission_service),
|
||||||
|
("task_state_store", self.controller.task_state_store),
|
||||||
|
("checkpoint_store", self.controller.checkpoint_store),
|
||||||
|
("context_builder", self.controller.context_builder),
|
||||||
|
("router", self.controller.router),
|
||||||
|
("execution_engine", self.controller.execution_engine),
|
||||||
|
]
|
||||||
|
|
||||||
|
missing = []
|
||||||
|
for name, component in components:
|
||||||
|
if component is None:
|
||||||
|
missing.append(name)
|
||||||
|
|
||||||
|
if missing:
|
||||||
|
self.log_test("Health Check", False, f"Missing components: {missing}")
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
self.log_test("Health Check", True, "Все компоненты инициализированы")
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.log_test("Health Check", False, f"Error: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def test_simple_task(self) -> bool:
|
||||||
|
"""Простой тест задачи"""
|
||||||
|
try:
|
||||||
|
if self.controller is None:
|
||||||
|
self.log_test("Simple Task", False, "Controller not initialized")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Создаем простую задачу
|
||||||
|
task = UserTask(input="Привет, как дела?")
|
||||||
|
|
||||||
|
# Выполняем задачу через контроллер
|
||||||
|
result = self.controller.handle_task(task)
|
||||||
|
|
||||||
|
status = result.get("status")
|
||||||
|
if status in ["completed", "awaiting_permission", "awaiting_input"]:
|
||||||
|
self.log_test(
|
||||||
|
"Simple Task",
|
||||||
|
True,
|
||||||
|
f"Status: {status}, Task ID: {result.get('task_id')}"
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
self.log_test(
|
||||||
|
"Simple Task",
|
||||||
|
False,
|
||||||
|
f"Unexpected status: {status}"
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.log_test("Simple Task", False, f"Request error: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def test_tool_task(self) -> bool:
    """Request a ``shell_exec`` tool call via the task context.

    Passes when the task completes and its output contains
    "hello from test", or when the controller answers with
    "awaiting_permission". Any other status, unexpected output, a missing
    controller, or an exception is logged as a failure and returns False.
    """
    try:
        if self.controller is None:
            self.log_test("Tool Task", False, "Controller not initialized")
            return False

        # The shell command is passed explicitly through the task context.
        shell_args = {"command": "echo 'hello from test'"}
        task = UserTask(
            input="Выполни простую команду",
            context={"requested_tool": "shell_exec", "tool_args": shell_args},
        )
        outcome = self.controller.handle_task(task)
        status = outcome.get("status")

        if status == "awaiting_permission":
            self.log_test(
                "Tool Task",
                True,
                "Permission required (expected for some commands)",
            )
            return True

        if status != "completed":
            self.log_test("Tool Task", False, f"Unexpected status: {status}")
            return False

        output = outcome.get("result", {}).get("output", "")
        found = "hello from test" in output
        if found:
            detail = f"Command executed successfully: {output.strip()}"
        else:
            detail = f"Unexpected output: {output}"
        self.log_test("Tool Task", found, detail)
        return found

    except Exception as e:
        self.log_test("Tool Task", False, f"Request error: {str(e)}")
        return False
|
||||||
|
|
||||||
|
def test_memory_tools(self) -> bool:
    """Exercise the ``memory`` tool: insert a fact, then search for it.

    Returns True only when both the insert task and the search task report
    status "completed". The first 100 characters of the search output are
    logged, but the output content itself is not checked. A missing
    controller or any exception is logged and returns False.
    """
    try:
        if self.controller is None:
            self.log_test("Memory Tools", False, "Controller not initialized")
            return False

        # Step 1: insert a fact into memory.
        insert_args = {
            "operation": "insert",
            "text": "тестовое значение 123",
            "kind": "fact",
            "weight": 0.8,
        }
        task_insert = UserTask(
            input="Запомни эту информацию: тестовое значение 123",
            context={"requested_tool": "memory", "tool_args": insert_args},
        )
        result_insert = self.controller.handle_task(task_insert)
        if result_insert.get("status") != "completed":
            self.log_test(
                "Memory Tools Insert",
                False,
                f"Insert failed: {result_insert.get('status')}",
            )
            return False

        # Step 2: search memory for the stored fact.
        search_args = {
            "operation": "search",
            "query": "тестовое значение",
            "limit": 5,
        }
        task_search = UserTask(
            input="Найди запомненную информацию",
            context={"requested_tool": "memory", "tool_args": search_args},
        )
        result_search = self.controller.handle_task(task_search)
        if result_search.get("status") != "completed":
            self.log_test(
                "Memory Tools Search",
                False,
                f"Search failed: {result_search.get('status')}",
            )
            return False

        output = result_search.get("result", {}).get("output", "")
        self.log_test(
            "Memory Tools",
            True,
            f"Memory search successful: {output[:100]}...",
        )
        return True

    except Exception as e:
        self.log_test("Memory Tools", False, f"Request error: {str(e)}")
        return False
|
||||||
|
|
||||||
|
def test_file_operations(self) -> bool:
    """Exercise the ``file_read`` and ``file_write`` tools on a temp file.

    A temporary ``.txt`` file is created, read through the controller, then
    overwritten through the controller. The test passes when the file on
    disk afterwards equals the written content. The temporary file is
    removed in all cases. A missing controller or any exception is logged
    and returns False.
    """
    try:
        if self.controller is None:
            self.log_test("File Operations", False, "Controller not initialized")
            return False

        import tempfile
        import os

        # Create the scratch file the tools will operate on.
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as handle:
            temp_path = handle.name
            handle.write("initial content for testing")

        try:
            # Read the file through the controller.
            task_read = UserTask(
                input="Прочитай файл",
                context={
                    "requested_tool": "file_read",
                    "tool_args": {"path": temp_path},
                },
            )
            result_read = self.controller.handle_task(task_read)
            if result_read.get("status") != "completed":
                self.log_test(
                    "File Read",
                    False,
                    f"Read failed: {result_read.get('status')}",
                )
                return False

            # Overwrite the file through the controller.
            new_content = "updated content from test"
            write_args = {"path": temp_path, "content": new_content}
            task_write = UserTask(
                input="Запиши в файл",
                context={"requested_tool": "file_write", "tool_args": write_args},
            )
            result_write = self.controller.handle_task(task_write)
            if result_write.get("status") != "completed":
                self.log_test(
                    "File Write",
                    False,
                    f"Write failed: {result_write.get('status')}",
                )
                return False

            # Confirm the file on disk really changed.
            with open(temp_path, 'r') as written:
                actual_content = written.read()

            matches = actual_content == new_content
            if matches:
                detail = f"File read/write successful: {actual_content}"
            else:
                detail = f"File content mismatch. Expected: {new_content}, Got: {actual_content}"
            self.log_test("File Operations", matches, detail)
            return matches

        finally:
            # Remove the scratch file whatever the outcome.
            if os.path.exists(temp_path):
                os.unlink(temp_path)

    except Exception as e:
        self.log_test("File Operations", False, f"Request error: {str(e)}")
        return False
|
||||||
|
|
||||||
|
def run_all_tests(self) -> Dict[str, Any]:
    """Run every test method in order and return a summary.

    Returns:
        ``{"error": "Setup failed"}`` when ``self.setup()`` is falsy.
        Otherwise a dict with ``total_tests``, ``passed_tests``,
        ``failed_tests``, ``success_rate`` (a fraction from 0 to 1) and
        ``test_results`` (taken from ``self.test_results``).
    """
    separator = "=" * 50
    print("Starting direct ducklm tests...")
    print(separator)

    if not self.setup():
        print("Failed to setup controller")
        return {"error": "Setup failed"}

    checks = [
        self.test_health,
        self.test_simple_task,
        self.test_tool_task,
        self.test_memory_tools,
        self.test_file_operations,
    ]
    total = len(checks)

    passed = 0
    for check in checks:
        passed += 1 if check() else 0
        time.sleep(0.5)  # short pause between tests

    print(separator)
    print(f"Tests completed: {passed}/{total} passed")

    # Summary of the results.
    return {
        "total_tests": total,
        "passed_tests": passed,
        "failed_tests": total - passed,
        "success_rate": passed / total if total > 0 else 0,
        "test_results": self.test_results,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point for the direct DuckLM tester.

    Options:
        --basedir: base directory passed to ``DuckLMDirectTester`` (default ".").
        --test: which test to run: health, simple, tool, memory, file or
            all (default "all").

    The process always ends via ``sys.exit``: status 0 when the selected
    test(s) passed, status 1 when any failed or when setup failed.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Тест ducklm системы (прямой доступ)")
    parser.add_argument("--basedir", default=".", help="Base directory for ducklm")
    parser.add_argument("--test", choices=["health", "simple", "tool", "memory", "file", "all"],
                        default="all", help="Specific test to run")

    args = parser.parse_args()

    tester = DuckLMDirectTester(args.basedir)

    if args.test == "all":
        results = tester.run_all_tests()

        # run_all_tests() returns {"error": ...} without any summary keys when
        # setup fails; indexing 'passed_tests' on it would raise KeyError.
        if "error" in results:
            print(f"Test run aborted: {results['error']}")
            sys.exit(1)

        print("\nFINAL RESULTS:")
        print(f"Passed: {results['passed_tests']}/{results['total_tests']}")
        print(f"Success Rate: {results['success_rate']*100:.1f}%")

        # Exit code reflects the results.
        sys.exit(0 if results['failed_tests'] == 0 else 1)
    else:
        # Run a single selected test.
        if not tester.setup():
            print("Failed to setup controller")
            sys.exit(1)

        test_map = {
            "health": tester.test_health,
            "simple": tester.test_simple_task,
            "tool": tester.test_tool_task,
            "memory": tester.test_memory_tools,
            "file": tester.test_file_operations,
        }

        test_func = test_map[args.test]
        if test_func():
            print(f"Test {args.test}: PASSED")
            sys.exit(0)
        else:
            print(f"Test {args.test}: FAILED")
            sys.exit(1)


if __name__ == "__main__":
    main()
|
||||||
|
|
@ -1,5 +1,9 @@
|
||||||
from app.api.server import chat, critic_feedback, health, list_events, resolve_permission, resolve_secret
|
import asyncio
|
||||||
from app.core.permission_resolution import PermissionResolutionRequest, SecretResolutionRequest
|
import time
|
||||||
|
|
||||||
|
import app.api.server as server
|
||||||
|
from app.api.server import chat, critic_feedback, health, list_events, resolve_permission, resolve_review, resolve_secret
|
||||||
|
from app.core.permission_resolution import PermissionResolutionRequest, ReviewResolutionRequest, SecretResolutionRequest
|
||||||
from app.api.server import CriticFeedbackRequest
|
from app.api.server import CriticFeedbackRequest
|
||||||
from app.core.contracts import UserTask
|
from app.core.contracts import UserTask
|
||||||
|
|
||||||
|
|
@ -16,8 +20,52 @@ def test_events_handler_returns_event_list() -> None:
|
||||||
|
|
||||||
def test_chat_handler_returns_runtime_events() -> None:
|
def test_chat_handler_returns_runtime_events() -> None:
|
||||||
body = chat(UserTask(input="hello from handler test"))
|
body = chat(UserTask(input="hello from handler test"))
|
||||||
assert body["status"] == "completed"
|
assert body["status"] in {"accepted", "completed"}
|
||||||
assert body["events"][0]["type"] == "task_received"
|
if body["status"] == "completed":
|
||||||
|
assert body["events"][0]["type"] == "task_received"
|
||||||
|
|
||||||
|
|
||||||
|
def test_chat_handler_submits_task_without_waiting_for_completion(monkeypatch) -> None:
    """``chat`` must return an "accepted" response in under 0.1 s.

    The stub runtime's ``handle_task`` sleeps for 0.25 s while
    ``submit_task`` returns immediately, so the time bound fails if the
    handler takes the slow path.
    """

    class _DeferredRuntime:
        def submit_task(self, task):
            return {"task_id": task.task_id, "status": "accepted"}

        def handle_task(self, task):
            time.sleep(0.25)
            return {"task_id": task.task_id, "status": "completed", "events": []}

    monkeypatch.setattr("app.api.server.runtime", _DeferredRuntime())

    t0 = time.monotonic()
    response = chat(UserTask(input="long task"))
    elapsed = time.monotonic() - t0

    assert elapsed < 0.1
    assert response["status"] == "accepted"
|
||||||
|
|
||||||
|
|
||||||
|
def test_lifespan_loads_models_without_threadpool_executor(monkeypatch) -> None:
    """The server lifespan must load models without ``run_in_executor``.

    ``server.runtime`` is replaced by a stub that records whether
    ``load_models_at_startup`` was called. ``server.asyncio.get_event_loop``
    is replaced by a factory whose loop raises on ``run_in_executor``.
    """

    class _RecordingRuntime:
        _memory_interface = None

        def __init__(self) -> None:
            self.loaded = False

        def load_models_at_startup(self) -> None:
            self.loaded = True

    class _NoExecutorLoop:
        def run_in_executor(self, *args, **kwargs):
            raise AssertionError("lifespan must not load llama models via run_in_executor")

    runtime_stub = _RecordingRuntime()
    monkeypatch.setattr(server, "runtime", runtime_stub)
    monkeypatch.setattr(server.asyncio, "get_event_loop", lambda: _NoExecutorLoop())

    async def _enter_and_exit() -> None:
        # Enter and leave the lifespan context with no application object.
        async with server.lifespan(None):
            pass

    asyncio.run(_enter_and_exit())

    assert runtime_stub.loaded is True
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_permission_handler_allows_completion() -> None:
|
def test_resolve_permission_handler_allows_completion() -> None:
|
||||||
|
|
@ -34,6 +82,29 @@ def test_resolve_secret_handler_requires_pending_request() -> None:
|
||||||
assert body["status"] == "failed"
|
assert body["status"] == "failed"
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_review_handler_submits_review_resolution(monkeypatch) -> None:
    """``resolve_review`` must return the runtime's review-resolution result.

    The stub runtime echoes its arguments back. The test checks that the
    response carries the "accepted" status and the submitted decision.
    """

    class _EchoReviewRuntime:
        def submit_review_resolution(self, task_id, decision, correction=None):
            echoed = {"task_id": task_id, "status": "accepted"}
            echoed["decision"] = decision
            echoed["correction"] = correction
            return echoed

    monkeypatch.setattr("app.api.server.runtime", _EchoReviewRuntime())

    request = ReviewResolutionRequest(
        task_id="task-1",
        decision="wrong_action",
        correction="replan",
    )
    response = resolve_review(request)

    assert response["status"] == "accepted"
    assert response["decision"] == "wrong_action"
|
||||||
|
|
||||||
|
|
||||||
def test_structured_feedback_can_be_accepted_without_memory_write() -> None:
|
def test_structured_feedback_can_be_accepted_without_memory_write() -> None:
|
||||||
initial = chat(UserTask(input="feedback target"))
|
initial = chat(UserTask(input="feedback target"))
|
||||||
body = critic_feedback(
|
body = critic_feedback(
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,46 @@
|
||||||
|
from app.core.command_analyzer import CommandAnalyzer
|
||||||
|
from app.core.permission_service import PermissionService
|
||||||
|
|
||||||
|
|
||||||
|
def _permission_service() -> PermissionService:
    """Build a ``PermissionService`` with one ``no_always`` command category.

    The category lists apt, apt-get, dpkg and systemctl, with
    ``allow_once`` enabled and ``allow_always`` disabled. ``settings`` and
    ``path_settings`` are left empty.
    """
    no_always_category = {
        "allow_once": True,
        "allow_always": False,
        "commands": ["apt", "apt-get", "dpkg", "systemctl"],
    }
    service_config = {
        "settings": {},
        "command_categories": {"no_always": no_always_category},
        "path_settings": {},
    }
    return PermissionService(config=service_config)
|
||||||
|
|
||||||
|
|
||||||
|
def test_detects_unelevated_root_required_segment_after_sudo_chain() -> None:
    """Only the first segment of ``sudo apt update && apt upgrade -y`` is elevated.

    The analyzer must report a ``privilege_scope_error`` and list
    ``apt upgrade -y`` as the unelevated segment that requires root.
    """
    chained_command = "sudo apt update && apt upgrade -y"
    report = CommandAnalyzer(_permission_service()).analyze(
        command=chained_command,
        task_id="task-1",
        session_id="session-1",
    )

    assert report["type"] == "privilege_scope_error"
    assert report["root_required_segments"] == ["apt update", "apt upgrade -y"]
    assert report["elevated_segments"] == ["apt update"]
    assert report["unelevated_root_segments"] == ["apt upgrade -y"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_accepts_each_root_required_segment_when_each_is_elevated() -> None:
    """A chain with ``sudo`` on every segment must be diagnosed as ``ok``."""
    fully_elevated = "sudo apt update && sudo apt upgrade -y"
    report = CommandAnalyzer(_permission_service()).analyze(
        command=fully_elevated,
        task_id="task-1",
        session_id="session-1",
    )

    assert report["type"] == "ok"
    assert report["unelevated_root_segments"] == []
|
||||||
|
|
@ -14,12 +14,25 @@ def test_runtime_loop_emits_basic_events() -> None:
|
||||||
|
|
||||||
|
|
||||||
def test_runtime_loop_routes_natural_language_shell_request_to_permission_flow() -> None:
|
def test_runtime_loop_routes_natural_language_shell_request_to_permission_flow() -> None:
|
||||||
|
import os, shutil
|
||||||
|
# Clear permission cache to ensure clean state
|
||||||
|
cache_file = os.path.join(os.path.dirname(__file__), '..', 'data', 'runtime', 'allowed_commands.json')
|
||||||
|
if os.path.exists(cache_file):
|
||||||
|
os.remove(cache_file)
|
||||||
|
|
||||||
controller = RuntimeController()
|
controller = RuntimeController()
|
||||||
result = controller.handle_task(UserTask(input="запусти sudo apt update"))
|
result = controller.handle_task(UserTask(input="запусти sudo apt update"))
|
||||||
event_types = [event["type"] for event in result["events"]]
|
event_types = [event["type"] for event in result["events"]]
|
||||||
|
# sudo commands require both permission and password
|
||||||
|
# First step: permission request
|
||||||
assert result["status"] == "awaiting_permission"
|
assert result["status"] == "awaiting_permission"
|
||||||
assert result["directive"]["type"] == "tool"
|
assert result["directive"]["type"] == "tool"
|
||||||
assert result["directive"]["payload"]["tool"] == "shell_exec"
|
assert result["directive"]["payload"]["tool"] == "shell_exec"
|
||||||
assert "permission_requested" in event_types
|
assert "permission_requested" in event_types
|
||||||
assert "task_awaiting_permission" in event_types
|
assert "task_awaiting_permission" in event_types
|
||||||
assert result["result"]["error"] == "Permission required before execution."
|
assert result["result"]["error"] == "Permission required before execution."
|
||||||
|
|
||||||
|
# After granting permission, should request sudo password
|
||||||
|
resumed = controller.resolve_permission(task_id=result["task_id"], decision="allow_once")
|
||||||
|
assert resumed["status"] == "awaiting_input"
|
||||||
|
assert resumed["result"]["secret_request"]["kind"] == "sudo_password"
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,11 @@ import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from app.core.contracts import ExecutionDirective, UserTask
|
from app.core.contracts import ExecutionDirective, UserTask
|
||||||
|
from app.core.contracts import PermissionDecision
|
||||||
|
from app.core.contracts import ToolResult
|
||||||
|
from app.events.event_types import TOOL_OUTPUT_CHUNK
|
||||||
from app.runtime.runtime_controller import RuntimeController
|
from app.runtime.runtime_controller import RuntimeController
|
||||||
|
from app.tools.sandbox import ToolSandbox
|
||||||
|
|
||||||
|
|
||||||
def _write_config_tree(base_dir: Path) -> None:
|
def _write_config_tree(base_dir: Path) -> None:
|
||||||
|
|
@ -27,9 +31,38 @@ def _write_config_tree(base_dir: Path) -> None:
|
||||||
"critic_prompt": "",
|
"critic_prompt": "",
|
||||||
},
|
},
|
||||||
"permissions.json": {
|
"permissions.json": {
|
||||||
"dangerous_commands": {"rm": "ask_always", "sudo": "ask_always"},
|
"settings": {
|
||||||
"sensitive_paths": ["/etc", "/usr", "/var"],
|
"allow_caching": True,
|
||||||
"default_approval_behavior": "ask_always",
|
"cache_file": str(base_dir / "data/runtime/allowed_commands.json"),
|
||||||
|
"normalize_commands": True,
|
||||||
|
"split_chained": True
|
||||||
|
},
|
||||||
|
"command_categories": {
|
||||||
|
"hard_stop": {
|
||||||
|
"commands": ["rm -rf /", "rm -rf /*", "dd if=/dev/zero of=/dev/sd*"]
|
||||||
|
},
|
||||||
|
"no_always": {
|
||||||
|
"allow_once": True,
|
||||||
|
"allow_always": False,
|
||||||
|
"commands": [
|
||||||
|
"rm -rf *", "rm -rf .*", "shutdown", "reboot", "halt",
|
||||||
|
"apt", "apt-get", "dpkg", "yum", "dnf", "pacman",
|
||||||
|
"systemctl stop", "systemctl start", "systemctl restart",
|
||||||
|
"service stop", "service start", "killall", "pkill -9"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"normal": {
|
||||||
|
"allow_once": True,
|
||||||
|
"allow_always": True,
|
||||||
|
"commands": ["shell_exec", "file_write"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"path_settings": {
|
||||||
|
"allow_read_outside": True,
|
||||||
|
"allow_write_paths": [str(base_dir), "/tmp"],
|
||||||
|
"require_confirmation_for_write": True,
|
||||||
|
"require_confirmation_for_shell": True
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"runtime.json": {
|
"runtime.json": {
|
||||||
"step_timeout_ms": 5000,
|
"step_timeout_ms": 5000,
|
||||||
|
|
@ -92,6 +125,8 @@ def test_shell_exec_requires_permission_for_dangerous_command(tmp_path: Path) ->
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
# rm -rf /tmp/nonexistent is not hard_stop (only exact "rm -rf /" is)
|
||||||
|
# but it matches "rm -rf *" in no_always category
|
||||||
assert result["status"] == "awaiting_permission"
|
assert result["status"] == "awaiting_permission"
|
||||||
assert "permission_request" in result["result"]
|
assert "permission_request" in result["result"]
|
||||||
|
|
||||||
|
|
@ -108,8 +143,87 @@ def test_shell_exec_allows_safe_command(tmp_path: Path) -> None:
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
# Even safe commands require permission in the new permission model
|
||||||
|
assert result["status"] == "awaiting_permission"
|
||||||
|
assert "permission_request" in result["result"]
|
||||||
|
# Grant permission and verify execution
|
||||||
|
resumed = controller.resolve_permission(task_id=result["task_id"], decision="allow_once")
|
||||||
|
assert resumed["status"] == "completed"
|
||||||
|
assert str(tmp_path) in resumed["result"]["output"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_shell_exec_publishes_output_chunks_before_completion(tmp_path: Path) -> None:
|
||||||
|
_write_config_tree(tmp_path)
|
||||||
|
controller = RuntimeController(base_dir=tmp_path)
|
||||||
|
perm_override = PermissionDecision(
|
||||||
|
action_type="shell_command",
|
||||||
|
pattern="printf",
|
||||||
|
decision="allow_always",
|
||||||
|
)
|
||||||
|
|
||||||
|
task = UserTask(
|
||||||
|
input="stream shell output",
|
||||||
|
context={
|
||||||
|
"requested_tool": "shell_exec",
|
||||||
|
"tool_args": {"command": "printf 'first\\n'; sleep 0.1; printf 'second\\n'"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
result = controller.execution_engine.execute(
|
||||||
|
task,
|
||||||
|
ExecutionDirective(
|
||||||
|
type="tool",
|
||||||
|
payload={
|
||||||
|
"tool": "shell_exec",
|
||||||
|
"args": {"command": "printf 'first\\n'; sleep 0.1; printf 'second\\n'"},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
permission_override=perm_override,
|
||||||
|
)
|
||||||
|
|
||||||
|
events = controller.event_bus.list_for_task(task.task_id)
|
||||||
|
chunk_events = [event for event in events if event.type == TOOL_OUTPUT_CHUNK]
|
||||||
|
completed_index = next(index for index, event in enumerate(events) if event.type == "tool_completed")
|
||||||
|
first_chunk_index = next(index for index, event in enumerate(events) if event.type == TOOL_OUTPUT_CHUNK)
|
||||||
assert result["status"] == "completed"
|
assert result["status"] == "completed"
|
||||||
assert str(tmp_path) in result["result"]["output"]
|
assert [event.payload["chunk"] for event in chunk_events] == ["first\n", "second\n"]
|
||||||
|
assert first_chunk_index < completed_index
|
||||||
|
|
||||||
|
|
||||||
|
def test_streaming_shell_uses_idle_timeout_not_step_timeout(tmp_path: Path) -> None:
    """A streamed command may pause longer than ``timeout_ms`` between outputs.

    The sandbox has ``timeout_ms=100``, but the command sleeps 0.2 s between
    its two lines. With ``idle_timeout_ms=500`` and
    ``command_timeout_ms=2000`` it must still finish with exit code 0 and
    deliver both chunks.
    """
    received: list[str] = []

    def _collect(_stream, chunk):
        received.append(chunk)

    sandbox = ToolSandbox(
        allowed_root=tmp_path,
        timeout_ms=100,
        command_timeout_ms=2000,
        idle_timeout_ms=500,
    )
    outcome = sandbox.run_shell(
        command="printf 'first\\n'; sleep 0.2; printf 'second\\n'",
        output_callback=_collect,
    )

    assert outcome.returncode == 0
    assert outcome.stdout == "first\nsecond\n"
    assert received == ["first\n", "second\n"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_streaming_shell_timeout_kills_child_process_group(tmp_path: Path) -> None:
    """A command timeout must also stop the delayed child command.

    The command runs ``sh -c`` with a payload that would create a marker
    file after ``child_delay_s`` seconds, while ``command_timeout_ms`` is
    only 100 ms. The test expects return code -9 and no marker file.

    The marker is checked only after the child's delay has elapsed.
    Checking right after ``run_shell`` returns would pass even if the child
    survived, because it would not have reached ``touch`` yet.
    """
    import time  # local import: only this test needs it

    child_delay_s = 1
    marker = tmp_path / "child-survived"
    sandbox = ToolSandbox(
        allowed_root=tmp_path,
        timeout_ms=100,
        command_timeout_ms=100,
        idle_timeout_ms=1000,
    )

    started = time.monotonic()
    result = sandbox.run_shell(
        command=f"sh -c 'sleep {child_delay_s}; touch {marker}'",
        output_callback=lambda _stream, _chunk: None,
    )

    assert result.returncode == -9

    # Wait until a surviving child would have created the marker, plus a
    # small margin, so the check below can detect an orphaned process.
    remaining = (started + child_delay_s + 0.3) - time.monotonic()
    if remaining > 0:
        time.sleep(remaining)

    assert not marker.exists()
|
||||||
|
|
||||||
|
|
||||||
class _RecoveryCritic:
|
class _RecoveryCritic:
|
||||||
|
|
@ -122,6 +236,13 @@ def test_failed_shell_step_can_recover_and_continue(tmp_path: Path) -> None:
|
||||||
controller = RuntimeController(base_dir=tmp_path)
|
controller = RuntimeController(base_dir=tmp_path)
|
||||||
controller.execution_engine.set_critic(_RecoveryCritic())
|
controller.execution_engine.set_critic(_RecoveryCritic())
|
||||||
controller.execution_engine._recovery_limit = 1
|
controller.execution_engine._recovery_limit = 1
|
||||||
|
# Bypass permission check for this test — we're testing recovery, not permissions
|
||||||
|
from app.core.contracts import PermissionDecision
|
||||||
|
perm_override = PermissionDecision(
|
||||||
|
action_type="shell_command",
|
||||||
|
pattern="grep",
|
||||||
|
decision="allow_always",
|
||||||
|
)
|
||||||
result = controller.execution_engine.execute(
|
result = controller.execution_engine.execute(
|
||||||
UserTask(
|
UserTask(
|
||||||
input="run grep with no matches and recover",
|
input="run grep with no matches and recover",
|
||||||
|
|
@ -139,12 +260,177 @@ def test_failed_shell_step_can_recover_and_continue(tmp_path: Path) -> None:
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
|
permission_override=perm_override,
|
||||||
)
|
)
|
||||||
assert result["status"] == "completed"
|
assert result["status"] == "completed"
|
||||||
failed_result = result["result"]["step_results"][0]["result"]["result"]
|
failed_result = result["result"]["step_results"][0]["result"]["result"]
|
||||||
assert failed_result["metadata"]["exit_code"] == 1
|
assert failed_result["metadata"]["exit_code"] == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_privilege_scope_failure_awaits_user_review_before_replan(tmp_path: Path) -> None:
    """A privilege-scope failure must leave the task in ``awaiting_review``.

    ``shell_exec`` is replaced by a stub that always fails with a dpkg-lock
    "are you root?" message. After permission and a secret are supplied for
    ``sudo apt update && apt upgrade -y``, the result must carry a review
    whose diagnosis is ``privilege_scope_error`` and whose critic
    classification is ``model_planning_error``.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)
    task = UserTask(
        input="обнови систему",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "sudo apt update && apt upgrade -y"},
        },
    )

    class _DpkgLockShellTool:
        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output="Error: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?",
                error="Command failed with exit code 100",
                metadata={"exit_code": 100},
            )

    controller.tool_registry._tools["shell_exec"] = _DpkgLockShellTool()

    first = controller.handle_task(task)
    assert first["status"] == "awaiting_permission"

    controller.resolve_permission(task_id=task.task_id, decision="allow_once")
    outcome = controller.resolve_secret(task_id=task.task_id, secret="secret")

    assert outcome["status"] == "awaiting_review"
    review = outcome["result"]["review"]
    assert review["diagnosis"]["type"] == "privilege_scope_error"
    assert review["critic_assessment"]["classification"] == "model_planning_error"
|
||||||
|
|
||||||
|
|
||||||
|
def test_plan_pauses_on_privilege_scope_review_instead_of_completing(tmp_path: Path) -> None:
    """A plan whose only step hits a privilege-scope failure must await review.

    The plan is executed directly on the execution engine with permission
    and secret overrides. ``shell_exec`` is a stub that always fails with a
    dpkg-lock "are you root?" message.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    class _DpkgLockShellTool:
        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output="Error: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?",
                error="Command failed with exit code 100",
                metadata={"exit_code": 100},
            )

    controller.tool_registry._tools["shell_exec"] = _DpkgLockShellTool()

    single_step = {
        "id": "1",
        "tool": "shell_exec",
        "args": {"command": "sudo apt update && apt upgrade -y"},
        "depends_on": [],
    }
    outcome = controller.execution_engine.execute(
        UserTask(input="обнови систему"),
        ExecutionDirective(type="plan", payload={"steps": [single_step]}),
        permission_override=PermissionDecision(
            action_type="shell_command",
            pattern="apt",
            decision="allow_once",
        ),
        secret_override="secret",
    )

    assert outcome["status"] == "awaiting_review"
    assert outcome["result"]["review"]["diagnosis"]["type"] == "privilege_scope_error"
|
||||||
|
|
||||||
|
|
||||||
|
def test_sudo_auth_failure_requests_secret_retry_not_review(tmp_path: Path) -> None:
    """A sudo authentication failure must re-prompt for the password.

    The stub tool fails with ``sudo_auth_failed`` set in its metadata. The
    engine must answer ``awaiting_input`` with a ``sudo_password`` request,
    the retry prompt text, and ``attempt_failed`` set to True.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    class _RejectedPasswordShellTool:
        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output="Sorry, try again.\nsudo: no password was provided\nsudo: 1 incorrect password attempt\n",
                error="Command failed with exit code 1",
                metadata={"exit_code": 1, "sudo_auth_failed": True},
            )

    controller.tool_registry._tools["shell_exec"] = _RejectedPasswordShellTool()

    single_step = {
        "id": "1",
        "tool": "shell_exec",
        "args": {"command": "sudo apt update && apt upgrade -y"},
        "depends_on": [],
    }
    outcome = controller.execution_engine.execute(
        UserTask(input="обнови систему"),
        ExecutionDirective(type="plan", payload={"steps": [single_step]}),
        permission_override=PermissionDecision(
            action_type="shell_command",
            pattern="apt",
            decision="allow_once",
        ),
        secret_override="wrong",
    )

    assert outcome["status"] == "awaiting_input"
    secret_request = outcome["result"]["secret_request"]
    assert secret_request["kind"] == "sudo_password"
    assert secret_request["prompt"] == "Sudo password incorrect. Try again"
    assert outcome["result"]["attempt_failed"] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_runtime_keeps_secret_state_after_bad_sudo_password(tmp_path: Path) -> None:
    """After a rejected sudo password the task must still accept another secret.

    The stub tool fails with ``sudo_auth_failed`` on its first call and
    succeeds on later calls. The flow is: permission granted, first secret
    rejected (``awaiting_input`` with ``attempt_failed``), second secret
    accepted (``completed`` with the stub's output).
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    class _FailOnceShellTool:
        calls = 0

        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            self.calls += 1
            if self.calls != 1:
                # Every call after the first succeeds.
                return ToolResult(
                    tool="shell_exec",
                    ok=True,
                    output="root\n",
                    metadata={"exit_code": 0},
                )
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output="Sorry, try again.\nsudo: no password was provided\nsudo: 1 incorrect password attempt\n",
                error="Command failed with exit code 1",
                metadata={"exit_code": 1, "sudo_auth_failed": True},
            )

    controller.tool_registry._tools["shell_exec"] = _FailOnceShellTool()
    task = UserTask(
        input="кто root",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "sudo whoami"},
        },
    )

    first = controller.handle_task(task)
    assert first["status"] == "awaiting_permission"

    after_permission = controller.resolve_permission(task_id=task.task_id, decision="allow_once")
    assert after_permission["status"] == "awaiting_input"

    after_bad_secret = controller.resolve_secret(task_id=task.task_id, secret="wrong")
    assert after_bad_secret["status"] == "awaiting_input"
    assert after_bad_secret["result"]["attempt_failed"] is True

    after_good_secret = controller.resolve_secret(task_id=task.task_id, secret="correct")
    assert after_good_secret["status"] == "completed"
    assert after_good_secret["result"]["output"] == "root\n"
|
||||||
|
|
||||||
|
|
||||||
def test_permission_resolution_can_resume_task(tmp_path: Path) -> None:
|
def test_permission_resolution_can_resume_task(tmp_path: Path) -> None:
|
||||||
_write_config_tree(tmp_path)
|
_write_config_tree(tmp_path)
|
||||||
controller = RuntimeController(base_dir=tmp_path)
|
controller = RuntimeController(base_dir=tmp_path)
|
||||||
|
|
@ -169,12 +455,35 @@ def test_sudo_permission_resolution_requests_secret_input(tmp_path: Path) -> Non
|
||||||
assert resumed["result"]["secret_request"]["kind"] == "sudo_password"
|
assert resumed["result"]["secret_request"]["kind"] == "sudo_password"
|
||||||
|
|
||||||
|
|
||||||
|
def test_implicit_sudo_command_requests_password(tmp_path: Path) -> None:
    """A command without a literal 'sudo' prefix still leads to a password prompt.

    'apt list --upgradable' is used as the example: once permission is
    granted, the runtime is expected to ask for a sudo password.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    # The command deliberately does not start with 'sudo'.
    update_check = UserTask(
        input="проверь обновления",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "apt list --upgradable"},
        },
    )
    first_response = controller.handle_task(update_check)
    assert first_response["status"] == "awaiting_permission"

    # After the permission grant the task should be waiting for the sudo password.
    after_grant = controller.resolve_permission(
        task_id=first_response["task_id"],
        decision="allow_once",
    )
    assert after_grant["status"] == "awaiting_input"
    secret_request = after_grant["result"]["secret_request"]
    assert secret_request["kind"] == "sudo_password"
|
||||||
|
|
||||||
|
|
||||||
def test_secret_resolution_continues_after_pending_secret_saved(tmp_path: Path) -> None:
    """Submitting a secret for a pending sudo task moves the task forward.

    The flow is: submit task -> awaiting permission -> allow once ->
    awaiting input -> submit secret. The final status may be completed,
    failed, or awaiting input again.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    first_response = controller.handle_task(UserTask(input="запусти sudo apt update"))
    assert first_response["status"] == "awaiting_permission"

    task_id = first_response["task_id"]
    after_grant = controller.resolve_permission(task_id=task_id, decision="allow_once")
    assert after_grant["status"] == "awaiting_input"

    after_secret = controller.resolve_secret(task_id=task_id, secret="wrongpass")
    assert after_secret["status"] in {"completed", "failed", "awaiting_input"}

    # NOTE(review): for the "awaiting_input" outcome this still requires an
    # "error" or "output" key in the result — confirm the retry payload has one.
    payload = after_secret["result"]
    assert "error" in payload or "output" in payload
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue