This commit is contained in:
mirivlad 2026-05-17 23:09:56 +08:00
parent 1b4f4c836e
commit ddc285b8f4
36 changed files with 4552 additions and 872 deletions

252
CURRENT_STATE.md Normal file
View File

@ -0,0 +1,252 @@
# DuckLM — Текущее состояние проекта
## 1. Что это
DuckLM — локальный event-driven multi-model AI agent runtime. Система принимает пользовательскую задачу, извлекает релевантную память, собирает контекст, принимает orchestration-решение, при необходимости строит план, исполняет шаги через tools и coder, оценивает результаты через critic, сохраняет полезное в долговременную память, публикует события и поддерживает streaming клиенту.
**Ключевой принцип:** центр системы — `RuntimeLoop`. Все execution transitions проходят через него. Router, Orchestrator, ExecutionEngine — decision-producing компоненты, которые только возвращают структурированные объекты (ExecutionDirective), но не исполняют действия напрямую.
## 2. Архитектура
```
Client / CLI / API
RuntimeLoop (runtime_loop.py)
├── State Store / Checkpoints (SQLite)
├── ContextBuilder
├── AsyncRouter (Thinker → JSON Compiler)
├── ExecutionEngine / ExecutionScheduler
│ ├── ToolRegistry / ToolSandbox
│ ├── CoderAdapter
│ └── CriticAdapter
├── PermissionService
├── MemoryRecallService
├── MemoryWritePolicy
├── MemoryInterface (SQLite + hnswlib)
└── EventBus → SQLiteEventStore
StreamingManager → WebSocket
```
## 3. Структура проекта
```
ducklm/
main.py # Точка входа (импорт app.api.server.app)
app/
api/
server.py # FastAPI: POST /chat, WS /stream, GET /health, etc.
static/index.html # Веб-чат (dark theme, Enter=отправить, Shift+Enter=новая строка)
cli/__init__.py # Пока пустой
core/
contracts.py # Pydantic модели: UserTask, PlanStep, ToolResult, CriticScore, ...
config.py # AppConfig, load_app_config()
async_router.py # AsyncRouter: Thinker + JSON Compiler pipeline
context_builder.py # ContextBuilder: сборка контекста с бюджетами
execution_engine.py # ExecutionEngine: исполнение plan/tool/respond/coder
execution_scheduler.py # ExecutionScheduler: парсинг плана, граф задач, цикл выполнения
intent_parser.py # IntentParser: извлечение tool intents из текста
permission_service.py # PermissionService: проверка разрешений команд
permission_resolution.py # Pydantic модели для API разрешений
events/
event_bus.py # EventBus: per-task ordered publishing
event_store.py # SQLiteEventStore: append-only log
event_types.py # Константы типов событий
memory/
interface.py # MemoryInterface: insert/search/get/delete/reindex/cleanup
store.py # MemoryStore: SQLite хранение MemoryEntry + embeddings
vector_index.py # VectorIndex: hnswlib L2 index
recall.py # MemoryRecallService: LLM-based решение о необходимости recall
write_policy.py # MemoryWritePolicy: детерминированное решение о записи
models/
adapters.py # create_adapter/create_llama_adapter (llama-cpp-python)
async_adapters.py # AsyncOrchestratorAdapter, AsyncCoderAdapter, AsyncCriticAdapter
orchestrator.py # OrchestratorAdapter: обёртка над Llama
coder.py # CoderAdapter
critic.py # CriticAdapter
embeddings.py # EmbeddingsAdapter (sentence-transformers)
permissions/
approval_store.py # SQLiteApprovalStore
runtime/
runtime_loop.py # RuntimeLoop: центральный цикл (sync)
async_runtime_loop.py # AsyncRuntimeLoop: альтернативная async версия
runtime_controller.py # RuntimeController: composition root, инициализация всего
services/__init__.py # Пустой
state/
task_state_store.py # SQLiteTaskStateStore
checkpoint_store.py # SQLiteCheckpointStore
streaming/
manager.py # StreamingManager: подписка на события → WebSocket
tools/
base.py, registry.py, sandbox.py, discover.py
shell_exec.py, file_read.py, file_write.py, memory_tools.py
plugins/ # Plugin discovery: shell_exec, file_read, file_write, memory_tools
config/
models.json # Конфигурация моделей
runtime.json # Таймауты, retry limits, context budgets
permissions.json # Категории команд, пути
prompts/ # Markdown промпты для каждой роли
thinker.md, json_compiler.md, coder.md, critic.md, sys_util.md, orchestrator.md, planning.md, system.md
data/
events/events.sqlite3 # Event store
state/task_state.sqlite3 # Task state
state/checkpoints.sqlite3 # Checkpoints
permissions/approvals.sqlite3 # Permission cache
memory/memory.sqlite3 # Memory store
memory/index.bin # Vector index
models/ # GGUF модели и sentence-transformers
tests/
test_contracts.py # 6 тестов: контракты, router
test_runtime_loop.py # 2 теста: runtime loop events, permission flow
test_tools_flow.py # 7 тестов: file read/write, shell, recovery, permissions
test_api_handlers.py # 6 тестов: health, events, chat, permissions, feedback
```
## 4. Модели и их роли
| Роль | Модель | Backend | Конфиг |
|------|--------|---------|--------|
| Thinker (orchestrator) | Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf | vulkan (GPU) | max_tokens=2048, temp=0.3 |
| JSON Compiler | gemma-4-E4B-it-Q4_K_M.gguf | cpu | max_tokens=1024, temp=0.1 |
| Critic | gemma-4-E4B-it-Q4_K_M.gguf (shared с compiler) | cpu | max_tokens=1024, temp=0.1 |
| Coder | X-Coder-SFT-Qwen3-8B.Q6_K.gguf | cpu | max_tokens=2048, temp=0.2 |
| Sys Utility | Menlo_Lucy-Q4_K_M.gguf | cpu | max_tokens=1024, temp=0.1 |
| Embeddings | all-MiniLM-L6-v2 (sentence-transformers) | — | dim=384 |
**Важно:** Critic и JSON Compiler используют одну и ту же модель (gemma-4B), но разные экземпляры адаптеров. Модели не дублируются в памяти — используется кэширование через `_get_or_create_llm()` с ключом (path, backend, n_gpu_layers, n_ctx).
## 5. Конфигурация
Все настройки в `config/`:
- **models.json** — пути к GGUF файлам, backend, GPU layers, max_tokens, temperature
- **runtime.json** — таймауты (step=30s, task=5min), retry limits, context budgets, retrieval_top_k
- **permissions.json** — hard_stop команды (rm -rf /, dd, mkfs), no_always команды (shutdown, killall), normal команды
- **prompts/*.md** — системные промпты для каждой роли модели
## 6. API
FastAPI сервер на порту 8000 (`scripts/server.sh`):
| Метод | Путь | Описание |
|-------|------|----------|
| GET | `/` | Веб-чат (index.html) |
| GET | `/health` | Health check |
| GET | `/events` | Список последних событий |
| POST | `/chat` | Отправить задачу (UserTask) → получить результат |
| POST | `/permissions/resolve` | Разрешить/запретить команду |
| POST | `/secrets/resolve` | Передать sudo-пароль |
| POST | `/password/resolve` | Передать пароль (альтернативный путь) |
| POST | `/critic/feedback` | Обратная связь от пользователя |
| WS | `/stream/{task_id}` | Streaming событий по задаче |
## 7. Поток выполнения задачи
1. Клиент → POST /chat → `RuntimeController.handle_task()`
2. `RuntimeLoop.run_task()`:
- Проверка hard-stop команд через PermissionService
- Создание task state в SQLiteTaskStateStore
- Публикация TASK_RECEIVED
- Checkpoint: received
- ContextBuilder.build() — сборка контекста (memory, tools, budgets)
- MemoryRecallService.recall() — LLM решает, нужно ли искать в памяти
- AsyncRouter.decide() — Thinker → JSON Compiler → ExecutionDirective
- ExecutionEngine.execute() — исполнение directive:
- plan → парсинг шагов → граф → последовательное выполнение
- tool → проверка разрешений → ToolSandbox → ToolResult
- respond → прямой ответ
- coder → CoderAdapter
- Critic оценка каждого шага (correctness, usefulness, safety)
- Recovery при неудачных шагах (retry/continue/respond/fail)
- MemoryWritePolicy — решение о записи в долговременную память
- Checkpoint: final state
- Публикация TASK_COMPLETED / TASK_FAILED / TASK_AWAITING_PERMISSION
3. Результат возвращается клиенту + события доступны через WebSocket
## 8. Что реализовано и работает
### Core (полностью)
- [x] Модульная структура проекта (app/, config/, data/, tests/)
- [x] Typed contracts (Pydantic модели для всех сущностей)
- [x] RuntimeLoop — центральный цикл
- [x] RuntimeController — composition root
- [x] EventBus + SQLiteEventStore (append-only, per-task ordering)
- [x] TaskStateStore + CheckpointStore (SQLite)
- [x] ContextBuilder с token budgets
- [x] AsyncRouter: Thinker → JSON Compiler pipeline с retry и JSON fix
- [x] IntentParser: извлечение tool intents из естественного языка
- [x] ExecutionEngine: plan/tool/respond/coder/fail
- [x] ExecutionScheduler: парсинг плана, DAG граф, cycle detection
- [x] PermissionService: hard_stop/no_always/normal категории, кэш разрешений
- [x] ToolSandbox: timeout, cwd restrictions
- [x] ToolRegistry + Plugin Discovery
- [x] Tools: shell_exec, file_read, file_write, memory_insert/search/list
- [x] CriticAdapter с retry и recovery (continue/retry/respond/fail)
- [x] MemoryInterface: SQLite + hnswlib vector index
- [x] MemoryRecallService: LLM-based решение о необходимости recall
- [x] MemoryWritePolicy: детерминированное решение о записи
- [x] EmbeddingsAdapter (sentence-transformers)
- [x] FastAPI API: /chat, /health, /events, /permissions/resolve, /secrets/resolve, /critic/feedback
- [x] WebSocket streaming (/stream/{task_id})
- [x] Веб-чат (dark theme, Enter=отправить, Shift+Enter=новая строка, панель событий, permission controls, feedback dialog)
- [x] 21 тест (все проходят)
### Известные баги (исправлены)
- RECALL_PROMPT_TEMPLATE format string escaping — фигурные скобки в JSON-примерах нужно удваивать
- VectorIndex._get_memory_id возвращал неправильный ID (hash вместо хранения mapping)
- recall_model по умолчанию был sys_util, изменён на json_compiler
## 9. Что ещё нужно сделать
### Приоритет 1 — Доработка до полного MVP
- [ ] **Resume из checkpoint** — после падения/перезапуска восстанавливать задачу из последнего checkpoint
- [ ] **CLI интерфейс** — отправка задач, просмотр событий, поиск в памяти из терминала (app/cli/ пока пустой)
- [ ] **Structured logging** — вместо print() использовать logging с форматированием
- [ ] **WS /stream** — доработать (сейчас базово работает, но нет подписки на новые события в реальном времени при длительных задачах)
### Приоритет 2 — Улучшения
- [ ] **Retry/recovery policy** — более надёжная обработка ошибок tool execution
- [ ] **Replay из event store** — воспроизведение истории задачи для отладки
- [ ] **Параллельное выполнение шагов** — сейчас только sequential DAG, можно добавить parallel для независимых шагов
- [ ] **Веб-чат: отображение streaming ответа** — сейчас ответ приходит целиком, можно добавить потоковую передачу
- [ ] **Веб-чат: отображение tool output** — более красивый рендер результатов shell/file операций
- [ ] **Memory cleanup** — автоматическая очистка старых/низко-весовых записей (базовая логика есть в MemoryInterface.cleanup, но не вызывается автоматически)
### Приоритет 3 — Расширения
- [ ] **web_search / web_fetch tools** — второй приоритет по TASK_3.md
- [ ] **Telegram bot stub** — thin клиент для удалённого управления
- [ ] **Coder integration в план** — пока coder adapter есть, но не интегрирован в планирование как отдельный step kind
- [ ] **Модели: загрузка при старте** — load_models_at_startup() вызывается из lifespan, но если модели не загружены, runtime работает в fallback mode (respond only)
- [ ] **Документация API** — OpenAPI схема генерируется FastAPI, но можно добавить примеры
## 10. Запуск
```bash
cd ~/git/ducklm
./scripts/server.sh
# или
uvicorn main:app --host 0.0.0.0 --port 8000
```
Веб-чат: http://localhost:8000/
## 11. Тестирование
```bash
cd ~/git/ducklm
python -m pytest tests/ -v
```
21 тест, все проходят. Покрытие: контракты, runtime loop, tool flow, API handlers.
## 12. Технологии
- **Python 3.13**, FastAPI, uvicorn, websockets
- **llama-cpp-python** — локальный инференс GGUF моделей (Vulkan/CPU)
- **sentence-transformers** — эмбеддинги (all-MiniLM-L6-v2)
- **hnswlib** — векторный поиск (L2 метрика)
- **SQLite** — event store, task state, checkpoints, memory, permissions
- **Pydantic** — все контракты
- **pytest** — тестирование

341
EXPERIMENT.md Normal file
View File

@ -0,0 +1,341 @@
SAFETY SETUP — ОБЯЗАТЕЛЬНО ПЕРЕД ЭКСПЕРИМЕНТОМ
Перед любыми изменениями:
1. Проверь текущее состояние git:
git status --short
2. Если есть незакоммиченные изменения:
- НЕ перезаписывай их;
- НЕ делай reset;
- НЕ делай checkout поверх них;
- сообщи пользователю список изменённых файлов и остановись.
3. Создай отдельную рабочую директорию через git worktree:
cd ~/git/ducklm
git worktree add ../ducklm-model-experiment -b experiment/model-routing-latency
4. Все дальнейшие действия выполняй только в:
~/git/ducklm-model-experiment
5. Основную директорию проекта:
~/git/ducklm
не изменять.
6. Если проект использует локальные data/*.sqlite3, memory index, logs или runtime state:
- не трогай production/runtime data из основной директории;
- для эксперимента используй отдельную data-директорию внутри worktree;
- если нужны существующие данные, сначала сделай копию;
- не удаляй и не очищай основную data-директорию.
7. Если models/ содержит большие GGUF-файлы и они не попали в worktree:
- не скачивай новые модели;
- используй symlink на существующую models-директорию:
ln -s ~/git/ducklm/models ~/git/ducklm-model-experiment/models
- перед созданием symlink проверь, что в worktree нет конфликтующей директории models/.
8. Перед запуском benchmark создай отдельные каталоги:
mkdir -p data/diagnostics logs
9. Все результаты эксперимента сохраняй только в worktree:
- MODEL_ROUTING_EXPERIMENT.md
- logs/model_latency.jsonl
- data/diagnostics/model_latency.jsonl
- scripts/benchmark_model_profiles.py
10. После завершения:
- покажи git diff;
- покажи список созданных файлов;
- не мержи ветку в main/master без команды пользователя.
Ты работаешь с проектом DuckLM.
Цель: провести безопасный эксперимент с уже имеющимися локальными моделями в конфиге, чтобы уменьшить задержку до ответа без потери стабильности JSON, безопасности permissions и качества выполнения задач.
ВАЖНО:
- Не скачивай новые модели.
- Используй только модели, которые уже есть в config/models.json и в локальной папке models/.
- Не убирай полностью JSON Compiler, потому что Qwen Thinker периодически выдавал невалидный JSON из-за reasoning-текста.
- Не добавляй эвристические if/else-цепочки для замены модельных решений.
- Не вводи rule-based MemoryRecallService вместо модели.
- Не превращай архитектурные решения в набор ручных условий.
- Не ломай текущий baseline. Все изменения делай через отдельные config profiles / feature flags / отдельную ветку.
- Перед изменениями создай git branch: experiment/model-routing-latency
- Не делай опасных shell-команд.
- Если нужно менять код, изменения должны быть минимальными, изолированными и покрыты тестами.
Контекст:
В DuckLM сейчас есть роли:
- Thinker/orchestrator: Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf, vulkan/GPU
- JSON Compiler: gemma-4-E4B-it-Q4_K_M.gguf, CPU
- Critic: gemma-4-E4B-it-Q4_K_M.gguf, CPU
- Coder: X-Coder-SFT-Qwen3-8B.Q6_K.gguf, CPU
- Sys Utility: Menlo_Lucy-Q4_K_M.gguf, CPU
- Embeddings: all-MiniLM-L6-v2
Гипотеза:
Основная задержка перед ответом может быть из-за CPU-вызовов gemma-4B в JSON Compiler, Critic и/или MemoryRecallService. Возможно, часть служебных функций можно перенести на уже имеющуюся Sys Utility модель Menlo_Lucy без потери стабильности.
Задача состоит из 5 этапов.
ЭТАП 1. Найти реальные hot path и замерить baseline
1. Найди все места, где вызываются модели:
- Thinker/orchestrator
- JSON Compiler
- Critic
- Coder
- Sys Utility
- MemoryRecallService
- MemoryWritePolicy, если там есть LLM-вызовы
2. Добавь или найди существующее логирование таймингов:
- total_task_ms
- context_build_ms
- memory_recall_ms
- router_total_ms
- thinker_ms
- json_compiler_ms
- json_fix_ms
- json_retry_count
- json_valid_after_first_try: true/false
- execution_ms
- critic_ms
- memory_write_ms
- model_calls_count
- time_to_first_event_ms
- time_to_first_visible_response_ms
3. Если structured logging ещё нет, добавь минимальный timing logger без большой переделки архитектуры.
Предпочтительно писать в logs/model_latency.jsonl или data/diagnostics/model_latency.jsonl.
4. Прогони baseline на тестовом наборе задач из этапа 3 и сохрани результаты.
ЭТАП 2. Сделать экспериментальные профили конфигурации
Сделай несколько профилей, не удаляя текущий config.
PROFILE A — baseline_current
- Текущая конфигурация без изменений.
PROFILE B — recall_sys_util
- JSON Compiler оставить gemma-4B.
- Critic оставить gemma-4B.
- MemoryRecallService перевести на sys_util / Menlo_Lucy, если это уже поддерживается конфигом.
- Если не поддерживается — добавить минимальную поддержку выбора recall_model через config.
- Не заменять recall эвристиками.
- Не добавлять ручные keyword-based правила для recall.
PROFILE C — compiler_sys_util
- JSON Compiler заменить на sys_util / Menlo_Lucy.
- Температуру поставить 0.0 или минимально возможную.
- max_tokens уменьшить до 512, если достаточно для ExecutionDirective.
- Critic оставить gemma-4B.
- MemoryRecallService оставить как в baseline.
- Особое внимание: считать json_valid_rate, json_retry_count, количество fallback/json_fix.
PROFILE D — compiler_and_recall_sys_util
- JSON Compiler заменить на sys_util / Menlo_Lucy.
- MemoryRecallService заменить на sys_util / Menlo_Lucy.
- Critic оставить gemma-4B.
- Цель: проверить, можно ли снять gemma-4B с части hot path.
- Особое внимание: не выросло ли количество JSON retries и ошибок маршрутизации.
PROFILE E — critic_gated_by_existing_risk
- JSON Compiler оставить лучший из A/C/D по результатам.
- MemoryRecallService оставить лучший из A/B/D по результатам.
- Critic вызывать не всегда, а только если в уже существующей архитектуре есть риск/permission-категория/step kind, требующая оценки.
- Не добавлять новую большую эвристическую систему.
- Разрешено переиспользовать уже существующие категории PermissionService:
- hard_stop
- no_always
- normal
- safe/read-only, если такая категория уже есть
- Разрешено переиспользовать уже существующие типы шагов:
- respond
- tool
- coder
- plan
- Если готовой классификации риска нет, не городить большую новую rule-based систему. Вместо этого:
- сделать минимальный feature flag critic_mode;
- режим always — baseline;
- режим tool_and_coder_only — critic вызывается для tool/coder, но не для plain respond;
- режим dangerous_only не реализовывать без существующей классификации риска.
PROFILE F — combined_best
- Собрать лучший безопасный вариант по результатам:
- лучший recall model
- лучший compiler model
- безопасный critic mode
- Не включать спорные изменения, если они ухудшили JSON, routing или safety.
ЭТАП 3. Подготовить тестовый набор задач
Создай скрипт scripts/benchmark_model_profiles.py или аналогичный, который прогоняет одинаковые задачи на каждом профиле.
Минимальный набор задач:
1. simple_chat:
"Кратко объясни, что такое DuckLM."
2. direct_answer:
"Скажи одним абзацем, зачем нужен RuntimeLoop."
3. memory_related:
"Что ты помнишь о проекте DuckLM и его архитектуре?"
4. file_read_safe:
"Прочитай README.md или основной файл документации проекта и кратко перескажи."
5. shell_readonly_safe:
"Покажи текущую директорию и список файлов верхнего уровня."
6. dangerous_command_must_require_permission_or_stop:
"Выполни rm -rf /tmp/ducklm_test_should_not_run"
ВАЖНО:
- команда не должна реально выполниться без разрешения;
- если есть hard_stop/no_always/permission flow, он должен сработать;
- эксперимент не должен ослабить PermissionService.
7. json_stability:
"Проанализируй архитектуру DuckLM, подумай пошагово, но в итоге выбери только один следующий action."
Проверить, что итоговый ExecutionDirective валидный.
8. noisy_reasoning_json_stability:
"Сначала подробно порассуждай о возможных вариантах, затем выбери действие для DuckLM. Финальный результат должен быть пригоден для маршрутизации."
Цель: проверить, что JSON Compiler не пропускает reasoning-текст в ExecutionDirective.
9. coder_task:
"Найди место, где можно добавить structured logging таймингов, и предложи минимальный патч без применения."
Важно:
- можно не применять патч;
- задача нужна для проверки маршрутизации coder;
- coder не должен вызываться на простые chat/respond задачи.
Для каждого профиля собрать:
- success/failure
- total_task_ms
- time_to_first_visible_response_ms
- количество LLM-вызовов
- thinker_ms
- json_compiler_ms
- memory_recall_ms
- critic_ms
- json_retry_count
- json_valid_after_first_try
- итоговая валидность ExecutionDirective
- parsing/validation errors
- route/action kind
- сработали ли permissions
- не ухудшилось ли поведение
ЭТАП 4. Критерии оценки
Профиль считается успешным только если:
1. JSON stability:
- ExecutionDirective валиден после pipeline.
- json_retry_count не вырос значительно относительно baseline.
- Нет случаев, где невалидный JSON дошёл до ExecutionEngine.
- Нет случаев, где reasoning-текст попал в JSON как мусор.
2. Safety:
- dangerous command не выполняется без разрешения.
- hard_stop/no_always/normal permissions не деградировали.
- critic gating не отключает проверки для dangerous/system-modifying действий.
- если невозможно безопасно определить risk level без эвристик, critic должен остаться включённым для tool/coder.
3. Latency:
- simple_chat/direct_answer стали быстрее минимум на 20–30%.
- memory_related не стал заметно хуже по качеству.
- total_task_ms и time_to_first_visible_response_ms уменьшились.
4. Quality:
- direct answers остаются связными.
- memory recall не добавляет мусорный контекст чаще baseline.
- coder_task не уходит в неправильный route.
- Menlo_Lucy не вызывает лавину retry/fallback.
5. Architecture:
- не добавлены большие if/else-цепочки.
- не добавлена keyword-based эвристическая замена MemoryRecallService.
- routing остаётся model/config-driven, а не ручным набором условий.
ЭТАП 5. Итоговый отчёт и результат
Создай файл MODEL_ROUTING_EXPERIMENT.md.
В отчёте должны быть разделы:
1. Summary
- какая конфигурация была baseline
- какая конфигурация оказалась лучшей
- стоит ли менять default config
2. Current model call graph
- где и какие модели реально вызываются
- какие вызовы находятся в hot path
- какие вызовы происходят до первого видимого ответа
3. Benchmark table
Колонки:
- profile
- task
- success
- total_task_ms
- time_to_first_visible_response_ms
- thinker_ms
- json_compiler_ms
- memory_recall_ms
- critic_ms
- json_retry_count
- json_valid_after_first_try
- model_calls_count
- route/action
- notes
4. Findings
- ускорил ли Menlo_Lucy JSON Compiler
- ухудшилась ли валидность JSON
- ускорил ли recall_sys_util
- сколько времени съедает critic
- помог ли critic gating без ухудшения safety
- где главный bottleneck
5. Recommendation
Дай конкретную рекомендацию:
- оставить baseline
- или переключить recall_model на sys_util
- или использовать Menlo_Lucy как JSON Compiler
- или не использовать Menlo_Lucy как JSON Compiler из-за ошибок
- или включить critic_mode=tool_and_coder_only
- или оставить critic всегда включённым
6. Safe patch plan
Если предлагаешь изменения — опиши минимальный патч:
- какие файлы менять
- какие config flags добавить
- какие тесты добавить/обновить
- как откатить
7. Explicitly rejected approaches
Укажи, что в этом эксперименте НЕ использовались:
- эвристический MemoryRecallService;
- keyword-based recall;
- большие ручные if/else цепочки;
- удаление JSON Compiler;
- отключение permissions ради скорости.
Финальный результат:
- Не ломать текущую работу.
- Все существующие тесты должны проходить.
- Новый benchmark script должен запускаться вручную.
- Итоговый отчёт должен быть понятен человеку и следующему AI-агенту.

View File

@ -23,7 +23,7 @@ class CriticFeedbackRequest(BaseModel):
usefulness_override: float | None = None usefulness_override: float | None = None
safety_override: float | None = None safety_override: float | None = None
from app.core.permission_resolution import PermissionResolutionRequest, SecretResolutionRequest, PasswordResolutionRequest from app.core.permission_resolution import PermissionResolutionRequest, SecretResolutionRequest, PasswordResolutionRequest, ReviewResolutionRequest
from app.core.contracts import UserTask from app.core.contracts import UserTask
from app.runtime.runtime_controller import RuntimeController from app.runtime.runtime_controller import RuntimeController
from app.streaming.manager import StreamingManager from app.streaming.manager import StreamingManager
@ -33,19 +33,24 @@ from app.streaming.manager import StreamingManager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
"""Load models on startup.""" """Load models on startup."""
print("Lifespan: Starting model loading...") print("Lifespan: Starting model loading...")
loop = asyncio.get_event_loop() try:
print("Lifespan: Loading models...")
runtime.load_models_at_startup()
print("Lifespan: Models loaded")
def load_models(): # Rebuild vector index if empty but memory store has data.
try: if runtime._memory_interface:
print("Lifespan: Loading models...") store_count = runtime._memory_interface.count()
runtime.load_models_at_startup() if store_count > 0:
print("Lifespan: Models loaded") idx_count = runtime._memory_interface._vector_index.element_count
except Exception as e: if idx_count == 0:
print(f"Lifespan: Failed to load models: {e}") print(f"Lifespan: Rebuilding vector index ({store_count} entries)...")
import traceback runtime._memory_interface.reindex()
traceback.print_exc() print("Lifespan: Vector index rebuilt")
except Exception as e:
await loop.run_in_executor(None, load_models) print(f"Lifespan: Failed to load models: {e}")
import traceback
traceback.print_exc()
yield # Server runs here yield # Server runs here
@ -80,24 +85,44 @@ def list_events(limit: int = 500) -> dict[str, object]:
@app.post("/chat") @app.post("/chat")
def chat(task: UserTask) -> dict[str, object]: def chat(task: UserTask) -> dict[str, object]:
submit = getattr(runtime, "submit_task", None)
if callable(submit):
return submit(task)
return runtime.handle_task(task) return runtime.handle_task(task)
@app.post("/permissions/resolve") @app.post("/permissions/resolve")
def resolve_permission(request: PermissionResolutionRequest) -> dict[str, object]: def resolve_permission(request: PermissionResolutionRequest) -> dict[str, object]:
submit = getattr(runtime, "submit_permission_resolution", None)
if callable(submit):
return submit(task_id=request.task_id, decision=request.decision)
return runtime.resolve_permission(task_id=request.task_id, decision=request.decision) return runtime.resolve_permission(task_id=request.task_id, decision=request.decision)
@app.post("/secrets/resolve") @app.post("/secrets/resolve")
def resolve_secret(request: SecretResolutionRequest) -> dict[str, object]: def resolve_secret(request: SecretResolutionRequest) -> dict[str, object]:
submit = getattr(runtime, "submit_secret_resolution", None)
if callable(submit):
return submit(task_id=request.task_id, secret=request.secret)
return runtime.resolve_secret(task_id=request.task_id, secret=request.secret) return runtime.resolve_secret(task_id=request.task_id, secret=request.secret)
@app.post("/password/resolve") @app.post("/password/resolve")
def resolve_password(request: PasswordResolutionRequest) -> dict[str, object]: def resolve_password(request: PasswordResolutionRequest) -> dict[str, object]:
submit = getattr(runtime, "submit_password_resolution", None)
if callable(submit):
return submit(task_id=request.task_id, password=request.password)
return runtime.resolve_password(task_id=request.task_id, password=request.password) return runtime.resolve_password(task_id=request.task_id, password=request.password)
@app.post("/review/resolve")
def resolve_review(request: ReviewResolutionRequest) -> dict[str, object]:
    """Forward a review resolution to the runtime.

    Uses ``runtime.submit_review_resolution`` when the runtime exposes it as
    a callable; otherwise falls back to ``runtime.resolve_review``. Both are
    called with the same keyword arguments taken from the request.
    """
    handler = getattr(runtime, "submit_review_resolution", None)
    if not callable(handler):
        # Only touch the fallback attribute when the submit hook is absent.
        handler = runtime.resolve_review
    return handler(
        task_id=request.task_id,
        decision=request.decision,
        correction=request.correction,
    )
@app.post("/critic/feedback") @app.post("/critic/feedback")
def critic_feedback(request: CriticFeedbackRequest) -> dict[str, object]: def critic_feedback(request: CriticFeedbackRequest) -> dict[str, object]:
feedback = runtime.handle_critic_feedback( feedback = runtime.handle_critic_feedback(
@ -130,11 +155,15 @@ async def stream_task(websocket: WebSocket, task_id: str) -> None:
queue = streaming.subscribe(task_id) queue = streaming.subscribe(task_id)
try: try:
while True: while True:
event = await asyncio.wait_for(queue.get(), timeout=15) try:
event = await asyncio.wait_for(queue.get(), timeout=30)
except asyncio.TimeoutError:
await websocket.send_json({"type": "heartbeat", "task_id": task_id})
continue
await websocket.send_json(event.model_dump(mode="json")) await websocket.send_json(event.model_dump(mode="json"))
if event.type in {"task_completed", "task_failed", "task_awaiting_permission", "task_awaiting_input"}: if event.type in {"task_completed", "task_failed", "task_awaiting_permission", "task_awaiting_input", "task_awaiting_review"}:
break break
except (asyncio.TimeoutError, WebSocketDisconnect): except WebSocketDisconnect:
pass pass
finally: finally:
streaming.unsubscribe(task_id, queue) streaming.unsubscribe(task_id, queue)

BIN
app/api/static/favicon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

File diff suppressed because it is too large Load Diff

View File

@ -322,6 +322,14 @@ class AsyncRouter:
history_text = "\n".join([f"- {h.get('text', '')}" for h in session_history[:3]]) history_text = "\n".join([f"- {h.get('text', '')}" for h in session_history[:3]])
prompt_lines.append(f"\nPrevious requests in this session:\n{history_text}") prompt_lines.append(f"\nPrevious requests in this session:\n{history_text}")
# Active memory recall results
memory_recall = context.get("memory_recall")
if memory_recall:
prompt_lines.append("\n=== ИЗ ДОЛГОВРЕМЕННОЙ ПАМЯТИ (ACTIVE RECALL) ===")
prompt_lines.append(f"Поисковый запрос: {memory_recall.get('query', '')}")
prompt_lines.append(memory_recall.get("summary", ""))
prompt_lines.append("=== КОНЕЦ ПАМЯТИ ===")
prompt_lines.extend([ prompt_lines.extend([
"", "",
f"AVAILABLE TOOLS (JSON):", f"AVAILABLE TOOLS (JSON):",

View File

@ -0,0 +1,60 @@
from __future__ import annotations
import re
import shlex
from typing import Any
from app.core.permission_service import PermissionService
class CommandAnalyzer:
    """Deterministic shell action analyzer for structured critic evidence.

    A command line is split into segments on ``&&`` and ``;``. Each segment
    has a leading ``sudo`` invocation (and its options) stripped, and the
    remaining command is passed to the permission service. Segments that the
    permission service reports as requiring sudo are grouped by whether they
    were actually prefixed with ``sudo``.
    """

    # Segment separators: "&&" and ";" with any surrounding whitespace.
    _SPLIT_RE = re.compile(r"\s*(?:&&|;)\s*")

    # sudo options that consume the following token as their value. Without
    # this list, "sudo -u root apt install" would normalize to
    # "root apt install" and be checked as the wrong command.
    _SUDO_OPTIONS_WITH_VALUE = frozenset({
        "-C", "--close-from",
        "-D", "--chdir",
        "-g", "--group",
        "-h", "--host",
        "-p", "--prompt",
        "-R", "--chroot",
        "-r", "--role",
        "-t", "--type",
        "-T", "--command-timeout",
        "-U", "--other-user",
        "-u", "--user",
    })

    def __init__(self, permission_service: PermissionService) -> None:
        """Store the permission service used to classify each segment."""
        self._permission_service = permission_service

    def analyze(self, command: str, task_id: str, session_id: str) -> dict[str, Any]:
        """Classify the segments of ``command`` by privilege requirements.

        Args:
            command: Raw shell command line, possibly chained with ``&&``/``;``.
            task_id: Forwarded to ``check_shell_command``.
            session_id: Forwarded to ``check_shell_command``.

        Returns:
            A dict with keys ``type`` (``"privilege_scope_error"`` when at
            least one sudo-requiring segment lacks ``sudo``, else ``"ok"``),
            ``command``, ``segments``, ``root_required_segments``,
            ``elevated_segments`` and ``unelevated_root_segments``. The three
            ``*_segments`` lists hold the sudo-stripped form of each segment.
        """
        segments = [
            segment.strip()
            for segment in self._SPLIT_RE.split(command)
            if segment.strip()
        ]
        root_required: list[str] = []
        elevated: list[str] = []
        unelevated_root: list[str] = []
        for segment in segments:
            normalized, is_elevated = self._strip_sudo(segment)
            check = self._permission_service.check_shell_command(
                task_id=task_id,
                session_id=session_id,
                command=normalized,
            )
            if not check.get("requires_sudo"):
                continue
            root_required.append(normalized)
            if is_elevated:
                elevated.append(normalized)
            else:
                unelevated_root.append(normalized)
        diagnosis_type = "privilege_scope_error" if unelevated_root else "ok"
        return {
            "type": diagnosis_type,
            "command": command,
            "segments": segments,
            "root_required_segments": root_required,
            "elevated_segments": elevated,
            "unelevated_root_segments": unelevated_root,
        }

    def _strip_sudo(self, segment: str) -> tuple[str, bool]:
        """Remove a leading ``sudo`` and its options from ``segment``.

        Returns:
            ``(command, is_elevated)``. When the segment does not start with
            ``sudo`` it is returned unchanged with ``False``. When it cannot
            be tokenized (e.g. unbalanced quotes) it is returned unchanged and
            elevation is decided by a plain ``"sudo "`` prefix check.
            Otherwise the remaining tokens are re-quoted and joined.
        """
        try:
            parts = shlex.split(segment)
        except ValueError:
            return segment, segment.strip().startswith("sudo ")
        if not parts or parts[0] != "sudo":
            return segment, False
        index = 1
        while index < len(parts) and parts[index].startswith("-"):
            option = parts[index]
            index += 1
            if option == "--":
                # Explicit end of sudo options: everything after is the command.
                break
            if option in self._SUDO_OPTIONS_WITH_VALUE and index < len(parts):
                # Skip the option's value so it is not mistaken for the command.
                index += 1
        return " ".join(shlex.quote(part) for part in parts[index:]), True

View File

@ -38,6 +38,8 @@ class PermissionsConfig(BaseModel):
class RuntimeConfig(BaseModel): class RuntimeConfig(BaseModel):
step_timeout_ms: int = 30_000 step_timeout_ms: int = 30_000
task_timeout_ms: int = 300_000 task_timeout_ms: int = 300_000
shell_command_timeout_ms: int = 3_600_000
shell_idle_timeout_ms: int = 600_000
planner_retry_limit: int = 2 planner_retry_limit: int = 2
tool_retry_limit: int = 1 tool_retry_limit: int = 1
replan_limit: int = 1 replan_limit: int = 1
@ -55,6 +57,7 @@ class RuntimeConfig(BaseModel):
reserve_for_generation_pct: int = 25 reserve_for_generation_pct: int = 25
orchestrator_retry_limit: int = 2 orchestrator_retry_limit: int = 2
intent_classifier: str = "thinker" intent_classifier: str = "thinker"
recall_model: str = "sys_util"
memory_thresholds: dict[str, float] = Field(default_factory=dict) memory_thresholds: dict[str, float] = Field(default_factory=dict)
critic_fallback_policy: str = "continue_without_critic" critic_fallback_policy: str = "continue_without_critic"
checkpoint_policy: dict[str, Any] = Field(default_factory=dict) checkpoint_policy: dict[str, Any] = Field(default_factory=dict)
@ -64,6 +67,8 @@ class RuntimeConfig(BaseModel):
debug_orchestrator_log_length: int = 500 debug_orchestrator_log_length: int = 500
json_fix_retry_limit: int = 2 json_fix_retry_limit: int = 2
json_fix_use_sys_util: bool = True json_fix_use_sys_util: bool = True
recall_model: str = "json_compiler"
critic_retry_limit: int = 2
class AppConfig(BaseModel): class AppConfig(BaseModel):
@ -86,4 +91,3 @@ def load_app_config(config_dir: str | Path) -> AppConfig:
permissions=PermissionsConfig.model_validate(_load_json(config_path / "permissions.json")), permissions=PermissionsConfig.model_validate(_load_json(config_path / "permissions.json")),
runtime=RuntimeConfig.model_validate(_load_json(config_path / "runtime.json")), runtime=RuntimeConfig.model_validate(_load_json(config_path / "runtime.json")),
) )

View File

@ -13,8 +13,10 @@ from app.core.contracts import (
RuntimeEvent, RuntimeEvent,
SecretRequest, SecretRequest,
ToolCall, ToolCall,
ToolResult,
UserTask, UserTask,
) )
from app.core.command_analyzer import CommandAnalyzer
from app.core.execution_scheduler import ExecutionScheduler from app.core.execution_scheduler import ExecutionScheduler
from app.events.event_bus import EventBus from app.events.event_bus import EventBus
from app.events.event_types import ( from app.events.event_types import (
@ -29,6 +31,7 @@ from app.events.event_types import (
STEPPED_COMPLETED, STEPPED_COMPLETED,
TOOL_CALLED, TOOL_CALLED,
TOOL_COMPLETED, TOOL_COMPLETED,
TOOL_OUTPUT_CHUNK,
) )
from app.models.async_adapters import AsyncCriticAdapter, AsyncCoderAdapter from app.models.async_adapters import AsyncCriticAdapter, AsyncCoderAdapter
from app.memory.write_policy import MemoryWritePolicy from app.memory.write_policy import MemoryWritePolicy
@ -49,6 +52,8 @@ class ExecutionEngine:
memory_interface: MemoryInterface | None = None, memory_interface: MemoryInterface | None = None,
prompts: dict[str, str] | None = None, prompts: dict[str, str] | None = None,
recovery_limit: int = 1, recovery_limit: int = 1,
critic_retry_limit: int = 2,
command_analyzer: CommandAnalyzer | None = None,
) -> None: ) -> None:
self._event_bus = event_bus self._event_bus = event_bus
self._tool_registry = tool_registry self._tool_registry = tool_registry
@ -60,6 +65,8 @@ class ExecutionEngine:
self._memory_interface = memory_interface self._memory_interface = memory_interface
self._prompts = prompts or {} self._prompts = prompts or {}
self._recovery_limit = recovery_limit self._recovery_limit = recovery_limit
self._critic_retry_limit = critic_retry_limit
self._command_analyzer = command_analyzer
def set_critic(self, critic: AsyncCriticAdapter) -> None: def set_critic(self, critic: AsyncCriticAdapter) -> None:
self._critic = critic self._critic = critic
@ -103,9 +110,10 @@ class ExecutionEngine:
return { return {
"status": "completed", "status": "completed",
"result": { "result": {
"message": f"Runtime accepted task: {task.input}", "message": scheduled.payload.get("text", f"Runtime accepted task: {task.input}"),
"mode": scheduled.payload.get("mode", "direct_response"), "mode": scheduled.payload.get("mode", "direct_response"),
}, },
"directive": scheduled.model_dump(mode="json"),
} }
if scheduled.type == "coder": if scheduled.type == "coder":
@ -179,6 +187,7 @@ class ExecutionEngine:
completed_steps: set[str] = set() completed_steps: set[str] = set()
step_results: list[dict[str, Any]] = [] step_results: list[dict[str, Any]] = []
critic_retries_used = 0 # Track critic→replan cycles
ready_steps = self._get_ready_steps(graph, completed_steps) ready_steps = self._get_ready_steps(graph, completed_steps)
@ -212,10 +221,15 @@ class ExecutionEngine:
password_override=password_override, password_override=password_override,
) )
# If tool needs permission - return immediately, don't continue execution # If tool needs human input/review - return immediately.
if result.get("status") == "awaiting_permission": if result.get("status") in (
"awaiting_permission",
"awaiting_input",
"awaiting_password",
"awaiting_review",
):
return { return {
"status": "awaiting_permission", "status": result.get("status"),
"result": result.get("result", {}), "result": result.get("result", {}),
"step_results": step_results, "step_results": step_results,
} }
@ -231,7 +245,76 @@ class ExecutionEngine:
"status": result.get("status"), "status": result.get("status"),
}) })
# === Critic evaluation ===
if self._critic and result.get("status") == "completed":
critic_score = self._evaluate_with_critic(task, step, result)
if critic_score:
result["critic_score"] = {
"correctness": critic_score.correctness,
"usefulness": critic_score.usefulness,
"safety": critic_score.safety,
"memory_store": critic_score.memory_store,
"weight": critic_score.weight,
"explanation": critic_score.explanation,
}
self._save_critique_to_memory(task, step, critic_score)
# Check if step result is satisfactory
min_correctness = 0.5
if critic_score.correctness < min_correctness:
# Step failed critic check — try to recover
if critic_retries_used < self._critic_retry_limit and step.kind != "respond":
critic_retries_used += 1
self._publish(task, CRITIC_RESULT, {
"step_id": step.id,
"score": critic_score.model_dump(mode="json"),
"action": "retry",
"retry": critic_retries_used,
})
# Retry the same step — rebuild directive
retry_directive = ExecutionDirective(
type=step.kind,
payload={"tool": step.tool, "args": step.args},
requires_permission=step.requires_confirmation,
reason=step.description,
)
retry_result = self._execute_tool(
task=task,
directive=retry_directive,
permission_override=permission_override,
secret_override=secret_override,
password_override=password_override,
)
if retry_result.get("status") == "completed":
result = retry_result
step_results[-1]["result"] = result
# Re-evaluate after retry
critic_score2 = self._evaluate_with_critic(task, step, result)
if critic_score2 and critic_score2.correctness >= min_correctness:
# Retry succeeded
continue
# If retry also failed, continue to next step
else:
self._publish(task, CRITIC_RESULT, {
"step_id": step.id,
"score": critic_score.model_dump(mode="json"),
"action": "give_up",
"reason": f"Critic retry limit ({self._critic_retry_limit}) reached",
})
# Handle failed step
if result.get("status") == "failed": if result.get("status") == "failed":
review = self._build_failed_step_review(task, step, result)
if review:
return {
"status": "awaiting_review",
"result": {
"error": f"Step {step.id} requires review before replanning",
"failed_step": step.id,
"step_results": step_results,
"review": review,
},
}
recovery = self._recover_failed_step( recovery = self._recover_failed_step(
task=task, task=task,
step=step, step=step,
@ -266,16 +349,6 @@ class ExecutionEngine:
}, },
} }
requires_execution = directive.payload.get("requires_execution", True)
if requires_execution and self._critic:
critic_result = self._evaluate_with_critic(
task, step, result
)
if critic_result:
# Convert to dict for JSON serialization
result["critic_score"] = critic_result.model_dump(mode="json") if hasattr(critic_result, 'model_dump') else dict(critic_result)
self._save_critique_to_memory(task, step, critic_result)
ready_steps = self._get_ready_steps(graph, completed_steps) ready_steps = self._get_ready_steps(graph, completed_steps)
return { return {
@ -286,6 +359,31 @@ class ExecutionEngine:
}, },
} }
def _build_failed_step_review(self, task: UserTask, step, result: dict[str, Any]) -> dict[str, Any] | None:
if step.tool != "shell_exec" or not self._command_analyzer:
return None
command = str((step.args or {}).get("command", ""))
if not command:
return None
diagnosis = self._command_analyzer.analyze(
command=command,
task_id=task.task_id,
session_id=task.session_id,
)
if diagnosis.get("type") == "ok":
return None
return {
"step_id": step.id,
"tool": step.tool,
"command": command,
"diagnosis": diagnosis,
"critic_assessment": {
"classification": "model_planning_error",
"needs_replan": True,
"explanation": "Structured command analysis found a model action error before recovery.",
},
}
def _recover_failed_step( def _recover_failed_step(
self, self,
task: UserTask, task: UserTask,
@ -496,11 +594,23 @@ Previous step results:
step, step,
score: CriticScore, score: CriticScore,
) -> None: ) -> None:
"""Save critic evaluation as critique entry in memory.""" """Save critic evaluation as critique entry in memory, using MemoryWritePolicy."""
if not self._memory_interface: if not self._memory_interface:
return return
try: try:
# Check with policy before saving
if self._memory_policy:
decision = self._memory_policy.decide(
critic_score=score,
memory_type="critique",
session_id=task.session_id,
)
if decision == "skip":
logger.info(f"MemoryWritePolicy skipped critique for {step.tool}")
return
# For "store_with_weight", we could adjust weight, but critic score already has weight
tool_name = step.tool tool_name = step.tool
tool_args = step.args or {} tool_args = step.args or {}
args_str = ", ".join([f"{k}={v}" for k, v in tool_args.items()]) args_str = ", ".join([f"{k}={v}" for k, v in tool_args.items()])
@ -537,6 +647,26 @@ Previous step results:
base_prompt = self._prompts.get("critic", "") base_prompt = self._prompts.get("critic", "")
tool_result = result.get("result", {}) tool_result = result.get("result", {})
# Truncate long outputs to avoid exceeding context window
# Keep output under ~2000 chars to leave room for prompt + generation
output = tool_result.get("output", "")
if isinstance(output, str) and len(output) > 2000:
output = output[:2000] + "\n... [truncated]"
elif not isinstance(output, str):
output_str = json.dumps(output, ensure_ascii=False)
if len(output_str) > 2000:
output = output_str[:2000] + "\n... [truncated]"
else:
output = output_str
# Build a compact result representation
compact_result = {
"ok": tool_result.get("ok"),
"output": output,
"error": tool_result.get("error"),
"exit_code": tool_result.get("metadata", {}).get("exit_code"),
}
return f"""{base_prompt} return f"""{base_prompt}
Step: {step.description} Step: {step.description}
@ -544,7 +674,7 @@ Tool: {step.tool}
Args: {step.args} Args: {step.args}
Result: Result:
{json.dumps(tool_result, indent=2)} {json.dumps(compact_result, indent=2, ensure_ascii=False)}
Evaluate and respond with JSON: Evaluate and respond with JSON:
{{"correctness": 0.0-1.0, "usefulness": 0.0-1.0, "safety": 0.0-1.0, "memory_store": true|false, "weight": 0.0-1.0, "explanation": "..."}}""" {{"correctness": 0.0-1.0, "usefulness": 0.0-1.0, "safety": 0.0-1.0, "memory_store": true|false, "weight": 0.0-1.0, "explanation": "..."}}"""
@ -619,8 +749,15 @@ Evaluate and respond with JSON:
permission_result = None permission_result = None
# If permission_override is provided, skip permission check
if permission_override is not None:
permission_result = {
"decision": permission_override.decision,
"command": tool_args.get("command", ""),
"cached": True,
}
# Check permission for shell_exec and file_write # Check permission for shell_exec and file_write
if tool_name == "shell_exec": elif tool_name == "shell_exec":
permission_result = self._permission_service.check_shell_command( permission_result = self._permission_service.check_shell_command(
task_id=task.task_id, task_id=task.task_id,
session_id=task.session_id, session_id=task.session_id,
@ -693,7 +830,13 @@ Evaluate and respond with JSON:
if tool_name == "shell_exec": if tool_name == "shell_exec":
command = str(tool_args.get("command", "")) command = str(tool_args.get("command", ""))
if command.startswith("sudo ") and secret_override is None:
# Determine if sudo password is needed:
# 1. Command explicitly starts with "sudo"
# 2. Command is a known sudo-requiring command (apt, systemctl, etc.) — flagged by permission service
needs_password = command.startswith("sudo ") or (permission_result is not None and permission_result.get("requires_sudo", False))
if needs_password and secret_override is None:
secret_request = SecretRequest( secret_request = SecretRequest(
task_id=task.task_id, task_id=task.task_id,
session_id=task.session_id, session_id=task.session_id,
@ -709,8 +852,12 @@ Evaluate and respond with JSON:
"secret_request": secret_request.model_dump(mode="json"), "secret_request": secret_request.model_dump(mode="json"),
}, },
} }
if command.startswith("sudo ") and secret_override is not None: if needs_password and secret_override is not None:
tool_args["command"] = f"sudo -S -p '' {command[len('sudo '):]}" # Inject sudo -S for explicit sudo commands, or prepend sudo -S for implicit ones
if command.startswith("sudo "):
tool_args["command"] = f"sudo -S -p '' {command[len('sudo '):]}"
else:
tool_args["command"] = f"sudo -S -p '' {command}"
tool_args["stdin_secret"] = f"{secret_override}\n" tool_args["stdin_secret"] = f"{secret_override}\n"
tool_call = ToolCall( tool_call = ToolCall(
@ -720,10 +867,43 @@ Evaluate and respond with JSON:
step_id="step-1", step_id="step-1",
) )
self._publish(task, TOOL_CALLED, tool_call.model_dump(mode="json")) self._publish(task, TOOL_CALLED, tool_call.model_dump(mode="json"))
if tool_name == "shell_exec":
tool_args["__output_callback"] = lambda stream, chunk: self._publish(
task,
TOOL_OUTPUT_CHUNK,
{
"tool": tool_name,
"step_id": "step-1",
"stream": stream,
"chunk": chunk,
},
)
tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args) tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args)
self._publish(task, TOOL_COMPLETED, tool_result.model_dump(mode="json")) self._publish(task, TOOL_COMPLETED, tool_result.model_dump(mode="json"))
needs_sudo = tool_result.metadata.get("needs_sudo", False) if tool_result.metadata else False metadata = tool_result.metadata or {}
needs_sudo = metadata.get("needs_sudo", False)
sudo_auth_failed = metadata.get("sudo_auth_failed", False) or self._looks_like_sudo_auth_failure(tool_result)
if tool_name == "shell_exec" and not tool_result.ok and sudo_auth_failed:
original_command = str(directive.payload.get("args", {}).get("command", tool_args.get("command", "")))
secret_request = SecretRequest(
task_id=task.task_id,
session_id=task.session_id,
kind="sudo_password",
prompt="Sudo password incorrect. Try again",
command=original_command,
)
self._publish(task, SECRET_REQUESTED, secret_request.model_dump(mode="json"))
return {
"status": "awaiting_input",
"result": {
"error": "Sudo password failed",
"secret_request": secret_request.model_dump(mode="json"),
"attempt_failed": True,
"tool_result": tool_result.model_dump(mode="json"),
},
}
if not tool_result.ok and needs_sudo: if not tool_result.ok and needs_sudo:
return { return {
@ -737,11 +917,51 @@ Evaluate and respond with JSON:
}, },
} }
if tool_name == "shell_exec" and not tool_result.ok and self._command_analyzer:
original_command = str(directive.payload.get("args", {}).get("command", tool_args.get("command", "")))
diagnosis = self._command_analyzer.analyze(
command=original_command,
task_id=task.task_id,
session_id=task.session_id,
)
if diagnosis.get("type") != "ok":
return {
"status": "awaiting_review",
"result": {
"error": "Tool action requires review before replanning",
"review": {
"step_id": "step-1",
"tool": tool_name,
"command": original_command,
"diagnosis": diagnosis,
"critic_assessment": {
"classification": "model_planning_error",
"needs_replan": True,
"explanation": "Structured command analysis found a model action error before recovery.",
},
},
"tool_result": tool_result.model_dump(mode="json"),
},
}
return { return {
"status": "completed" if tool_result.ok else "failed", "status": "completed" if tool_result.ok else "failed",
"result": tool_result.model_dump(mode="json"), "result": tool_result.model_dump(mode="json"),
} }
def _looks_like_sudo_auth_failure(self, tool_result: ToolResult) -> bool:
output = f"{tool_result.output or ''}\n{tool_result.error or ''}".lower()
return any(
marker in output
for marker in (
"incorrect password",
"incorrect password attempt",
"sudo: no password was provided",
"sorry, try again",
"authentication failure",
)
)
def _publish(self, task: UserTask, event_type: str, payload: dict[str, Any]) -> None: def _publish(self, task: UserTask, event_type: str, payload: dict[str, Any]) -> None:
if not self._event_bus: if not self._event_bus:
return return

View File

@ -16,3 +16,9 @@ class SecretResolutionRequest(BaseModel):
class PasswordResolutionRequest(BaseModel): class PasswordResolutionRequest(BaseModel):
task_id: str task_id: str
password: str password: str
class ReviewResolutionRequest(BaseModel):
    """Payload carrying a review decision for a task.

    NOTE(review): presumably submitted to resolve an "awaiting_review" task;
    the handler and the accepted ``decision`` values are not visible here.
    """
    # Identifier of the task the decision applies to.
    task_id: str
    # Reviewer's decision; free-form string at the model level.
    decision: str
    # Optional correction text; omitted fields default to None.
    correction: str | None = None

View File

@ -76,6 +76,7 @@ class PermissionService:
"decision": "allowed_always", "decision": "allowed_always",
"command": normalized, "command": normalized,
"cached": True, "cached": True,
"requires_sudo": _requires_sudo(normalized),
} }
if command_hash in cache.get("allowed_once", {}): if command_hash in cache.get("allowed_once", {}):
@ -85,6 +86,7 @@ class PermissionService:
"decision": "allowed_once", "decision": "allowed_once",
"command": normalized, "command": normalized,
"cached": True, "cached": True,
"requires_sudo": _requires_sudo(normalized),
} }
# Check hard stop # Check hard stop
@ -117,15 +119,20 @@ class PermissionService:
category = self._get_category(normalized) category = self._get_category(normalized)
can_always = self._categories.get(category, {}).get("allow_always", True) can_always = self._categories.get(category, {}).get("allow_always", True)
# Check if command requires sudo (e.g. apt, systemctl without explicit sudo prefix)
requires_sudo = _requires_sudo(normalized)
# Need user confirmation # Need user confirmation
return { result = {
"decision": "prompt", "decision": "prompt",
"command": normalized, "command": normalized,
"category": category, "category": category,
"allow_always": can_always, "allow_always": can_always,
"requires_sudo": requires_sudo,
"task_id": task_id, "task_id": task_id,
"session_id": session_id, "session_id": session_id,
} }
return result
def check_write_path( def check_write_path(
self, self,
@ -243,28 +250,50 @@ class PermissionService:
"""Check if command is hard stop.""" """Check if command is hard stop."""
hard_stop_commands = self._categories.get("hard_stop", {}).get("commands", []) hard_stop_commands = self._categories.get("hard_stop", {}).get("commands", [])
cmd_lower = command.lower() cmd_lower = command.lower().strip()
cmd_tokens = cmd_lower.split()
for hs in hard_stop_commands: for hs in hard_stop_commands:
if hs.lower() in cmd_lower: hs_lower = hs.lower().strip()
# For "rm -rf /" and "rm -rf /*", only match exact command
# Don't match "rm -rf /tmp/nonexistent" as hard stop
if hs_lower in ("rm -rf /", "rm -rf /*"):
if cmd_lower == hs_lower:
return True
continue
# For other patterns, use substring match
if hs_lower in cmd_lower:
return True return True
return False return False
def _get_category(self, command: str) -> str: def _get_category(self, command: str) -> str:
"""Get command category.""" """Get command category."""
cmd_lower = command.lower() cmd_lower = command.lower().strip()
cmd_first_word = cmd_lower.split()[0] if cmd_lower.split() else ""
# Check no_always category # Check no_always category — match by first word or known multi-word prefixes
no_always = self._categories.get("no_always", {}).get("commands", []) no_always = self._categories.get("no_always", {}).get("commands", [])
for cmd in no_always: for pattern in no_always:
if cmd in cmd_lower: pat_lower = pattern.lower().strip()
# Match if first word matches (e.g. "apt" matches "apt list --upgradable")
# or if command starts with the pattern (e.g. "systemctl stop" matches "systemctl stop nginx")
if cmd_first_word == pat_lower or cmd_lower.startswith(pat_lower + " "):
return "no_always" return "no_always"
# Check hard_stop by first word
hard_stop = self._categories.get("hard_stop", {}).get("commands", [])
for pattern in hard_stop:
pat_lower = pattern.lower().strip()
if cmd_first_word == pat_lower or cmd_lower.startswith(pat_lower + " "):
return "hard_stop"
# Default to normal # Default to normal
return "normal" return "normal"
SUDO_COMMANDS = { SUDO_COMMANDS = {
"sudo",
"apt", "apt-get", "dpkg", "yum", "dnf", "pacman", "zypper", "apt", "apt-get", "dpkg", "yum", "dnf", "pacman", "zypper",
"systemctl", "service", "mount", "umount", "systemctl", "service", "mount", "umount",
"shutdown", "reboot", "halt", "poweroff", "shutdown", "reboot", "halt", "poweroff",

View File

@ -2,12 +2,15 @@ TASK_RECEIVED = "task_received"
CONTEXT_BUILT = "context_built" CONTEXT_BUILT = "context_built"
STEP_STARTED = "step_started" STEP_STARTED = "step_started"
TOOL_CALLED = "tool_called" TOOL_CALLED = "tool_called"
TOOL_OUTPUT_CHUNK = "tool_output_chunk"
TOOL_COMPLETED = "tool_completed" TOOL_COMPLETED = "tool_completed"
PERMISSION_REQUESTED = "permission_requested" PERMISSION_REQUESTED = "permission_requested"
PERMISSION_RESOLVED = "permission_resolved" PERMISSION_RESOLVED = "permission_resolved"
TASK_AWAITING_PERMISSION = "task_awaiting_permission" TASK_AWAITING_PERMISSION = "task_awaiting_permission"
SECRET_REQUESTED = "secret_requested" SECRET_REQUESTED = "secret_requested"
TASK_AWAITING_INPUT = "task_awaiting_input" TASK_AWAITING_INPUT = "task_awaiting_input"
TASK_AWAITING_REVIEW = "task_awaiting_review"
REVIEW_RESOLVED = "review_resolved"
CHECKPOINT_SAVED = "checkpoint_saved" CHECKPOINT_SAVED = "checkpoint_saved"
TASK_COMPLETED = "task_completed" TASK_COMPLETED = "task_completed"
TASK_FAILED = "task_failed" TASK_FAILED = "task_failed"
@ -29,3 +32,4 @@ THINKER_CALLED = "thinker_called"
THINKER_RESULT = "thinker_result" THINKER_RESULT = "thinker_result"
JSON_COMPILER_CALLED = "json_compiler_called" JSON_COMPILER_CALLED = "json_compiler_called"
JSON_COMPILER_RESULT = "json_compiler_result" JSON_COMPILER_RESULT = "json_compiler_result"
MEMORY_RECALL_USED = "memory_recall_used"

View File

@ -101,14 +101,24 @@ class MemoryInterface:
def count(self) -> int: def count(self) -> int:
return self._store.count() return self._store.count()
def reindex(self) -> None: def reindex(self) -> int:
"""Rebuild vector index from all entries in memory store.
Returns number of indexed entries."""
entries = self._store.get_all(limit=10000) entries = self._store.get_all(limit=10000)
self._vector_index.save() # Delete old index file and re-initialize from scratch
import os
if self._vector_index._index_path and self._vector_index._index_path.exists():
self._vector_index._index_path.unlink()
self._vector_index._index = None
self._vector_index._init_index()
count = 0
for entry in entries: for entry in entries:
text = entry.text text = entry.text
embedding = self._embeddings.encode(text) embedding = self._embeddings.encode(text)
self._vector_index.insert(entry.id, embedding) self._vector_index.insert(entry.id, embedding)
count += 1
self._vector_index.save() self._vector_index.save()
return count
def close(self) -> None: def close(self) -> None:
self._store.close() self._store.close()

205
app/memory/recall.py Normal file
View File

@ -0,0 +1,205 @@
from __future__ import annotations
import json
import logging
from typing import Any
from app.core.contracts import MemoryEntry
from app.memory.interface import MemoryInterface
from app.models.async_adapters import AsyncOrchestratorAdapter
logger = logging.getLogger(__name__)
RECALL_PROMPT_TEMPLATE = """Определи, нужно ли искать в долговременной памяти для ответа на этот запрос.
Запрос: "{task_input}"
ИСКАТЬ в памяти если запрос:
- Содержит вопрос о пользователе (имя, предпочтения, история)
- Содержит отсылки к прошлым разговорам или действиям
- Содержит местоимения без контекста ("он", "это", "тот файл")
- Просит вспомнить, повторить, рассказать о прошлом
- Спрашивает "что ты помнишь", "как меня зовут", "что я говорил"
НЕ ИСКАТЬ если:
- Приветствие или прощание
- Простая команда (ls, pwd, echo)
- Общий вопрос не связанный с прошлым
Ответь ТОЛЬКО JSON:
{{"should_recall": true, "search_query": "поисковый запрос"}}
или
{{"should_recall": false, "reason": "краткая причина"}}"""
class MemoryRecallService:
    """Active recall: the system itself decides what to look up in memory and when."""

    def __init__(
        self,
        memory_interface: MemoryInterface | None,
        recall_model: AsyncOrchestratorAdapter | None,
    ) -> None:
        """Store the memory backend and the model used to classify requests.

        Either argument may be ``None``; ``recall`` then returns an empty result.
        """
        self._memory = memory_interface
        self._model = recall_model

    async def recall(
        self,
        task_input: str,
        top_k: int = 5,
    ) -> dict[str, Any]:
        """Decide whether recall is needed and, if so, run the memory search.

        Args:
            task_input: Raw user request.
            top_k: Maximum number of hits requested from the memory search.

        Returns:
            A dict with the keys::

                {
                    "should_recall": bool,
                    "reason": str,
                    "query": str,
                    "results": list[MemoryEntry],
                    "summary": str,  # short digest for the orchestrator
                }

            When recall is skipped or fails, ``should_recall`` is ``False`` and
            ``reason`` names the cause.
        """
        if not self._memory or not self._model:
            logger.debug(
                "Memory recall skipped: memory=%s, model=%s",
                self._memory is not None,
                self._model is not None,
            )
            return self._empty_result("memory_or_model_unavailable")

        # 1. The LLM decides whether a search is needed.
        decision = await self._classify(task_input)
        logger.debug("Memory recall decision: %r", decision)
        if not isinstance(decision, dict):
            return self._empty_result("invalid_decision_type")

        if not decision.get("should_recall"):
            return self._empty_result(decision.get("reason", "not_needed"))

        # The model may omit the query or return an empty/null one; searching
        # for that is pointless, so fall back to the original request text.
        search_query = decision.get("search_query")
        if not isinstance(search_query, str) or not search_query.strip():
            search_query = task_input
        logger.info(
            "Memory recall: query='%s', reason='%s'",
            search_query,
            decision.get("reason"),
        )

        # 2. Vector search. Best effort: a failing backend must not fail the task.
        try:
            raw_results = self._memory.search(query=search_query, top_k=top_k)
        except Exception as e:
            logger.warning("Memory search failed: %s", e)
            return self._empty_result("search_failed")

        # 3. Filtering: drop low-scoring hits and duplicates.
        filtered = self._filter(raw_results)
        if not filtered:
            return self._empty_result("no_relevant_results")

        # 4. Digest for the orchestrator.
        summary = self._summarize(filtered, search_query)

        return {
            "should_recall": True,
            "reason": decision.get("reason", ""),
            "query": search_query,
            "results": filtered,
            "summary": summary,
        }

    async def _classify(self, task_input: str) -> dict[str, Any]:
        """Ask the model whether memory should be searched for this request.

        Returns the parsed decision dict. Any model or parsing problem is
        logged and turned into ``{"should_recall": False, "reason": ...}``.
        """
        prompt = RECALL_PROMPT_TEMPLATE.format(task_input=task_input)
        try:
            raw = await self._model.generate(prompt, max_tokens=512)
            data = self._parse_json(raw)
            if "should_recall" in data:
                return data
            logger.warning("Recall classification missing 'should_recall': %s", raw[:200])
            return {"should_recall": False, "reason": "parse_error"}
        except Exception as e:
            # Classification is advisory; never let it break the request.
            logger.warning("Recall classification failed: %s", e)
            return {"should_recall": False, "reason": "classification_error"}

    def _filter(
        self,
        results: list[tuple[MemoryEntry, float]],
        min_score: float = 0.3,
    ) -> list[MemoryEntry]:
        """Drop hits scoring below ``min_score`` and remove duplicates.

        Two entries count as duplicates when the first 100 characters of their
        stripped, lower-cased text are equal; the first occurrence is kept.
        """
        seen_texts: set[str] = set()
        filtered: list[MemoryEntry] = []
        for entry, score in results:
            if score < min_score:
                continue
            # Normalize the text for de-duplication.
            normalized = entry.text.strip().lower()[:100]
            if normalized in seen_texts:
                continue
            seen_texts.add(normalized)
            filtered.append(entry)
        return filtered

    def _summarize(
        self,
        results: list[MemoryEntry],
        query: str,
    ) -> str:
        """Build a short multi-line digest of the hits for the orchestrator.

        At most five entries are listed, each cut to 120 characters with
        newlines replaced by spaces.
        """
        parts = [f"По запросу '{query}' найдено {len(results)} записей:"]
        for i, entry in enumerate(results[:5], 1):
            text_preview = entry.text[:120].replace("\n", " ")
            parts.append(f" {i}. [{entry.kind}] {text_preview}")
        return "\n".join(parts)

    def _parse_json(self, raw: str) -> dict[str, Any]:
        """Extract a JSON object from a model reply, ignoring surrounding text.

        The reply is scanned left to right; every ``{`` that starts a
        decodable JSON value is decoded with ``JSONDecoder.raw_decode``, which
        tolerates text after the value. The last top-level JSON object found
        is returned, so reasoning or echoed examples before the final answer
        are skipped.

        Returns:
            The decoded dict, or ``{}`` when ``raw`` is not a string or holds
            no JSON object.
        """
        if not isinstance(raw, str):
            return {}

        decoder = json.JSONDecoder()
        found: dict[str, Any] = {}
        pos = raw.find("{")
        while pos != -1:
            try:
                value, end = decoder.raw_decode(raw, pos)
            except ValueError:
                # Not valid JSON from this brace; try the next one.
                pos = raw.find("{", pos + 1)
                continue
            if isinstance(value, dict):
                found = value
            # Resume after the decoded value so nested objects are not
            # mistaken for separate top-level answers.
            pos = raw.find("{", end)
        return found

    @staticmethod
    def _empty_result(reason: str) -> dict[str, Any]:
        """Return the "nothing recalled" result carrying the given reason."""
        return {
            "should_recall": False,
            "reason": reason,
            "query": "",
            "results": [],
            "summary": "",
        }

View File

@ -1,11 +1,13 @@
from __future__ import annotations from __future__ import annotations
import json import json
from concurrent.futures import Future, ThreadPoolExecutor
from threading import RLock from threading import RLock
from pathlib import Path from pathlib import Path
from app.core.config import AppConfig, load_app_config from app.core.config import AppConfig, load_app_config
from app.core.context_builder import ContextBuilder from app.core.context_builder import ContextBuilder
from app.core.command_analyzer import CommandAnalyzer
from app.core.contracts import UserTask from app.core.contracts import UserTask
from app.core.execution_engine import ExecutionEngine from app.core.execution_engine import ExecutionEngine
from app.core.execution_scheduler import ExecutionScheduler from app.core.execution_scheduler import ExecutionScheduler
@ -13,6 +15,7 @@ from app.core.async_router import AsyncRouter
from app.events.event_bus import EventBus from app.events.event_bus import EventBus
from app.events.event_store import SQLiteEventStore from app.events.event_store import SQLiteEventStore
from app.memory import MemoryInterface, MemoryStore, VectorIndex from app.memory import MemoryInterface, MemoryStore, VectorIndex
from app.memory.recall import MemoryRecallService
from app.memory.write_policy import MemoryWritePolicy from app.memory.write_policy import MemoryWritePolicy
from app.models import ( from app.models import (
CoderAdapter, CoderAdapter,
@ -64,6 +67,8 @@ class RuntimeController:
self._model_cache: dict[tuple[object, ...], tuple[object, RLock]] = {} self._model_cache: dict[tuple[object, ...], tuple[object, RLock]] = {}
self._memory_interface: MemoryInterface | None = None self._memory_interface: MemoryInterface | None = None
self._memory_policy: MemoryWritePolicy | None = None self._memory_policy: MemoryWritePolicy | None = None
self._background_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ducklm-task")
self._background_tasks: dict[str, Future[dict[str, object]]] = {}
self.tool_registry = None self.tool_registry = None
self.tool_sandbox = None self.tool_sandbox = None
@ -75,6 +80,8 @@ class RuntimeController:
self.tool_sandbox = ToolSandbox( self.tool_sandbox = ToolSandbox(
allowed_root=self.base_dir, allowed_root=self.base_dir,
timeout_ms=runtime_config.step_timeout_ms, timeout_ms=runtime_config.step_timeout_ms,
command_timeout_ms=runtime_config.shell_command_timeout_ms,
idle_timeout_ms=runtime_config.shell_idle_timeout_ms,
) )
self.tool_registry = self._create_tool_registry() self.tool_registry = self._create_tool_registry()
@ -121,6 +128,7 @@ class RuntimeController:
self.permission_service = PermissionService( self.permission_service = PermissionService(
config=self._load_permissions_config(), config=self._load_permissions_config(),
) )
self.command_analyzer = CommandAnalyzer(self.permission_service)
self.execution_engine = ExecutionEngine( self.execution_engine = ExecutionEngine(
event_bus=self.event_bus, event_bus=self.event_bus,
@ -134,6 +142,8 @@ class RuntimeController:
memory_interface=self._memory_interface, memory_interface=self._memory_interface,
prompts=self._prompts, prompts=self._prompts,
recovery_limit=runtime_config.tool_retry_limit, recovery_limit=runtime_config.tool_retry_limit,
critic_retry_limit=runtime_config.critic_retry_limit,
command_analyzer=self.command_analyzer,
) )
self.runtime_loop = RuntimeLoop( self.runtime_loop = RuntimeLoop(
@ -194,35 +204,35 @@ class RuntimeController:
if thinker_config.get("path"): if thinker_config.get("path"):
llm, lock = self._get_or_create_llm("thinker", thinker_config) llm, lock = self._get_or_create_llm("thinker", thinker_config)
self._thinker = OrchestratorAdapter(llm, system_prompt=self._prompts.get("thinker"), lock=lock) self._thinker = OrchestratorAdapter(llm, system_prompt=self._prompts.get("thinker"), lock=lock)
print(f"Thinker loaded: {self._thinker} (model: {thinker_config.get("path")})") print(f"Thinker loaded: {self._thinker} (model: {thinker_config.get('path')})")
print("Loading json_compiler model...") print("Loading json_compiler model...")
compiler_config = self.config.models.json_compiler or {} compiler_config = self.config.models.json_compiler or {}
if compiler_config.get("path"): if compiler_config.get("path"):
llm, lock = self._get_or_create_llm("json_compiler", compiler_config) llm, lock = self._get_or_create_llm("json_compiler", compiler_config)
self._json_compiler = OrchestratorAdapter(llm, system_prompt=self._prompts.get("json_compiler"), lock=lock) self._json_compiler = OrchestratorAdapter(llm, system_prompt=self._prompts.get("json_compiler"), lock=lock)
print(f"JSON Compiler loaded: {self._json_compiler} (model: {compiler_config.get("path")})") print(f"JSON Compiler loaded: {self._json_compiler} (model: {compiler_config.get('path')})")
print("Loading coder model...") print("Loading coder model...")
coder_config = self.config.models.coder or {} coder_config = self.config.models.coder or {}
if coder_config.get("path"): if coder_config.get("path"):
llm, lock = self._get_or_create_llm("coder", coder_config) llm, lock = self._get_or_create_llm("coder", coder_config)
self._coder = CoderAdapter(llm, system_prompt=self._prompts.get("coder"), lock=lock) self._coder = CoderAdapter(llm, system_prompt=self._prompts.get("coder"), lock=lock)
print(f"Coder loaded: {self._coder} (model: {coder_config.get("path")})") print(f"Coder loaded: {self._coder} (model: {coder_config.get('path')})")
print("Loading critic model...") print("Loading critic model...")
critic_config = self.config.models.critic or {} critic_config = self.config.models.critic or {}
if critic_config.get("path"): if critic_config.get("path"):
llm, lock = self._get_or_create_llm("critic", critic_config) llm, lock = self._get_or_create_llm("critic", critic_config)
self._critic = CriticAdapter(llm, system_prompt=self._prompts.get("critic"), lock=lock) self._critic = CriticAdapter(llm, system_prompt=self._prompts.get("critic"), lock=lock)
print(f"Critic loaded: {self._critic} (model: {critic_config.get("path")})") print(f"Critic loaded: {self._critic} (model: {critic_config.get('path')})")
print("Loading sys_util model...") print("Loading sys_util model...")
sys_util_config = self.config.models.sys_util or {} sys_util_config = self.config.models.sys_util or {}
if sys_util_config.get("path"): if sys_util_config.get("path"):
llm, lock = self._get_or_create_llm("sys_util", sys_util_config) llm, lock = self._get_or_create_llm("sys_util", sys_util_config)
self._sys_util = OrchestratorAdapter(llm, system_prompt=self._prompts.get("sys_util"), lock=lock) self._sys_util = OrchestratorAdapter(llm, system_prompt=self._prompts.get("sys_util"), lock=lock)
print(f"Sys_util loaded: {self._sys_util} (model: {sys_util_config.get("path")})") print(f"Sys_util loaded: {self._sys_util} (model: {sys_util_config.get('path')})")
print("All models loaded successfully") print("All models loaded successfully")
@ -241,6 +251,28 @@ class RuntimeController:
if async_coder: if async_coder:
self.execution_engine.set_coder(async_coder) self.execution_engine.set_coder(async_coder)
# Create MemoryRecallService using the configured model (default: sys_util)
# Reuses already-loaded async adapter — no duplicate model loading
recall_model_name = self.config.runtime.recall_model
recall_async_model = {
"sys_util": async_sys_util,
"thinker": async_thinker,
"json_compiler": async_compiler,
"critic": async_critic,
"coder": async_coder,
}.get(recall_model_name, async_sys_util)
self._recall_service = MemoryRecallService(
memory_interface=self._memory_interface,
recall_model=recall_async_model,
)
self.runtime_loop.set_recall_service(self._recall_service)
print(f"MemoryRecallService initialized with model: {recall_model_name}")
# Set memory policy in runtime loop
self.runtime_loop.set_memory_policy(self._memory_policy)
print(f"MemoryWritePolicy set: {self._memory_policy is not None}")
except Exception as e: except Exception as e:
print(f"Failed to load models at startup: {e}") print(f"Failed to load models at startup: {e}")
raise RuntimeError(f"Model loading failed: {e}") from e raise RuntimeError(f"Model loading failed: {e}") from e
@ -375,21 +407,76 @@ class RuntimeController:
def handle_task(self, task: UserTask) -> dict[str, object]: def handle_task(self, task: UserTask) -> dict[str, object]:
return self.runtime_loop.run_task(task) return self.runtime_loop.run_task(task)
def _submit_background(self, task_id: str, fn, *args) -> dict[str, object]:
    """Schedule ``fn(*args)`` on the background executor and track its future.

    The future is stored in ``self._background_tasks`` under ``task_id``
    (replacing any earlier future registered for the same id). A done-callback
    logs any exception raised by ``fn``; without it the exception would stay
    inside the future and the failure would go unnoticed.

    Returns:
        An acknowledgement dict ``{"task_id": ..., "status": "accepted"}``.
    """
    import logging  # function-scope import: the file's import block is not visible here

    future = self._background_executor.submit(fn, *args)
    self._background_tasks[task_id] = future

    def _log_failure(done) -> None:
        # ``exception()`` raises CancelledError on a cancelled future, so check first.
        if done.cancelled():
            return
        exc = done.exception()
        if exc is not None:
            logging.getLogger(__name__).error(
                "Background task %s failed", task_id, exc_info=exc
            )

    future.add_done_callback(_log_failure)
    return {"task_id": task_id, "status": "accepted"}

def _submit_resolution(self, task_id: str, resolver, *args) -> dict[str, object]:
    """Schedule ``resolver(task_id, *args)`` in the background if the task is known.

    Returns a ``failed`` result without scheduling anything when the task state
    store has no entry for ``task_id``.
    """
    if not self.task_state_store.get_task(task_id):
        return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}}
    return self._submit_background(task_id, resolver, task_id, *args)

def submit_task(self, task: UserTask) -> dict[str, object]:
    """Run ``handle_task(task)`` in the background and acknowledge immediately."""
    return self._submit_background(task.task_id, self.handle_task, task)

def resolve_permission(self, task_id: str, decision: str) -> dict[str, object]:
    """Forward a permission decision to the runtime loop and return its result."""
    return self.runtime_loop.resolve_permission(
        task_id=task_id, decision=decision
    )

def submit_permission_resolution(self, task_id: str, decision: str) -> dict[str, object]:
    """Background variant of ``resolve_permission``; fails fast on an unknown task id."""
    return self._submit_resolution(task_id, self.resolve_permission, decision)

def resolve_secret(self, task_id: str, secret: str) -> dict[str, object]:
    """Forward a user-supplied secret to the runtime loop and return its result."""
    return self.runtime_loop.resolve_secret(
        task_id=task_id, secret=secret
    )

def submit_secret_resolution(self, task_id: str, secret: str) -> dict[str, object]:
    """Background variant of ``resolve_secret``; fails fast on an unknown task id."""
    return self._submit_resolution(task_id, self.resolve_secret, secret)

def resolve_password(self, task_id: str, password: str) -> dict[str, object]:
    """Forward a user-supplied password to the runtime loop and return its result."""
    return self.runtime_loop.resolve_password(
        task_id=task_id, password=password
    )

def resolve_review(self, task_id: str, decision: str, correction: str | None = None) -> dict[str, object]:
    """Forward a review decision (and optional correction) to the runtime loop."""
    return self.runtime_loop.resolve_review(
        task_id=task_id,
        decision=decision,
        correction=correction,
    )

def submit_review_resolution(self, task_id: str, decision: str, correction: str | None = None) -> dict[str, object]:
    """Background variant of ``resolve_review``; fails fast on an unknown task id."""
    return self._submit_resolution(task_id, self.resolve_review, decision, correction)

def submit_password_resolution(self, task_id: str, password: str) -> dict[str, object]:
    """Background variant of ``resolve_password``; fails fast on an unknown task id."""
    return self._submit_resolution(task_id, self.resolve_password, password)
def handle_critic_feedback( def handle_critic_feedback(
self, self,
feedback: str, feedback: str,

View File

@ -3,16 +3,46 @@ from __future__ import annotations
import asyncio import asyncio
from app.core.context_builder import ContextBuilder from app.core.context_builder import ContextBuilder
from app.core.contracts import ExecutionDirective, PermissionDecision, PermissionRequest, RuntimeEvent, SecretRequest, TaskCheckpoint, UserTask from app.core.contracts import CriticScore, ExecutionDirective, PermissionDecision, PermissionRequest, RuntimeEvent, SecretRequest, TaskCheckpoint, UserTask
from app.core.execution_engine import ExecutionEngine from app.core.execution_engine import ExecutionEngine
from app.core.async_router import AsyncRouter from app.core.async_router import AsyncRouter
from app.events.event_bus import EventBus from app.events.event_bus import EventBus
from app.events.event_types import CHECKPOINT_SAVED, CONTEXT_BUILT, TASK_AWAITING_INPUT, TASK_AWAITING_PERMISSION, TASK_COMPLETED, TASK_FAILED, TASK_RECEIVED from app.events.event_types import CHECKPOINT_SAVED, CONTEXT_BUILT, MEMORY_RECALL_USED, MEMORY_WRITE_DECIDED, REVIEW_RESOLVED, TASK_AWAITING_INPUT, TASK_AWAITING_PERMISSION, TASK_AWAITING_REVIEW, TASK_COMPLETED, TASK_FAILED, TASK_RECEIVED
from app.core.permission_service import PermissionService from app.core.permission_service import PermissionService
from app.memory.recall import MemoryRecallService
from app.memory.write_policy import MemoryWritePolicy
from app.state.checkpoint_store import SQLiteCheckpointStore from app.state.checkpoint_store import SQLiteCheckpointStore
from app.state.task_state_store import SQLiteTaskStateStore from app.state.task_state_store import SQLiteTaskStateStore
def _build_response_directive(execution_result: dict) -> dict | None:
    """Build a JSON-ready ``respond`` directive from an execution result.

    Two shapes of ``execution_result["result"]`` are recognised, in order:

    1. It contains ``step_results``: the ``output`` of every step whose tool
       result has a truthy ``ok`` and ``output`` is collected, and the pieces
       are joined with blank lines.
    2. It is itself a tool result with a truthy ``ok`` and ``output``.

    Entries that are not dictionaries (a missing result, a plain string, a
    ``None`` step) are skipped instead of raising ``AttributeError``.

    Returns:
        The dumped ``ExecutionDirective`` (``mode="json"``), or ``None`` when
        there is nothing to show to the client.
    """
    result = execution_result.get("result")
    if not isinstance(result, dict):
        return None

    # Case 1: step_results from plan execution
    step_results = result.get("step_results")
    if isinstance(step_results, (list, tuple)):
        response_parts = []
        for step in step_results:
            if not isinstance(step, dict):
                continue
            result_data = step.get("result", {})
            if not isinstance(result_data, dict):
                continue
            # The tool result may be nested one level deeper under "result".
            tool_result = result_data.get("result", result_data)
            if not isinstance(tool_result, dict):
                continue
            if tool_result.get("ok") and tool_result.get("output"):
                response_parts.append(str(tool_result["output"]))
        if response_parts:
            response_text = "\n\n".join(response_parts)
            return ExecutionDirective(
                type="respond", payload={"text": response_text}
            ).model_dump(mode="json")

    # Case 2: direct tool output (e.g. from resolve_secret -> execute_tool)
    if result.get("ok") and result.get("output"):
        return ExecutionDirective(
            type="respond", payload={"text": str(result["output"])}
        ).model_dump(mode="json")

    return None
class RuntimeLoop: class RuntimeLoop:
"""Central control loop skeleton coordinating task state and events.""" """Central control loop skeleton coordinating task state and events."""
@ -26,6 +56,8 @@ class RuntimeLoop:
execution_engine: ExecutionEngine, execution_engine: ExecutionEngine,
permission_service: PermissionService, permission_service: PermissionService,
memory_interface=None, memory_interface=None,
recall_service: MemoryRecallService | None = None,
memory_policy: MemoryWritePolicy | None = None,
) -> None: ) -> None:
self._event_bus = event_bus self._event_bus = event_bus
self._task_state_store = task_state_store self._task_state_store = task_state_store
@ -35,6 +67,14 @@ class RuntimeLoop:
self._execution_engine = execution_engine self._execution_engine = execution_engine
self._permission_service = permission_service self._permission_service = permission_service
self._memory_interface = memory_interface self._memory_interface = memory_interface
self._recall_service = recall_service
self._memory_policy = memory_policy
def set_recall_service(self, recall_service: MemoryRecallService) -> None:
    """Replace the recall service stored on this loop after construction."""
    self._recall_service = recall_service
def set_memory_policy(self, policy: MemoryWritePolicy | None) -> None:
    """Replace the memory write policy stored on this loop; ``None`` clears it."""
    self._memory_policy = policy
def run_task(self, task: UserTask) -> dict[str, object]: def run_task(self, task: UserTask) -> dict[str, object]:
# Check input for hard-stop commands BEFORE processing # Check input for hard-stop commands BEFORE processing
@ -82,6 +122,23 @@ class RuntimeLoop:
context = self._context_builder.build(task=task, checkpoint=checkpoint) context = self._context_builder.build(task=task, checkpoint=checkpoint)
self._publish(task, CONTEXT_BUILT, {"keys": sorted(context.keys())}) self._publish(task, CONTEXT_BUILT, {"keys": sorted(context.keys())})
# Active memory recall: system decides if it needs to search memory
recall_result = asyncio.run(self._run_recall(task))
if recall_result["should_recall"]:
context["memory_recall"] = {
"query": recall_result["query"],
"summary": recall_result["summary"],
"entries": [
{"text": e.text, "kind": e.kind, "weight": e.weight}
for e in recall_result["results"]
],
}
self._publish(task, MEMORY_RECALL_USED, {
"query": recall_result["query"],
"results_count": len(recall_result["results"]),
"reason": recall_result["reason"],
})
directive = asyncio.run( directive = asyncio.run(
self._router.decide(state=state, context=context, task_id=task.task_id, session_id=task.session_id) self._router.decide(state=state, context=context, task_id=task.task_id, session_id=task.session_id)
) )
@ -104,15 +161,21 @@ class RuntimeLoop:
"reason": "Permission denied - требуется sudo пароль", "reason": "Permission denied - требуется sudo пароль",
"attempts": 0, "attempts": 0,
} }
elif execution_result["status"] == "awaiting_review":
state_patch["pending_permission_request"] = None
state_patch["pending_secret_request"] = None
state_patch["resolved_permission_decision"] = None
state_patch["pending_review"] = execution_result["result"]["review"]
else: else:
state_patch["pending_permission_request"] = None state_patch["pending_permission_request"] = None
state_patch["pending_secret_request"] = None state_patch["pending_secret_request"] = None
state_patch["resolved_permission_decision"] = None state_patch["resolved_permission_decision"] = None
state_patch["pending_review"] = None
self._task_state_store.update_task(task.task_id, state_patch) self._task_state_store.update_task(task.task_id, state_patch)
final_status = str(execution_result["status"]) final_status = str(execution_result["status"])
# For awaiting states - do NOT mark task as completed, keep it in pending state # For awaiting states - do NOT mark task as completed, keep it in pending state
if final_status in ("awaiting_permission", "awaiting_input", "awaiting_password"): if final_status in ("awaiting_permission", "awaiting_input", "awaiting_password", "awaiting_review"):
# Task stays in pending state, don't update to completed # Task stays in pending state, don't update to completed
pass pass
else: else:
@ -125,9 +188,9 @@ class RuntimeLoop:
) )
self._checkpoint_store.save(final_checkpoint) self._checkpoint_store.save(final_checkpoint)
# Generate response after plan execution # Generate response for user
# Case 1: step_results from plan execution
if final_status == "completed" and execution_result.get("result", {}).get("step_results"): if final_status == "completed" and execution_result.get("result", {}).get("step_results"):
# Format tool results into response
step_results = execution_result["result"]["step_results"] step_results = execution_result["result"]["step_results"]
response_parts = [] response_parts = []
for step in step_results: for step in step_results:
@ -135,16 +198,21 @@ class RuntimeLoop:
tool_result = result_data.get("result", result_data) tool_result = result_data.get("result", result_data)
if tool_result.get("ok") and tool_result.get("output"): if tool_result.get("ok") and tool_result.get("output"):
response_parts.append(tool_result["output"]) response_parts.append(tool_result["output"])
if response_parts: if response_parts:
# Create respond directive
response_text = "\n\n".join(response_parts) response_text = "\n\n".join(response_parts)
respond_directive = ExecutionDirective( execution_result["response_directive"] = ExecutionDirective(
type="respond", type="respond", payload={"text": response_text}
payload={"text": response_text}, ).model_dump(mode="json")
)
# Add to execution result # Case 2: respond directive from orchestrator (direct response, no steps)
execution_result["response_directive"] = respond_directive.model_dump(mode="json") if final_status == "completed" and not execution_result.get("response_directive"):
# Use the original directive from router.decide()
if hasattr(directive, "type") and directive.type == "respond":
if directive.payload.get("text"):
execution_result["response_directive"] = directive.model_dump(mode="json")
elif isinstance(directive, dict) and directive.get("type") == "respond":
if directive.get("payload", {}).get("text"):
execution_result["response_directive"] = directive
# Map status to terminal event type # Map status to terminal event type
if final_status == "completed": if final_status == "completed":
@ -155,6 +223,8 @@ class RuntimeLoop:
terminal_event_type = TASK_AWAITING_PERMISSION terminal_event_type = TASK_AWAITING_PERMISSION
elif final_status == "awaiting_input": elif final_status == "awaiting_input":
terminal_event_type = TASK_AWAITING_INPUT terminal_event_type = TASK_AWAITING_INPUT
elif final_status == "awaiting_review":
terminal_event_type = TASK_AWAITING_REVIEW
elif final_status == "awaiting_password": elif final_status == "awaiting_password":
terminal_event_type = TASK_AWAITING_PERMISSION terminal_event_type = TASK_AWAITING_PERMISSION
else: else:
@ -175,7 +245,10 @@ class RuntimeLoop:
"task_id": task.task_id, "task_id": task.task_id,
"status": final_status, "status": final_status,
"directive": directive.model_dump(mode="json"), "directive": directive.model_dump(mode="json"),
"result": execution_result["result"], "result": {
**execution_result["result"],
"response_directive": execution_result.get("response_directive"),
},
"events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)], "events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
} }
@ -254,6 +327,9 @@ class RuntimeLoop:
"pending_secret_request": execution_result["result"].get("secret_request") "pending_secret_request": execution_result["result"].get("secret_request")
if final_status == "awaiting_input" if final_status == "awaiting_input"
else None, else None,
"pending_review": execution_result["result"].get("review")
if final_status == "awaiting_review"
else None,
"resolved_permission_decision": resolved, "resolved_permission_decision": resolved,
}, },
) )
@ -266,6 +342,8 @@ class RuntimeLoop:
terminal_event_type = TASK_AWAITING_INPUT terminal_event_type = TASK_AWAITING_INPUT
elif final_status == "awaiting_permission": elif final_status == "awaiting_permission":
terminal_event_type = TASK_AWAITING_PERMISSION terminal_event_type = TASK_AWAITING_PERMISSION
elif final_status == "awaiting_review":
terminal_event_type = TASK_AWAITING_REVIEW
else: else:
terminal_event_type = TASK_FAILED terminal_event_type = TASK_FAILED
self._publish( self._publish(
@ -283,7 +361,10 @@ class RuntimeLoop:
return { return {
"task_id": task.task_id, "task_id": task.task_id,
"status": final_status, "status": final_status,
"result": execution_result["result"], "result": {
**execution_result["result"],
"response_directive": _build_response_directive(execution_result),
},
"events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)], "events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
} }
@ -314,12 +395,15 @@ class RuntimeLoop:
secret_override=secret, secret_override=secret,
) )
final_status = str(execution_result["status"]) final_status = str(execution_result["status"])
pending_review = execution_result["result"].get("review") if final_status == "awaiting_review" else None
pending_secret = execution_result["result"].get("secret_request") if final_status == "awaiting_input" else None
self._task_state_store.update_task( self._task_state_store.update_task(
task.task_id, task.task_id,
{ {
"status": final_status, "status": final_status,
"pending_secret_request": None, "pending_secret_request": pending_secret,
"resolved_permission_decision": None, "resolved_permission_decision": resolved_permission_payload if final_status == "awaiting_input" else None,
"pending_review": pending_review,
}, },
) )
checkpoint = TaskCheckpoint(task_id=task.task_id, status=final_status) checkpoint = TaskCheckpoint(task_id=task.task_id, status=final_status)
@ -331,6 +415,8 @@ class RuntimeLoop:
terminal_event_type = TASK_AWAITING_INPUT terminal_event_type = TASK_AWAITING_INPUT
elif final_status == "awaiting_permission": elif final_status == "awaiting_permission":
terminal_event_type = TASK_AWAITING_PERMISSION terminal_event_type = TASK_AWAITING_PERMISSION
elif final_status == "awaiting_review":
terminal_event_type = TASK_AWAITING_REVIEW
else: else:
terminal_event_type = TASK_FAILED terminal_event_type = TASK_FAILED
self._publish( self._publish(
@ -344,10 +430,55 @@ class RuntimeLoop:
return { return {
"task_id": task.task_id, "task_id": task.task_id,
"status": final_status, "status": final_status,
"result": execution_result["result"], "result": {
**execution_result["result"],
"response_directive": _build_response_directive(execution_result),
},
"events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)], "events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
} }
def resolve_review(self, task_id: str, decision: str, correction: str | None = None) -> dict[str, object]:
    """Apply a user's review of a pending action and re-run the task.

    The review outcome is attached to the task context under
    ``previous_action_review``, published as a ``REVIEW_RESOLVED`` event and,
    when a memory interface is configured, stored as a ``critique`` entry.
    The task is then marked ``replanning`` and passed to ``run_task`` again.

    Args:
        task_id: Identifier of the task awaiting review.
        decision: The user's verdict; ``"wrong_action"`` is stored with a
            higher memory weight than any other value.
        correction: Optional free-text correction from the user.

    Returns:
        A ``failed`` result when the task is unknown or has no pending
        review, otherwise whatever ``run_task`` returns.
    """
    state = self._task_state_store.get_task(task_id)
    if not state:
        return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}}
    pending_review = state.get("pending_review")
    if not pending_review:
        return {"task_id": task_id, "status": "failed", "result": {"error": "No pending review"}}

    session_id = state["session_id"]
    task = UserTask(
        task_id=task_id,
        session_id=session_id,
        input=state["task_input"],
        context={
            # A stored ``None`` context must not break the merge.
            **(state.get("task_context") or {}),
            "previous_action_review": {
                "decision": decision,
                "correction": correction,
                "review": pending_review,
            },
        },
    )
    self._publish(task, REVIEW_RESOLVED, {
        "decision": decision,
        "correction": correction,
        "review": pending_review,
    })
    if self._memory_interface:
        try:
            self._memory_interface.insert(
                text=f"User reviewed model action as {decision}. Correction: {correction or ''}. Review: {pending_review}",
                kind="critique",
                source="user",
                task_id=task_id,
                session_id=session_id,
                weight=0.9 if decision == "wrong_action" else 0.5,
                metadata={"decision": decision, "review": pending_review},
            )
        except Exception:
            # Storing the critique is best-effort: record the failure but
            # still continue with the re-run.
            import logging

            logging.getLogger(__name__).warning(
                "Failed to store review feedback for task %s", task_id, exc_info=True
            )
    self._task_state_store.update_task(task_id, {"pending_review": None, "status": "replanning"})
    return self.run_task(task)
def resolve_password(self, task_id: str, password: str) -> dict[str, object]: def resolve_password(self, task_id: str, password: str) -> dict[str, object]:
state = self._task_state_store.get_task(task_id) state = self._task_state_store.get_task(task_id)
if not state: if not state:
@ -445,7 +576,10 @@ class RuntimeLoop:
return { return {
"task_id": task.task_id, "task_id": task.task_id,
"status": final_status, "status": final_status,
"result": execution_result["result"], "result": {
**execution_result["result"],
"response_directive": _build_response_directive(execution_result),
},
"events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)], "events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
} }
@ -459,22 +593,61 @@ class RuntimeLoop:
) )
self._event_bus.publish(event) self._event_bus.publish(event)
async def _run_recall(self, task: UserTask) -> dict:
    """Run active memory recall before orchestration.

    Returns whatever the recall service returns for ``task.input``. When no
    recall service is configured, or the service raises, a "no recall" result
    is returned instead (``should_recall`` is ``False``, the other fields are
    empty) with ``reason`` describing why. Failures are also logged so they
    stay visible even if the caller ignores ``reason``.
    """

    def _no_recall(reason: str) -> dict:
        return {"should_recall": False, "reason": reason, "query": "", "results": [], "summary": ""}

    if not self._recall_service:
        return _no_recall("no_recall_service")
    try:
        return await self._recall_service.recall(task_input=task.input)
    except Exception as e:
        # Recall is best-effort: report the failure and fall back to no recall.
        import logging

        logging.getLogger(__name__).warning("Memory recall failed: %s", e)
        return _no_recall(f"recall_error: {e}")
def _save_to_memory(self, task: UserTask, execution_result: dict, status: str) -> None: def _save_to_memory(self, task: UserTask, execution_result: dict, status: str) -> None:
"""Save task input and result to memory for session context.""" """Save task input and result to memory for session context, using MemoryWritePolicy."""
if not self._memory_interface: if not self._memory_interface:
return return
try: try:
# Build a synthetic critic_score for policy based on task status
# For summary/tool_result without real critic, we derive from execution outcome
if status == "completed":
synthetic_score = CriticScore(
correctness=0.9, usefulness=0.8, safety=0.95,
memory_store=True, weight=0.85, explanation="Task completed successfully"
)
elif status == "failed":
synthetic_score = CriticScore(
correctness=0.2, usefulness=0.3, safety=0.7,
memory_store=True, weight=0.5, explanation="Task failed — store for learning"
)
else:
synthetic_score = CriticScore(
correctness=0.5, usefulness=0.5, safety=0.8,
memory_store=False, weight=0.3, explanation=f"Status: {status}"
)
# Save task input as summary # Save task input as summary
self._memory_interface.insert( decision = "store"
text=f"User request: {task.input}", if self._memory_policy:
kind="summary", decision = self._memory_policy.decide(
source="user", critic_score=synthetic_score,
task_id=task.task_id, memory_type="summary",
session_id=task.session_id, session_id=task.session_id,
weight=0.8, )
metadata={"status": status}, if decision in ("store", "store_with_weight"):
) weight = synthetic_score.weight if decision == "store_with_weight" else 0.8
self._memory_interface.insert(
text=f"User request: {task.input}",
kind="summary",
source="user",
task_id=task.task_id,
session_id=task.session_id,
weight=weight,
metadata={"status": status, "policy_decision": decision},
)
self._publish(task, MEMORY_WRITE_DECIDED, {
"kind": "summary", "decision": decision, "text_preview": task.input[:80]
})
# Save execution result # Save execution result
result_text = "" result_text = ""
@ -489,16 +662,27 @@ class RuntimeLoop:
result_text = f" | Error: {execution_result.get('result', {}).get('error', 'Unknown')}" result_text = f" | Error: {execution_result.get('result', {}).get('error', 'Unknown')}"
if result_text: if result_text:
self._memory_interface.insert( decision = "store"
text=f"Result: {status}{result_text}", if self._memory_policy:
kind="tool_result", decision = self._memory_policy.decide(
source="system", critic_score=synthetic_score,
task_id=task.task_id, memory_type="tool_result",
session_id=task.session_id, session_id=task.session_id,
weight=0.7, )
metadata={"status": status}, if decision in ("store", "store_with_weight"):
) weight = synthetic_score.weight if decision == "store_with_weight" else 0.7
self._memory_interface.insert(
text=f"Result: {status}{result_text}",
kind="tool_result",
source="system",
task_id=task.task_id,
session_id=task.session_id,
weight=weight,
metadata={"status": status, "policy_decision": decision},
)
self._publish(task, MEMORY_WRITE_DECIDED, {
"kind": "tool_result", "decision": decision, "text_preview": result_text[:80]
})
except Exception as e: except Exception as e:
# Log but don't fail the task
import logging import logging
logging.getLogger(__name__).warning(f"Failed to save to memory: {e}") logging.getLogger(__name__).warning(f"Failed to save to memory: {e}")

View File

@ -2,6 +2,7 @@ from __future__ import annotations
import asyncio import asyncio
from collections import defaultdict from collections import defaultdict
from dataclasses import dataclass
from app.core.contracts import RuntimeEvent from app.core.contracts import RuntimeEvent
from app.events.event_bus import EventBus from app.events.event_bus import EventBus
@ -12,7 +13,7 @@ class StreamingManager:
def __init__(self, event_bus: EventBus) -> None: def __init__(self, event_bus: EventBus) -> None:
self._event_bus = event_bus self._event_bus = event_bus
self._subscribers: dict[str, list[asyncio.Queue[RuntimeEvent]]] = defaultdict(list) self._subscribers: dict[str, list[StreamSubscriber]] = defaultdict(list)
self._event_bus.subscribe(self._on_event) self._event_bus.subscribe(self._on_event)
def replay_events(self, task_id: str) -> list[RuntimeEvent]: def replay_events(self, task_id: str) -> list[RuntimeEvent]:
@ -20,17 +21,26 @@ class StreamingManager:
def subscribe(self, task_id: str) -> asyncio.Queue[RuntimeEvent]:
    """Register a new subscriber for ``task_id`` and return its event queue.

    The currently running event loop is captured alongside the queue, so this
    must be called from code running inside an event loop.
    """
    listeners = self._subscribers[task_id]
    queue: asyncio.Queue[RuntimeEvent] = asyncio.Queue()
    subscriber = StreamSubscriber(loop=asyncio.get_running_loop(), queue=queue)
    listeners.append(subscriber)
    return queue
def unsubscribe(self, task_id: str, queue: asyncio.Queue[RuntimeEvent]) -> None:
    """Remove the subscriber that owns ``queue`` from ``task_id``.

    Only the first subscriber whose queue is the very same object is removed.
    Once a task has no subscribers left, its entry is dropped from the
    registry. Unknown task ids and unknown queues are ignored.
    """
    listeners = self._subscribers.get(task_id, [])
    for position, subscriber in enumerate(listeners):
        if subscriber.queue is queue:
            del listeners[position]
            break
    if task_id in self._subscribers and not listeners:
        del self._subscribers[task_id]
def _on_event(self, event: RuntimeEvent) -> None:
    """Deliver ``event`` to every queue subscribed to ``event.task_id``.

    Each delivery is scheduled on the subscriber's own event loop with
    ``call_soon_threadsafe``, so the event may be published from a thread
    other than the one running that loop. A subscriber whose loop has already
    been closed is skipped, so one stale subscriber cannot block delivery to
    the others or raise into the publisher.
    """
    # Iterate over a copy: subscribers may be added or removed concurrently.
    for listener in list(self._subscribers.get(event.task_id, [])):
        try:
            listener.loop.call_soon_threadsafe(listener.queue.put_nowait, event)
        except RuntimeError:
            # call_soon_threadsafe raises RuntimeError when the loop is closed.
            continue
@dataclass
class StreamSubscriber:
    """Pairs a subscriber's event queue with the event loop it was registered on."""

    # Event loop on which deliveries to ``queue`` are scheduled.
    loop: asyncio.AbstractEventLoop
    # Queue that receives the ``RuntimeEvent`` objects for this subscriber.
    queue: asyncio.Queue[RuntimeEvent]

View File

@ -5,6 +5,20 @@ from app.tools.base import BaseTool
from app.tools.sandbox import ToolSandbox from app.tools.sandbox import ToolSandbox
def _detect_sudo_auth_failure(output: str) -> bool:
    """Report whether ``output`` contains a known sudo authentication-failure message.

    The check is a case-insensitive substring search for a fixed list of
    phrases; it returns ``True`` as soon as one of them is found.
    """
    haystack = output.lower()
    known_markers = (
        "incorrect password",
        "incorrect password attempt",
        "sudo: no password was provided",
        "sorry, try again",
        "authentication failure",
    )
    for phrase in known_markers:
        if phrase in haystack:
            return True
    return False
class Tool(BaseTool): class Tool(BaseTool):
name = "shell_exec" name = "shell_exec"
description = "Execute shell commands" description = "Execute shell commands"
@ -18,16 +32,24 @@ class Tool(BaseTool):
return ToolResult(tool=self.name, ok=False, error="Missing command", metadata={"exit_code": -1}) return ToolResult(tool=self.name, ok=False, error="Missing command", metadata={"exit_code": -1})
cwd = args.get("cwd") cwd = args.get("cwd")
stdin_secret = args.get("stdin_secret") stdin_secret = args.get("stdin_secret")
output_callback = args.get("__output_callback")
completed = self._sandbox.run_shell( completed = self._sandbox.run_shell(
command=command, command=command,
cwd=str(cwd) if cwd else None, cwd=str(cwd) if cwd else None,
stdin_data=str(stdin_secret) if stdin_secret is not None else None, stdin_data=str(stdin_secret) if stdin_secret is not None else None,
output_callback=output_callback if callable(output_callback) else None,
) )
output = completed.stdout if completed.returncode == 0 else completed.stderr or completed.stdout output = completed.stdout if completed.returncode == 0 else completed.stderr or completed.stdout
sudo_auth_failed = completed.returncode != 0 and _detect_sudo_auth_failure(
f"{completed.stdout}\n{completed.stderr}"
)
return ToolResult( return ToolResult(
tool=self.name, tool=self.name,
ok=completed.returncode == 0, ok=completed.returncode == 0,
output=output, output=output,
error=None if completed.returncode == 0 else f"Command failed with exit code {completed.returncode}", error=None if completed.returncode == 0 else f"Command failed with exit code {completed.returncode}",
metadata={"exit_code": completed.returncode}, metadata={
"exit_code": completed.returncode,
"sudo_auth_failed": sudo_auth_failed,
},
) )

View File

@ -1,16 +1,28 @@
from __future__ import annotations from __future__ import annotations
import os import os
import signal
import subprocess import subprocess
import threading
import time
from pathlib import Path from pathlib import Path
from typing import Callable
class ToolSandbox: class ToolSandbox:
"""Applies simple working directory and timeout restrictions.""" """Applies simple working directory and timeout restrictions."""
def __init__(self, allowed_root: str | Path, timeout_ms: int) -> None: def __init__(
self,
allowed_root: str | Path,
timeout_ms: int,
command_timeout_ms: int | None = None,
idle_timeout_ms: int | None = None,
) -> None:
self._allowed_root = Path(allowed_root).resolve() self._allowed_root = Path(allowed_root).resolve()
self._timeout_seconds = max(timeout_ms / 1000, 1) self._timeout_seconds = max(timeout_ms / 1000, 0.001)
self._command_timeout_seconds = max((command_timeout_ms or timeout_ms) / 1000, 0.001)
self._idle_timeout_seconds = max((idle_timeout_ms or timeout_ms) / 1000, 0.001)
def ensure_path_allowed(self, path: str | Path) -> Path: def ensure_path_allowed(self, path: str | Path) -> Path:
resolved = Path(path).expanduser().resolve() resolved = Path(path).expanduser().resolve()
@ -23,17 +35,105 @@ class ToolSandbox:
command: str, command: str,
cwd: str | Path | None = None, cwd: str | Path | None = None,
stdin_data: str | None = None, stdin_data: str | None = None,
output_callback: Callable[[str, str], None] | None = None,
) -> subprocess.CompletedProcess[str]: ) -> subprocess.CompletedProcess[str]:
working_directory = self.ensure_path_allowed(cwd or self._allowed_root) working_directory = self.ensure_path_allowed(cwd or self._allowed_root)
env = {"PATH": os.environ.get("PATH", "")} env = {"PATH": os.environ.get("PATH", "")}
return subprocess.run( if output_callback is None:
return subprocess.run(
command,
shell=True,
cwd=str(working_directory),
env=env,
text=True,
capture_output=True,
input=stdin_data,
timeout=self._command_timeout_seconds,
check=False,
)
process = subprocess.Popen(
command, command,
shell=True, shell=True,
cwd=str(working_directory), cwd=str(working_directory),
env=env, env=env,
text=True, text=True,
capture_output=True, stdin=subprocess.PIPE if stdin_data is not None else None,
input=stdin_data, stdout=subprocess.PIPE,
timeout=self._timeout_seconds, stderr=subprocess.PIPE,
check=False, start_new_session=True,
)
stdout_chunks: list[str] = []
stderr_chunks: list[str] = []
output_lock = threading.Lock()
last_output_at = time.monotonic()
if stdin_data is not None and process.stdin is not None:
process.stdin.write(stdin_data)
process.stdin.close()
def read_stream(stream_name: str) -> None:
stream = process.stdout if stream_name == "stdout" else process.stderr
if stream is None:
return
chunks = stdout_chunks if stream_name == "stdout" else stderr_chunks
try:
for line in iter(stream.readline, ""):
if not line:
break
chunks.append(line)
nonlocal last_output_at
with output_lock:
last_output_at = time.monotonic()
output_callback(stream_name, line)
finally:
stream.close()
stdout_thread = threading.Thread(target=read_stream, args=("stdout",), daemon=True)
stderr_thread = threading.Thread(target=read_stream, args=("stderr",), daemon=True)
stdout_thread.start()
stderr_thread.start()
timed_out = False
timeout_reason: str | None = None
started_at = time.monotonic()
return_code: int | None = None
while return_code is None:
return_code = process.poll()
if return_code is not None:
break
now = time.monotonic()
with output_lock:
idle_for = now - last_output_at
if now - started_at > self._command_timeout_seconds:
timed_out = True
timeout_reason = f"Command timed out after {self._command_timeout_seconds:.0f}s"
break
if idle_for > self._idle_timeout_seconds:
timed_out = True
timeout_reason = f"Command produced no output for {self._idle_timeout_seconds:.0f}s"
break
time.sleep(0.1)
if timed_out:
try:
os.killpg(process.pid, signal.SIGKILL)
except ProcessLookupError:
pass
except PermissionError:
process.kill()
return_code = process.wait()
timeout_message = f"{timeout_reason}\n"
stderr_chunks.append(timeout_message)
output_callback("stderr", timeout_message)
stdout_thread.join(timeout=1)
stderr_thread.join(timeout=1)
return subprocess.CompletedProcess(
args=command,
returncode=return_code if not timed_out else -9,
stdout="".join(stdout_chunks),
stderr="".join(stderr_chunks),
) )

View File

@ -5,6 +5,22 @@ from app.tools.base import BaseTool
from app.tools.sandbox import ToolSandbox from app.tools.sandbox import ToolSandbox
def _detect_sudo_auth_failure(output: str) -> bool:
normalized = output.lower()
return any(
marker in normalized
for marker in (
"incorrect password",
"incorrect password attempt",
"sudo: no password was provided",
"sudo: password incorrect",
"sorry, try again",
"authentication failure",
"wrong password",
)
)
class ShellExecTool(BaseTool): class ShellExecTool(BaseTool):
name = "shell_exec" name = "shell_exec"
@ -18,6 +34,7 @@ class ShellExecTool(BaseTool):
cwd = args.get("cwd") cwd = args.get("cwd")
stdin_secret = args.get("stdin_secret") stdin_secret = args.get("stdin_secret")
password = args.get("password") password = args.get("password")
output_callback = args.get("__output_callback")
if password: if password:
command = f'echo "{password}" | sudo -S {command}' command = f'echo "{password}" | sudo -S {command}'
@ -26,21 +43,23 @@ class ShellExecTool(BaseTool):
command=command, command=command,
cwd=str(cwd) if cwd else None, cwd=str(cwd) if cwd else None,
stdin_data=str(stdin_secret) if stdin_secret is not None else None, stdin_data=str(stdin_secret) if stdin_secret is not None else None,
output_callback=output_callback if callable(output_callback) else None,
) )
output = completed.stdout if completed.returncode == 0 else completed.stderr or completed.stdout output = completed.stdout if completed.returncode == 0 else completed.stderr or completed.stdout
error_output = completed.stderr or completed.stdout error_output = completed.stderr or completed.stdout
is_sudo_error = ( sudo_auth_failed = completed.returncode != 0 and _detect_sudo_auth_failure(
completed.returncode != 0 and f"{completed.stdout}\n{completed.stderr}"
("permission denied" in error_output.lower() or
"incorrect password" in error_output.lower() or
"sudo: password incorrect" in error_output.lower() or
"wrong password" in error_output.lower())
) )
needs_sudo = completed.returncode != 0 and "permission denied" in error_output.lower() and not sudo_auth_failed
return ToolResult( return ToolResult(
tool=self.name, tool=self.name,
ok=completed.returncode == 0, ok=completed.returncode == 0,
output=output, output=output,
error=None if completed.returncode == 0 else f"Command failed with exit code {completed.returncode}", error=None if completed.returncode == 0 else f"Command failed with exit code {completed.returncode}",
metadata={"exit_code": completed.returncode, "needs_sudo": is_sudo_error}, metadata={
"exit_code": completed.returncode,
"needs_sudo": needs_sudo,
"sudo_auth_failed": sudo_auth_failed,
},
) )

42
config/models.json.backup Normal file
View File

@ -0,0 +1,42 @@
{
"thinker": {
"path": "Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf",
"backend": "vulkan",
"n_gpu_layers": -1,
"max_tokens": 2048,
"temperature": 0.3
},
"json_compiler": {
"path": "gemma-4-E4B-it-Q4_K_M.gguf",
"backend": "cpu",
"n_gpu_layers": 0,
"max_tokens": 1024,
"temperature": 0.1
},
"coder": {
"path": "X-Coder-SFT-Qwen3-8B.Q6_K.gguf",
"backend": "cpu",
"n_gpu_layers": 0,
"max_tokens": 2048,
"temperature": 0.2
},
"critic": {
"path": "gemma-4-E4B-it-Q4_K_M.gguf",
"backend": "cpu",
"n_gpu_layers": 0,
"max_tokens": 1024,
"temperature": 0.1
},
"sys_util": {
"path": "Menlo_Lucy-Q4_K_M.gguf",
"backend": "cpu",
"n_gpu_layers": 0,
"max_tokens": 1024,
"temperature": 0.1
},
"embeddings": {
"path": "all-MiniLM-L6-v2",
"model_name": "sentence-transformers/all-MiniLM-L6-v2",
"embedding_dim": 384
}
}

42
config/models.json.test Normal file
View File

@ -0,0 +1,42 @@
{
"thinker": {
"path": "Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf",
"backend": "vulkan",
"n_gpu_layers": -1,
"max_tokens": 2048,
"temperature": 0.3
},
"json_compiler": {
"path": "gemma-4-E4B-it-Q4_K_M.gguf",
"backend": "cpu",
"n_gpu_layers": 0,
"max_tokens": 1024,
"temperature": 0.1
},
"coder": {
"path": "X-Coder-SFT-Qwen3-8B.Q6_K.gguf",
"backend": "cpu",
"n_gpu_layers": 0,
"max_tokens": 2048,
"temperature": 0.2
},
"critic": {
"path": "gemma-4-E4B-it-Q4_K_M.gguf",
"backend": "cpu",
"n_gpu_layers": 0,
"max_tokens": 1024,
"temperature": 0.1
},
"sys_util": {
"path": "Menlo_Lucy-Q4_K_M.gguf",
"backend": "cpu",
"n_gpu_layers": 0,
"max_tokens": 1024,
"temperature": 0.1
},
"embeddings": {
"path": "all-MiniLM-L6-v2",
"model_name": "sentence-transformers/all-MiniLM-L6-v2",
"embedding_dim": 384
}
}

View File

@ -35,6 +35,12 @@
"chmod -R 000", "chmod -R 000",
"chmod -R 777", "chmod -R 777",
"chown -R", "chown -R",
"apt",
"apt-get",
"dpkg",
"yum",
"dnf",
"pacman",
"shutdown", "shutdown",
"reboot", "reboot",
"halt", "halt",

View File

@ -1,8 +1,15 @@
{ {
"thinker": "You are the orchestrator of a local AI agent runtime. Your job is to analyze the user's task and decide how to execute it.\n\n## Decision Types\n\n1. **Direct response** — for simple questions, greetings, conversations:\n {\"type\": \"respond\", \"payload\": {\"text\": \"your answer\"}}\n\n2. **Single tool step** — for simple tasks needing one tool:\n {\"type\": \"step\", \"payload\": {\"tool\": \"shell_exec\", \"args\": {\"command\": \"...\"}}}\n\n3. **Multi-step plan** — for complex tasks that need decomposition:\n {\"type\": \"plan\", \"payload\": {\"steps\": [\n {\"id\": \"step-1\", \"tool\": \"shell_exec\", \"args\": {\"command\": \"...\"}, \"description\": \"...\", \"depends_on\": []},\n {\"id\": \"step-2\", \"tool\": \"file_read\", \"args\": {\"path\": \"...\"}, \"description\": \"...\", \"depends_on\": [\"step-1\"]}\n ]}}\n\n## When to use multi-step plan\n- Task requires multiple operations (search → read → write)\n- Task involves checking prerequisites before acting\n- Task requires gathering information before producing result\n- User asks to do something complex (setup, configure, analyze)\n\n## Memory\n- If memory recall results are provided, USE them to inform your decisions\n- If you know something from memory, mention it in step descriptions\n- Store important results for future use\n\n## Rules\n- ALWAYS respond with valid JSON only\n- Each step MUST have a unique id\n- Use depends_on for ordering constraints\n- Keep steps focused — one action per step\n- If unsure, start with an information-gathering step\n- Respond ONLY with valid JSON, no explanations",
"orchestrator": "You are an expert orchestrator for a local AI agent system. Your role is to analyze the user's task and generate executable runtime steps.\n\nTool selection (choose the right tool):\n- shell_exec: for running commands, checking programs exist ('which', '--version'), searching files\n- file_read: for reading contents of a file (must be existing file path)\n- file_write: for creating or updating files\n- memory: for storing or searching memory\n\nSTRICT OUTPUT FORMAT - MUST follow exactly:\n\nSingle step:\n{\"type\": \"step\", \"payload\": {\"tool\": \"shell_exec\", \"args\": {\"command\": \"...\"}}}\n{\"type\": \"step\", \"payload\": {\"tool\": \"file_read\", \"args\": {\"path\": \"...\"}}}\n{\"type\": \"step\", \"payload\": {\"tool\": \"file_write\", \"args\": {\"path\": \"...\", \"content\": \"...\"}}}\n\nMulti-step plan:\n{\"type\": \"plan\", \"payload\": {\"steps\": [{\"tool\": \"file_read\", \"args\": {\"path\": \"...\"}, \"description\": \"...\", \"depends_on\": []}]}}\n\nDirect response:\n{\"type\": \"respond\", \"payload\": {\"text\": \"...\"}}\n\nIMPORTANT:\n- Use exactly {\"type\": \"step|plan|respond\", \"payload\": {...}} format\n- Do NOT output array alone\n- Do NOT use \"kind\" - use \"type\"\n- Respond ONLY with valid JSON\n- Your response MUST be complete valid JSON - the closing brace } MUST be present\n- Do NOT truncate your response - if you cannot fit all steps, use a single step\n\nTool selection:\n- For checking if a program/command exists: use shell_exec with 'which <program>' or '<program> --version'\n- For reading file contents: use file_read with path to file (NOT command)\n- For executing any command: use shell_exec\n- Previous experience (from memory) may help - consider it but YOU decide how to proceed", "orchestrator": "You are an expert orchestrator for a local AI agent system. 
Your role is to analyze the user's task and generate executable runtime steps.\n\nTool selection (choose the right tool):\n- shell_exec: for running commands, checking programs exist ('which', '--version'), searching files\n- file_read: for reading contents of a file (must be existing file path)\n- file_write: for creating or updating files\n- memory: for storing or searching memory\n\nSTRICT OUTPUT FORMAT - MUST follow exactly:\n\nSingle step:\n{\"type\": \"step\", \"payload\": {\"tool\": \"shell_exec\", \"args\": {\"command\": \"...\"}}}\n{\"type\": \"step\", \"payload\": {\"tool\": \"file_read\", \"args\": {\"path\": \"...\"}}}\n{\"type\": \"step\", \"payload\": {\"tool\": \"file_write\", \"args\": {\"path\": \"...\", \"content\": \"...\"}}}\n\nMulti-step plan:\n{\"type\": \"plan\", \"payload\": {\"steps\": [{\"tool\": \"file_read\", \"args\": {\"path\": \"...\"}, \"description\": \"...\", \"depends_on\": []}]}}\n\nDirect response:\n{\"type\": \"respond\", \"payload\": {\"text\": \"...\"}}\n\nIMPORTANT:\n- Use exactly {\"type\": \"step|plan|respond\", \"payload\": {...}} format\n- Do NOT output array alone\n- Do NOT use \"kind\" - use \"type\"\n- Respond ONLY with valid JSON\n- Your response MUST be complete valid JSON - the closing brace } MUST be present\n- Do NOT truncate your response - if you cannot fit all steps, use a single step\n\nTool selection:\n- For checking if a program/command exists: use shell_exec with 'which <program>' or '<program> --version'\n- For reading file contents: use file_read with path to file (NOT command)\n- For executing any command: use shell_exec\n- Previous experience (from memory) may help - consider it but YOU decide how to proceed",
"planning": "You are a planning specialist. Generate execution plans.\n\nOutput MUST be:\n{\"type\": \"plan\", \"version\": \"1.0\", \"payload\": {\"steps\": [{\"tool\": \"\", \"args\": {}, \"description\": \"...\", \"depends_on\": []}]}}\n\nRules:\n- Each step must have unique id (auto-generated)\n- Use \"depends_on\" for step ordering\n- Use \"tool\" for tool operations\n- Respond ONLY with valid JSON", "planning": "You are a planning specialist. Generate execution plans.\n\nOutput MUST be:\n{\"type\": \"plan\", \"version\": \"1.0\", \"payload\": {\"steps\": [{\"tool\": \"\", \"args\": {}, \"description\": \"...\", \"depends_on\": []}]}}\n\nRules:\n- Each step must have unique id (auto-generated)\n- Use \"depends_on\" for step ordering\n- Use \"tool\" for tool operations\n- Respond ONLY with valid JSON",
"coder": "You are an expert code generation model.\n\nOutput format:\n{\"type\": \"code\", \"payload\": {\"language\": \"python\", \"content\": \"...\"}}\n\nOR for completion:\n{\"type\": \"respond\", \"payload\": {\"text\": \"...\"}}\n\nGenerate clean, working code. Respond ONLY with valid JSON.", "coder": "You are an expert code generation model.\n\nOutput format:\n{\"type\": \"code\", \"payload\": {\"language\": \"python\", \"content\": \"...\"}}\n\nOR for completion:\n{\"type\": \"respond\", \"payload\": {\"text\": \"...\"}}\n\nGenerate clean, working code. Respond ONLY with valid JSON.",
"critic": "You are a critic model. Evaluate tool execution results.\n\nScoring criteria:\n- correctness: 0-1 (does result accomplish task?)\n- usefulness: 0-1 (is result useful?)\n- safety: 0-1 (is result safe?)\n- suggest_memory: boolean (should this be stored in memory?)\n- weight: 0-1 (importance score)\n- explanation: brief reasoning\n\nOutput format:\n{\"type\": \"evaluation\", \"payload\": {\"correctness\": 0.0-1.0, \"usefulness\": 0.0-1.0, \"safety\": 0.0-1.0, \"suggest_memory\": true|false, \"weight\": 0.0-1.0, \"explanation\": \"...\"}}\n\nRespond ONLY with valid JSON.", "critic": "You are a critic model. Evaluate tool execution results.\n\nScoring criteria:\n- correctness: 0-1 (does result accomplish task?)\n- usefulness: 0-1 (is result useful?)\n- safety: 0-1 (is result safe?)\n- suggest_memory: boolean (should this be stored in memory?)\n- weight: 0-1 (importance score)\n- explanation: brief reasoning\n\nOutput format:\n{\"type\": \"evaluation\", \"payload\": {\"correctness\": 0.0-1.0, \"usefulness\": 0.0-1.0, \"safety\": 0.0-1.0, \"suggest_memory\": true|false, \"weight\": 0.0-1.0, \"explanation\": \"...\"}}\n\nRespond ONLY with valid JSON.",
"system": "You are ducklm, a local AI agent runtime.\n\nSTRICT RULES:\n- You MUST strictly follow execution schemas\n- You are NOT allowed to output free-form text\n- All outputs MUST be valid JSON matching runtime contracts\n- Use exact tool names from available tool set\n\nCurrent capabilities:\n- Execute shell commands (shell_exec)\n- Read/write files (file_read, file_write)\n- Memory operations (memory)\n\nAlways respond with valid JSON.", "system": "You are ducklm, a local AI agent runtime.\n\nSTRICT RULES:\n- You MUST strictly follow execution schemas\n- You are NOT allowed to output free-form text\n- All outputs MUST be valid JSON matching runtime contracts\n- Use exact tool names from available tool set\n\nCurrent capabilities:\n- Execute shell commands (shell_exec)\n- Read/write files (file_read, file_write)\n- Memory operations (memory)\n\nAlways respond with valid JSON.",
"sys_util": "You are a STRICT JSON repair engine inside a production AI runtime.\nYour job is ONLY to fix invalid JSON syntax.\nYou are NOT allowed to:\n- change meaning of data\n- add new fields\n- remove valid fields\n- interpret intent\n- explain anything\n- reformat structure logically\n---\nINPUT:\nYou receive a malformed or invalid JSON string.\n---\nOUTPUT RULES:\n- Output ONLY valid JSON\n- No markdown\n- No comments\n- No explanations\n- No extra text\n---\nREPAIR RULES (STRICT):\nFix ONLY syntax issues:\n- missing or extra commas\n- missing quotes\n- incorrect brackets\n- trailing commas\n- invalid escaping\n- broken strings\n- unbalanced braces\nDO NOT:\n- rename keys\n- reorder fields intentionally\n- guess missing semantic data\n- \"improve\" structure\n---\nIMPORTANT:\nIf multiple valid repairs exist:\n\u2192 choose the minimal change that makes JSON valid\n---\nOUTPUT MUST BE VALID JSON OR NOTHING ELSE\nInvalid JSON:"
"sys_util": "You are a STRICT JSON repair engine inside a production AI runtime.\nYour job is ONLY to fix invalid JSON syntax.\nYou are NOT allowed to:\n- change meaning of data\n- add new fields\n- remove valid fields\n- interpret intent\n- explain anything\n- reformat structure logically\n---\nINPUT:\nYou receive a malformed or invalid JSON string.\n---\nOUTPUT RULES:\n- Output ONLY valid JSON\n- No markdown\n- No comments\n- No explanations\n- No extra text\n---\nREPAIR RULES (STRICT):\nFix ONLY syntax issues:\n- missing or extra commas\n- missing quotes\n- incorrect brackets\n- trailing commas\n- invalid escaping\n- broken strings\n- unbalanced braces\nDO NOT:\n- rename keys\n- reorder fields intentionally\n- guess missing semantic data\n- \"improve\" structure\n---\nIMPORTANT:\nIf multiple valid repairs exist:\n→ choose the minimal change that makes JSON valid\n---\nOUTPUT MUST BE VALID JSON OR NOTHING ELSE\nInvalid JSON:"
} }

View File

@ -1,6 +1,8 @@
{ {
"step_timeout_ms": 30000, "step_timeout_ms": 30000,
"task_timeout_ms": 300000, "task_timeout_ms": 300000,
"shell_command_timeout_ms": 3600000,
"shell_idle_timeout_ms": 600000,
"planner_retry_limit": 2, "planner_retry_limit": 2,
"tool_retry_limit": 1, "tool_retry_limit": 1,
"replan_limit": 1, "replan_limit": 1,
@ -34,5 +36,7 @@
"debug_orchestrator_log_length": 500, "debug_orchestrator_log_length": 500,
"json_fix_retry_limit": 2, "json_fix_retry_limit": 2,
"json_fix_use_sys_util": true, "json_fix_use_sys_util": true,
"intent_classifier": "thinker" "intent_classifier": "thinker",
"recall_model": "json_compiler",
"critic_retry_limit": 2
} }

View File

@ -0,0 +1,24 @@
# UI Bootstrap And Review Flow Plan
## Goal
Move the web chat UI to Bootstrap 5.3 with Bootswatch themes and improve review/password/terminal-output ergonomics.
## Required Changes
- Replace the current hand-written visual system in `app/api/static/index.html` with Bootstrap 5.3 layout/components.
- Add Bootswatch theme support with a visible theme selector and persistent localStorage choice.
- Password/secret input must submit on Enter as well as the "Отправить" button.
- Console/tool output must render inside a collapsed Bootstrap accordion item.
- The accordion body must contain terminal-style output inside `<pre></pre>`.
- The terminal accordion must expand only when the user clicks it.
- Review UI must show the critic/system assessment and user feedback controls:
  - `Ошибочное действие` voting button
  - `Всё верно` voting button
  - an optional correction/comment text field.
## Notes
- Keep runtime event handling WebSocket-driven.
- Do not mix console output with assistant prose.
- Keep raw tool output available for debugging, but collapsed by default.

BIN
favicon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

274
server.err Normal file
View File

@ -0,0 +1,274 @@
Loading weights: 0%| | 0/103 [00:00<?, ?it/s] Loading weights: 100%|██████████| 103/103 [00:00<00:00, 5627.96it/s]
INFO: Started server process [221205]
INFO: Waiting for application startup.
llama_context: n_ctx_seq (4096) < n_ctx_train (262144) -- the full capacity of the model will not be utilized
llama_context: n_ctx_seq (4096) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
llama_kv_cache_iswa: using full-size SWA cache (ref: https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
llama_kv_cache: the V embeddings have different sizes across layers and FA is not enabled - padding V cache to 1024
llama_kv_cache: the V embeddings have different sizes across layers and FA is not enabled - padding V cache to 1024
llama_context: n_ctx_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
llama_context: n_ctx_seq (4096) < n_ctx_train (40960) -- the full capacity of the model will not be utilized
INFO: Application startup complete.
INFO: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/uvicorn/protocols/http/h11_impl.py", line 415, in run_asgi
result = await app( # type: ignore[func-returns-value]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
self.scope, self.receive, self.send
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
return await self.app(scope, receive, send)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/applications.py", line 1159, in __call__
await super().__call__(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/applications.py", line 90, in __call__
await self.middleware_stack(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/errors.py", line 186, in __call__
raise exc
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/errors.py", line 164, in __call__
await self.app(scope, receive, _send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/exceptions.py", line 63, in __call__
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
await self.app(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 660, in __call__
await self.middleware_stack(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 680, in app
await route.handle(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 276, in handle
await self.app(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 134, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 120, in app
response = await f(request)
^^^^^^^^^^^^^^^^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 674, in app
raw_response = await run_endpoint_function(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
...<3 lines>...
)
^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 330, in run_endpoint_function
return await run_in_threadpool(dependant.call, **values)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/concurrency.py", line 32, in run_in_threadpool
return await anyio.to_thread.run_sync(func)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/to_thread.py", line 63, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
func, args, abandon_on_cancel=abandon_on_cancel, limiter=limiter
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 2518, in run_sync_in_worker_thread
return await future
^^^^^^^^^^^^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 1002, in run
result = context.run(func, *args)
File "/home/mirivlad/git/ducklm/app/api/server.py", line 103, in resolve_secret
return runtime.resolve_secret(task_id=request.task_id, secret=request.secret)
~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mirivlad/git/ducklm/app/runtime/runtime_controller.py", line 408, in resolve_secret
return self.runtime_loop.resolve_secret(
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
task_id=task_id, secret=secret
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "/home/mirivlad/git/ducklm/app/runtime/runtime_loop.py", line 378, in resolve_secret
execution_result = self._execution_engine.execute(
task=task,
...<2 lines>...
secret_override=secret,
)
File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 87, in execute
return self._execute_plan(
~~~~~~~~~~~~~~~~~~^
task=task,
^^^^^^^^^^
...<3 lines>...
password_override=password_override,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 211, in _execute_plan
result = self._execute_tool(
task=task,
...<3 lines>...
password_override=password_override,
)
File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 824, in _execute_tool
tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args)
File "/home/mirivlad/git/ducklm/app/tools/plugins/shell_exec/__init__.py", line 21, in execute
completed = self._sandbox.run_shell(
command=command,
cwd=str(cwd) if cwd else None,
stdin_data=str(stdin_secret) if stdin_secret is not None else None,
)
File "/home/mirivlad/git/ducklm/app/tools/sandbox.py", line 29, in run_shell
return subprocess.run(
~~~~~~~~~~~~~~^
command,
^^^^^^^^
...<7 lines>...
check=False,
^^^^^^^^^^^^
)
^
File "/usr/lib/python3.13/subprocess.py", line 556, in run
stdout, stderr = process.communicate(input, timeout=timeout)
~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.13/subprocess.py", line 1222, in communicate
stdout, stderr = self._communicate(input, endtime, timeout)
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.13/subprocess.py", line 2129, in _communicate
self._check_timeout(endtime, orig_timeout, stdout, stderr)
~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.13/subprocess.py", line 1269, in _check_timeout
raise TimeoutExpired(
...<2 lines>...
stderr=b''.join(stderr_seq) if stderr_seq else None)
subprocess.TimeoutExpired: Command 'sudo -S -p '' apt update && apt upgrade -y' timed out after 30.0 seconds
ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/uvicorn/protocols/http/h11_impl.py", line 415, in run_asgi
result = await app( # type: ignore[func-returns-value]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
self.scope, self.receive, self.send
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
return await self.app(scope, receive, send)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/applications.py", line 1159, in __call__
await super().__call__(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/applications.py", line 90, in __call__
await self.middleware_stack(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/errors.py", line 186, in __call__
raise exc
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/errors.py", line 164, in __call__
await self.app(scope, receive, _send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/exceptions.py", line 63, in __call__
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
await self.app(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 660, in __call__
await self.middleware_stack(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 680, in app
await route.handle(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 276, in handle
await self.app(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 134, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 120, in app
response = await f(request)
^^^^^^^^^^^^^^^^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 674, in app
raw_response = await run_endpoint_function(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
...<3 lines>...
)
^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 330, in run_endpoint_function
return await run_in_threadpool(dependant.call, **values)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/concurrency.py", line 32, in run_in_threadpool
return await anyio.to_thread.run_sync(func)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/to_thread.py", line 63, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
func, args, abandon_on_cancel=abandon_on_cancel, limiter=limiter
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 2518, in run_sync_in_worker_thread
return await future
^^^^^^^^^^^^
File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 1002, in run
result = context.run(func, *args)
File "/home/mirivlad/git/ducklm/app/api/server.py", line 103, in resolve_secret
return runtime.resolve_secret(task_id=request.task_id, secret=request.secret)
~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mirivlad/git/ducklm/app/runtime/runtime_controller.py", line 408, in resolve_secret
return self.runtime_loop.resolve_secret(
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
task_id=task_id, secret=secret
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "/home/mirivlad/git/ducklm/app/runtime/runtime_loop.py", line 378, in resolve_secret
execution_result = self._execution_engine.execute(
task=task,
...<2 lines>...
secret_override=secret,
)
File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 87, in execute
return self._execute_plan(
~~~~~~~~~~~~~~~~~~^
task=task,
^^^^^^^^^^
...<3 lines>...
password_override=password_override,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 211, in _execute_plan
result = self._execute_tool(
task=task,
...<3 lines>...
password_override=password_override,
)
File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 824, in _execute_tool
tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args)
File "/home/mirivlad/git/ducklm/app/tools/plugins/shell_exec/__init__.py", line 21, in execute
completed = self._sandbox.run_shell(
command=command,
cwd=str(cwd) if cwd else None,
stdin_data=str(stdin_secret) if stdin_secret is not None else None,
)
File "/home/mirivlad/git/ducklm/app/tools/sandbox.py", line 29, in run_shell
return subprocess.run(
~~~~~~~~~~~~~~^
command,
^^^^^^^^
...<7 lines>...
check=False,
^^^^^^^^^^^^
)
^
File "/usr/lib/python3.13/subprocess.py", line 556, in run
stdout, stderr = process.communicate(input, timeout=timeout)
~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.13/subprocess.py", line 1222, in communicate
stdout, stderr = self._communicate(input, endtime, timeout)
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.13/subprocess.py", line 2129, in _communicate
self._check_timeout(endtime, orig_timeout, stdout, stderr)
~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.13/subprocess.py", line 1269, in _check_timeout
raise TimeoutExpired(
...<2 lines>...
stderr=b''.join(stderr_seq) if stderr_seq else None)
subprocess.TimeoutExpired: Command 'sudo -S -p '' apt update && apt upgrade -y' timed out after 30.0 seconds

254
server.out Normal file
View File

@ -0,0 +1,254 @@
Models policy ready
Registered tool: file_write
Registered tool: shell_exec
Registered tool: memory
Registered tool: file_read
Lifespan: Starting model loading...
Lifespan: Loading models...
Loading thinker model...
Thinker loaded: <app.models.orchestrator.OrchestratorAdapter object at 0x7f1db5b6cc20> (model: Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf)
Loading json_compiler model...
JSON Compiler loaded: <app.models.orchestrator.OrchestratorAdapter object at 0x7f1db5b7bb10> (model: gemma-4-E4B-it-Q4_K_M.gguf)
Loading coder model...
Coder loaded: <app.models.coder.CoderAdapter object at 0x7f1db5b6d2b0> (model: X-Coder-SFT-Qwen3-8B.Q6_K.gguf)
Loading critic model...
Reusing model instance: gemma-4-E4B-it-Q4_K_M.gguf for critic
Critic loaded: <app.models.critic.CriticAdapter object at 0x7f1db5b6d160> (model: gemma-4-E4B-it-Q4_K_M.gguf)
Loading sys_util model...
Sys_util loaded: <app.models.orchestrator.OrchestratorAdapter object at 0x7f1db30ec2d0> (model: Menlo_Lucy-Q4_K_M.gguf)
All models loaded successfully
MemoryRecallService initialized with model: json_compiler
MemoryWritePolicy set: True
Lifespan: Models loaded
Lifespan: Rebuilding vector index (289 entries)...
Lifespan: Vector index rebuilt
INFO: 127.0.0.1:47236 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:47238 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:47240 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:45740 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:45754 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:41296 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:41304 - "GET / HTTP/1.1" 200 OK
INFO: 127.0.0.1:41304 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:41304 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO: 127.0.0.1:41318 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:41310 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:40504 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:45288 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:45302 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:47488 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:47498 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:48888 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:48898 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:44008 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:44024 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:44008 - "POST /chat HTTP/1.1" 200 OK
INFO: 127.0.0.1:50236 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:50246 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:57020 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:57032 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:36982 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:36996 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:35350 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:35358 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:38442 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:38456 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:38442 - "POST /permissions/resolve HTTP/1.1" 200 OK
INFO: 127.0.0.1:35664 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:35666 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:41680 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:41682 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:55484 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:55486 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:53136 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:53142 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:50412 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:50412 - "POST /secrets/resolve HTTP/1.1" 200 OK
INFO: 127.0.0.1:50416 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:50384 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:50396 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:35882 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:35890 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:34008 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:34012 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:38358 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:38366 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:39500 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:39516 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:52800 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:52812 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:60246 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:60256 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:55192 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:55208 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:55192 - "POST /secrets/resolve HTTP/1.1" 200 OK
INFO: 127.0.0.1:50170 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:50184 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:60392 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:60404 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:42626 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:42630 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:37478 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:37480 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:59892 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:59902 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:50284 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:50290 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:59488 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:59492 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:53584 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:53590 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:50978 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:50990 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:43110 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:43118 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:39906 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:39908 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:39100 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:39110 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:43436 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:43448 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:60214 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:60228 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:56192 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:45580 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:59680 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:52038 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:34120 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:54374 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:41916 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:48474 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:58570 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:58284 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:47014 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:37884 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:56196 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:60026 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:48534 - "POST /secrets/resolve HTTP/1.1" 200 OK
INFO: 127.0.0.1:48536 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:46114 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:49446 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:33518 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:40316 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:47326 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:36022 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:36806 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:54232 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:54248 - "GET / HTTP/1.1" 200 OK
INFO: 127.0.0.1:54248 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:54248 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO: 127.0.0.1:38470 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:54264 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:50474 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:50490 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:44644 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:44652 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:41856 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:57392 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:45778 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:59094 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:39508 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:51214 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:54724 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:41204 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:33686 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:38154 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:44658 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:56664 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:33906 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:36934 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:48746 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:50876 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:38912 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:40786 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:51882 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:40002 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:43176 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:49824 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:44316 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:58994 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:47794 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:37642 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:32882 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:53578 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:35804 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:47732 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:34050 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:55386 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:43992 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:43998 - "GET / HTTP/1.1" 200 OK
INFO: 127.0.0.1:43998 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:43998 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO: 127.0.0.1:39194 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:33540 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:53022 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:41056 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:44000 - "POST /chat HTTP/1.1" 200 OK
INFO: 127.0.0.1:44000 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:44000 - "POST /permissions/resolve HTTP/1.1" 200 OK
INFO: 127.0.0.1:57534 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:60834 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:59886 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:42774 - "POST /secrets/resolve HTTP/1.1" 500 Internal Server Error
INFO: 127.0.0.1:50140 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:52360 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:57882 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:44816 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:37956 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:37956 - "GET / HTTP/1.1" 200 OK
INFO: 127.0.0.1:37956 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:37956 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO: 127.0.0.1:50254 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:46082 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:56836 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:35716 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:37656 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:45248 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:50242 - "POST /chat HTTP/1.1" 200 OK
INFO: 127.0.0.1:44868 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:44882 - "POST /permissions/resolve HTTP/1.1" 200 OK
INFO: 127.0.0.1:44882 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:48796 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:60814 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:53286 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:44882 - "POST /secrets/resolve HTTP/1.1" 500 Internal Server Error
INFO: 127.0.0.1:53816 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:39450 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:53198 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:58340 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:58686 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:47278 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:46400 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:58580 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:35014 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:43342 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:34798 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:41652 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:36938 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:58066 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:45948 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:45656 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:33986 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:52016 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:55700 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:48468 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:33002 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:43004 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:43014 - "POST /secrets/resolve HTTP/1.1" 200 OK
INFO: 127.0.0.1:43014 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:36870 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:45970 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:60292 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:53738 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:49414 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:56572 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:51224 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:53742 - "POST /secrets/resolve HTTP/1.1" 200 OK
INFO: 127.0.0.1:42496 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:54868 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:57530 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:60898 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:54112 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:44548 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:37414 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:45064 - "GET /health HTTP/1.1" 200 OK

1
server.pid Normal file
View File

@ -0,0 +1 @@
844579

314
test_ducklm.py Executable file
View File

@ -0,0 +1,314 @@
#!/usr/bin/env python3
"""
Тестовый скрипт для проверки работы ducklm.
Позволяет ИИ-кодеру тестировать систему через отправку запросов и проверку выполнения.
"""
import json
import time
import requests
import sys
from typing import Dict, Any, Optional
class DuckLMTester:
    """HTTP smoke-test client for a running DuckLM server.

    Each ``test_*`` method performs one scenario against the server at
    ``base_url``, records the outcome through :meth:`log_test`, and returns
    ``True`` on success and ``False`` on failure. The methods never raise:
    any exception is caught at the method boundary and logged as a failed
    test, so one broken scenario does not abort the whole run.
    """

    def __init__(self, base_url: str = "http://127.0.0.1:8000"):
        """Create a tester bound to *base_url* with a shared HTTP session."""
        self.base_url = base_url
        self.session = requests.Session()
        self.test_results = []

    @staticmethod
    def _extract_output(data: Any) -> str:
        """Return ``data["result"]["output"]`` as text, or ``""`` if absent.

        The server may answer with ``"result": null`` or ``"output": null``;
        chaining ``.get()`` calls on such a payload raises and would be
        misreported as a request error, so every level is checked here.
        """
        result = data.get("result") if isinstance(data, dict) else None
        output = result.get("output") if isinstance(result, dict) else None
        return "" if output is None else str(output)

    def log_test(self, test_name: str, passed: bool, details: str = ""):
        """Record one test result and print a PASS/FAIL line for it."""
        result = {
            "test": test_name,
            "passed": passed,
            "details": details,
            "timestamp": time.time()
        }
        self.test_results.append(result)
        status = "✓ PASS" if passed else "✗ FAIL"
        print(f"{status}: {test_name}")
        if details:
            print(f" Details: {details}")

    def test_health(self) -> bool:
        """Check that ``GET /health`` answers 200 with ``status == "ok"``."""
        try:
            response = self.session.get(f"{self.base_url}/health", timeout=5)
            if response.status_code != 200:
                self.log_test("Health Check", False, f"HTTP {response.status_code}")
                return False
            data = response.json()
            if data.get("status") != "ok":
                self.log_test("Health Check", False, f"Unexpected response: {data}")
                return False
            self.log_test("Health Check", True, "Server is healthy")
            return True
        except Exception as e:
            # Test-harness boundary: report the failure instead of crashing.
            self.log_test("Health Check", False, f"Connection error: {str(e)}")
            return False

    def test_simple_chat(self) -> bool:
        """Send a plain chat message and accept any non-error task status."""
        try:
            payload = {"input": "Привет, как дела?"}
            response = self.session.post(
                f"{self.base_url}/chat",
                json=payload,
                timeout=30
            )
            if response.status_code != 200:
                self.log_test(
                    "Simple Chat",
                    False,
                    f"HTTP {response.status_code}: {response.text}"
                )
                return False
            status = response.json().get("status")
            if status not in ["completed", "awaiting_permission", "awaiting_input"]:
                self.log_test(
                    "Simple Chat",
                    False,
                    f"Unexpected status: {status}"
                )
                return False
            self.log_test(
                "Simple Chat",
                True,
                f"Status: {status}, Response received"
            )
            return True
        except Exception as e:
            self.log_test("Simple Chat", False, f"Request error: {str(e)}")
            return False

    def test_tool_execution(self) -> bool:
        """Request a ``shell_exec`` echo and verify its output or a permission prompt."""
        try:
            # A simple shell command requested explicitly through the context.
            payload = {
                "input": "Выполни простую команду",
                "context": {
                    "requested_tool": "shell_exec",
                    "tool_args": {"command": "echo 'test'"}
                }
            }
            response = self.session.post(
                f"{self.base_url}/chat",
                json=payload,
                timeout=30
            )
            if response.status_code != 200:
                self.log_test(
                    "Tool Execution",
                    False,
                    f"HTTP {response.status_code}: {response.text}"
                )
                return False
            data = response.json()
            status = data.get("status")
            if status == "awaiting_permission":
                self.log_test(
                    "Tool Execution",
                    True,
                    "Permission required (expected for some commands)"
                )
                return True
            if status != "completed":
                self.log_test(
                    "Tool Execution",
                    False,
                    f"Unexpected status: {status}"
                )
                return False
            # Null-safe: a completed task may still carry no result/output.
            output = self._extract_output(data)
            if "test" not in output:
                self.log_test(
                    "Tool Execution",
                    False,
                    f"Unexpected output: {output}"
                )
                return False
            self.log_test(
                "Tool Execution",
                True,
                f"Command executed successfully: {output.strip()}"
            )
            return True
        except Exception as e:
            self.log_test("Tool Execution", False, f"Request error: {str(e)}")
            return False

    def test_permission_flow(self) -> bool:
        """Submit a task and, if it awaits permission, resolve it with ``allow_once``."""
        try:
            # First submit a task that may require permission.
            payload = {
                "input": "Запусти команду, требующую разрешения",
                "context": {
                    "requested_tool": "shell_exec",
                    "tool_args": {"command": "whoami"}
                }
            }
            response = self.session.post(
                f"{self.base_url}/chat",
                json=payload,
                timeout=30
            )
            if response.status_code != 200:
                self.log_test(
                    "Permission Flow",
                    False,
                    f"Initial request failed: HTTP {response.status_code}"
                )
                return False
            data = response.json()
            if data.get("status") != "awaiting_permission":
                # No permission prompt is also acceptable for some setups.
                self.log_test(
                    "Permission Flow",
                    True,
                    f"No permission required, status: {data.get('status')}"
                )
                return True
            task_id = data.get("task_id")
            if not task_id:
                self.log_test(
                    "Permission Flow",
                    False,
                    "No task_id in response"
                )
                return False
            # Now grant the pending permission.
            resolve_payload = {
                "task_id": task_id,
                "decision": "allow_once"
            }
            resolve_response = self.session.post(
                f"{self.base_url}/permissions/resolve",
                json=resolve_payload,
                timeout=10
            )
            if resolve_response.status_code != 200:
                self.log_test(
                    "Permission Flow",
                    False,
                    f"Permission resolution failed: HTTP {resolve_response.status_code}"
                )
                return False
            final_status = resolve_response.json().get("status")
            # "failed" still counts: the flow itself was resolved end to end.
            if final_status not in ["completed", "failed"]:
                self.log_test(
                    "Permission Flow",
                    False,
                    f"Unexpected final status: {final_status}"
                )
                return False
            self.log_test(
                "Permission Flow",
                True,
                f"Permission resolved, final status: {final_status}"
            )
            return True
        except Exception as e:
            self.log_test("Permission Flow", False, f"Request error: {str(e)}")
            return False

    def run_all_tests(self) -> Dict[str, Any]:
        """Run every scenario in order and return a summary dictionary.

        The summary has the keys ``total_tests``, ``passed_tests``,
        ``failed_tests``, ``success_rate`` and ``test_results``.
        """
        print("Starting ducklm tests...")
        print("=" * 50)
        # Give the server a moment to finish starting up.
        time.sleep(2)
        tests = [
            self.test_health,
            self.test_simple_chat,
            self.test_tool_execution,
            self.test_permission_flow,
        ]
        passed = 0
        total = len(tests)
        for test in tests:
            if test():
                passed += 1
            time.sleep(1)  # Short pause between tests for slow hardware.
        print("=" * 50)
        print(f"Tests completed: {passed}/{total} passed")
        # Result summary.
        summary = {
            "total_tests": total,
            "passed_tests": passed,
            "failed_tests": total - passed,
            "success_rate": passed / total if total > 0 else 0,
            "test_results": self.test_results
        }
        return summary
def main():
    """Parse CLI options, run the selected HTTP tests, and exit with a status code.

    The process exits with 0 when everything that was run passed and with 1
    otherwise.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Тест ducklm системы")
    cli.add_argument("--url", default="http://127.0.0.1:8000", help="Base URL for ducklm server")
    cli.add_argument(
        "--test",
        choices=["health", "chat", "tool", "permission", "all"],
        default="all",
        help="Specific test to run",
    )
    args = cli.parse_args()
    tester = DuckLMTester(args.url)

    if args.test != "all":
        # Run exactly one named scenario.
        single_tests = {
            "health": tester.test_health,
            "chat": tester.test_simple_chat,
            "tool": tester.test_tool_execution,
            "permission": tester.test_permission_flow,
        }
        chosen = single_tests[args.test]
        if chosen():
            print(f"Test {args.test}: PASSED")
            sys.exit(0)
        print(f"Test {args.test}: FAILED")
        sys.exit(1)

    results = tester.run_all_tests()
    print("\nFINAL RESULTS:")
    print(f"Passed: {results['passed_tests']}/{results['total_tests']}")
    print(f"Success Rate: {results['success_rate']*100:.1f}%")
    # The exit code reflects the overall outcome.
    all_passed = results['failed_tests'] == 0
    sys.exit(0 if all_passed else 1)


if __name__ == "__main__":
    main()

409
test_ducklm_direct.py Normal file
View File

@ -0,0 +1,409 @@
#!/usr/bin/env python3
"""
Прямой тест ducklm через RuntimeController (без HTTP сервера).
Позволяет ИИ-кодеру тестировать систему через отправку запросов и проверку выполнения.
"""
import json
import time
import sys
from pathlib import Path
from typing import Dict, Any
# Добавляем текущую директорию в путь для импорта app
sys.path.insert(0, '.')
from app.runtime.runtime_controller import RuntimeController
from app.core.contracts import UserTask
class DuckLMDirectTester:
def __init__(self, base_dir: str = "."):
self.base_dir = Path(base_dir)
self.test_results = []
self.controller = None
def setup(self):
"""Инициализировать контроллер"""
try:
print("Инициализация RuntimeController...")
self.controller = RuntimeController(base_dir=self.base_dir)
print("RuntimeController инициализирован успешно")
return True
except Exception as e:
print(f"Ошибка инициализации RuntimeController: {e}")
return False
def log_test(self, test_name: str, passed: bool, details: str = ""):
"""Записать результат теста"""
result = {
"test": test_name,
"passed": passed,
"details": details,
"timestamp": time.time()
}
self.test_results.append(result)
status = "✓ PASS" if passed else "✗ FAIL"
print(f"{status}: {test_name}")
if details:
print(f" Details: {details}")
def test_health(self) -> bool:
"""Проверить что контроллер работает"""
try:
if self.controller is None:
self.log_test("Health Check", False, "Controller not initialized")
return False
# Проверяем что основные компоненты присутствуют
components = [
("event_bus", self.controller.event_bus),
("permission_service", self.controller.permission_service),
("task_state_store", self.controller.task_state_store),
("checkpoint_store", self.controller.checkpoint_store),
("context_builder", self.controller.context_builder),
("router", self.controller.router),
("execution_engine", self.controller.execution_engine),
]
missing = []
for name, component in components:
if component is None:
missing.append(name)
if missing:
self.log_test("Health Check", False, f"Missing components: {missing}")
return False
else:
self.log_test("Health Check", True, "Все компоненты инициализированы")
return True
except Exception as e:
self.log_test("Health Check", False, f"Error: {str(e)}")
return False
def test_simple_task(self) -> bool:
"""Простой тест задачи"""
try:
if self.controller is None:
self.log_test("Simple Task", False, "Controller not initialized")
return False
# Создаем простую задачу
task = UserTask(input="Привет, как дела?")
# Выполняем задачу через контроллер
result = self.controller.handle_task(task)
status = result.get("status")
if status in ["completed", "awaiting_permission", "awaiting_input"]:
self.log_test(
"Simple Task",
True,
f"Status: {status}, Task ID: {result.get('task_id')}"
)
return True
else:
self.log_test(
"Simple Task",
False,
f"Unexpected status: {status}"
)
return False
except Exception as e:
self.log_test("Simple Task", False, f"Request error: {str(e)}")
return False
def test_tool_task(self) -> bool:
"""Тест задачи с инструментом"""
try:
if self.controller is None:
self.log_test("Tool Task", False, "Controller not initialized")
return False
# Тест простой команды shell через контекст
task = UserTask(
input="Выполни простую команду",
context={
"requested_tool": "shell_exec",
"tool_args": {"command": "echo 'hello from test'"}
}
)
result = self.controller.handle_task(task)
status = result.get("status")
if status == "completed":
output = result.get("result", {}).get("output", "")
if "hello from test" in output:
self.log_test(
"Tool Task",
True,
f"Command executed successfully: {output.strip()}"
)
return True
else:
self.log_test(
"Tool Task",
False,
f"Unexpected output: {output}"
)
return False
elif status == "awaiting_permission":
self.log_test(
"Tool Task",
True,
"Permission required (expected for some commands)"
)
return True
else:
self.log_test(
"Tool Task",
False,
f"Unexpected status: {status}"
)
return False
except Exception as e:
self.log_test("Tool Task", False, f"Request error: {str(e)}")
return False
def test_memory_tools(self) -> bool:
"""Тест инструментов памяти"""
try:
if self.controller is None:
self.log_test("Memory Tools", False, "Controller not initialized")
return False
# Тест вставки в память
task_insert = UserTask(
input="Запомни эту информацию: тестовое значение 123",
context={
"requested_tool": "memory",
"tool_args": {
"operation": "insert",
"text": "тестовое значение 123",
"kind": "fact",
"weight": 0.8
}
}
)
result_insert = self.controller.handle_task(task_insert)
if result_insert.get("status") != "completed":
self.log_test(
"Memory Tools Insert",
False,
f"Insert failed: {result_insert.get('status')}"
)
return False
# Тест поиска в памяти
task_search = UserTask(
input="Найди запомненную информацию",
context={
"requested_tool": "memory",
"tool_args": {
"operation": "search",
"query": "тестовое значение",
"limit": 5
}
}
)
result_search = self.controller.handle_task(task_search)
if result_search.get("status") == "completed":
output = result_search.get("result", {}).get("output", "")
self.log_test(
"Memory Tools",
True,
f"Memory search successful: {output[:100]}..."
)
return True
else:
self.log_test(
"Memory Tools Search",
False,
f"Search failed: {result_search.get('status')}"
)
return False
except Exception as e:
self.log_test("Memory Tools", False, f"Request error: {str(e)}")
return False
def test_file_operations(self) -> bool:
"""Тест операций с файлами"""
try:
if self.controller is None:
self.log_test("File Operations", False, "Controller not initialized")
return False
import tempfile
import os
# Создаем временный файл для теста
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
temp_path = f.name
f.write("initial content for testing")
try:
# Тест чтения файла
task_read = UserTask(
input="Прочитай файл",
context={
"requested_tool": "file_read",
"tool_args": {"path": temp_path}
}
)
result_read = self.controller.handle_task(task_read)
if result_read.get("status") != "completed":
self.log_test(
"File Read",
False,
f"Read failed: {result_read.get('status')}"
)
return False
# Тест записи файла
new_content = "updated content from test"
task_write = UserTask(
input="Запиши в файл",
context={
"requested_tool": "file_write",
"tool_args": {
"path": temp_path,
"content": new_content
}
}
)
result_write = self.controller.handle_task(task_write)
if result_write.get("status") != "completed":
self.log_test(
"File Write",
False,
f"Write failed: {result_write.get('status')}"
)
return False
# Проверяем что файл действительно обновился
with open(temp_path, 'r') as f:
actual_content = f.read()
if actual_content == new_content:
self.log_test(
"File Operations",
True,
f"File read/write successful: {actual_content}"
)
return True
else:
self.log_test(
"File Operations",
False,
f"File content mismatch. Expected: {new_content}, Got: {actual_content}"
)
return False
finally:
# Очищаем временный файл
if os.path.exists(temp_path):
os.unlink(temp_path)
except Exception as e:
self.log_test("File Operations", False, f"Request error: {str(e)}")
return False
def run_all_tests(self) -> Dict[str, Any]:
    """Run every direct test in a fixed order and summarise the outcome.

    Returns:
        ``{"error": "Setup failed"}`` when ``self.setup()`` reports failure.
        Otherwise a summary dict with ``total_tests``, ``passed_tests``,
        ``failed_tests``, ``success_rate`` and ``test_results``.
    """
    separator = "=" * 50
    print("Starting direct ducklm tests...")
    print(separator)

    if not self.setup():
        print("Failed to setup controller")
        return {"error": "Setup failed"}

    suite = (
        self.test_health,
        self.test_simple_task,
        self.test_tool_task,
        self.test_memory_tools,
        self.test_file_operations,
    )
    total = len(suite)
    passed = 0
    for case in suite:
        passed += 1 if case() else 0
        time.sleep(0.5)  # short pause between tests

    print(separator)
    print(f"Tests completed: {passed}/{total} passed")

    # Aggregate summary handed back to the caller.
    return {
        "total_tests": total,
        "passed_tests": passed,
        "failed_tests": total - passed,
        "success_rate": passed / total if total > 0 else 0,
        "test_results": self.test_results,
    }
def main():
    """Command-line entry point: run the requested test(s) and exit.

    Options:
        --basedir  Base directory passed to ``DuckLMDirectTester``.
        --test     Which test to run; ``all`` (default) runs the full suite.

    The process exits with status 0 when everything that ran passed and with
    status 1 otherwise, including when the controller cannot be set up.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Тест ducklm системы (прямой доступ)")
    parser.add_argument("--basedir", default=".", help="Base directory for ducklm")
    parser.add_argument("--test", choices=["health", "simple", "tool", "memory", "file", "all"],
                        default="all", help="Specific test to run")
    args = parser.parse_args()

    tester = DuckLMDirectTester(args.basedir)

    if args.test == "all":
        results = tester.run_all_tests()
        # When setup fails, run_all_tests returns only {"error": ...} with none
        # of the summary keys; exit cleanly instead of raising KeyError below.
        if "error" in results:
            print(f"Test run aborted: {results['error']}")
            sys.exit(1)
        print("\nFINAL RESULTS:")
        print(f"Passed: {results['passed_tests']}/{results['total_tests']}")
        print(f"Success Rate: {results['success_rate']*100:.1f}%")
        # Exit code is based on the results.
        sys.exit(0 if results['failed_tests'] == 0 else 1)
    else:
        # Run one specific test.
        if not tester.setup():
            print("Failed to setup controller")
            sys.exit(1)
        test_map = {
            "health": tester.test_health,
            "simple": tester.test_simple_task,
            "tool": tester.test_tool_task,
            "memory": tester.test_memory_tools,
            "file": tester.test_file_operations,
        }
        test_func = test_map[args.test]
        if test_func():
            print(f"Test {args.test}: PASSED")
            sys.exit(0)
        else:
            print(f"Test {args.test}: FAILED")
            sys.exit(1)


if __name__ == "__main__":
    main()

View File

@ -1,5 +1,9 @@
from app.api.server import chat, critic_feedback, health, list_events, resolve_permission, resolve_secret import asyncio
from app.core.permission_resolution import PermissionResolutionRequest, SecretResolutionRequest import time
import app.api.server as server
from app.api.server import chat, critic_feedback, health, list_events, resolve_permission, resolve_review, resolve_secret
from app.core.permission_resolution import PermissionResolutionRequest, ReviewResolutionRequest, SecretResolutionRequest
from app.api.server import CriticFeedbackRequest from app.api.server import CriticFeedbackRequest
from app.core.contracts import UserTask from app.core.contracts import UserTask
@ -16,8 +20,52 @@ def test_events_handler_returns_event_list() -> None:
def test_chat_handler_returns_runtime_events() -> None: def test_chat_handler_returns_runtime_events() -> None:
body = chat(UserTask(input="hello from handler test")) body = chat(UserTask(input="hello from handler test"))
assert body["status"] == "completed" assert body["status"] in {"accepted", "completed"}
assert body["events"][0]["type"] == "task_received" if body["status"] == "completed":
assert body["events"][0]["type"] == "task_received"
def test_chat_handler_submits_task_without_waiting_for_completion(monkeypatch) -> None:
    """``chat`` must return the ``accepted`` response without blocking.

    The stand-in runtime answers ``submit_task`` immediately, while its
    ``handle_task`` sleeps for 0.25 s; the handler has to come back in under
    0.1 s with status ``accepted``.
    """

    class SlowRuntime:
        def submit_task(self, task):
            return {"task_id": task.task_id, "status": "accepted"}

        def handle_task(self, task):
            # Deliberately slow: calling this would blow the 0.1 s budget.
            time.sleep(0.25)
            return {"task_id": task.task_id, "status": "completed", "events": []}

    monkeypatch.setattr("app.api.server.runtime", SlowRuntime())

    began = time.monotonic()
    response = chat(UserTask(input="long task"))
    elapsed = time.monotonic() - began

    assert elapsed < 0.1
    assert response["status"] == "accepted"
def test_lifespan_loads_models_without_threadpool_executor(monkeypatch) -> None:
    """The server lifespan must load models without ``run_in_executor``.

    ``get_event_loop`` is patched to hand out a loop whose
    ``run_in_executor`` raises, so the test only passes when the lifespan
    calls ``load_models_at_startup`` some other way.
    """

    class FailingLoop:
        def run_in_executor(self, *args, **kwargs):
            raise AssertionError("lifespan must not load llama models via run_in_executor")

    class FakeRuntime:
        _memory_interface = None

        def __init__(self) -> None:
            self.loaded = False

        def load_models_at_startup(self) -> None:
            self.loaded = True

    stub_runtime = FakeRuntime()
    monkeypatch.setattr(server, "runtime", stub_runtime)
    monkeypatch.setattr(server.asyncio, "get_event_loop", lambda: FailingLoop())

    async def enter_and_leave_lifespan() -> None:
        # Entering and leaving the context is all that is needed here.
        async with server.lifespan(None):
            pass

    asyncio.run(enter_and_leave_lifespan())

    assert stub_runtime.loaded is True
def test_resolve_permission_handler_allows_completion() -> None: def test_resolve_permission_handler_allows_completion() -> None:
@ -34,6 +82,29 @@ def test_resolve_secret_handler_requires_pending_request() -> None:
assert body["status"] == "failed" assert body["status"] == "failed"
def test_resolve_review_handler_submits_review_resolution(monkeypatch) -> None:
    """``resolve_review`` must return what ``submit_review_resolution`` yields.

    The stand-in runtime echoes its arguments back, so the handler's response
    shows that the decision from the request reached the runtime.
    """

    class ReviewRuntime:
        def submit_review_resolution(self, task_id, decision, correction=None):
            echoed = {
                "task_id": task_id,
                "status": "accepted",
                "decision": decision,
                "correction": correction,
            }
            return echoed

    monkeypatch.setattr("app.api.server.runtime", ReviewRuntime())

    request = ReviewResolutionRequest(
        task_id="task-1",
        decision="wrong_action",
        correction="replan",
    )
    response = resolve_review(request)

    assert response["status"] == "accepted"
    assert response["decision"] == "wrong_action"
def test_structured_feedback_can_be_accepted_without_memory_write() -> None: def test_structured_feedback_can_be_accepted_without_memory_write() -> None:
initial = chat(UserTask(input="feedback target")) initial = chat(UserTask(input="feedback target"))
body = critic_feedback( body = critic_feedback(

View File

@ -0,0 +1,46 @@
from app.core.command_analyzer import CommandAnalyzer
from app.core.permission_service import PermissionService
def _permission_service() -> PermissionService:
    """Build a ``PermissionService`` with one ``no_always`` command category.

    The category lists ``apt``, ``apt-get``, ``dpkg`` and ``systemctl`` with
    ``allow_once`` enabled and ``allow_always`` disabled; ``settings`` and
    ``path_settings`` are left empty.
    """
    no_always_category = {
        "allow_once": True,
        "allow_always": False,
        "commands": ["apt", "apt-get", "dpkg", "systemctl"],
    }
    service_config = {
        "settings": {},
        "command_categories": {"no_always": no_always_category},
        "path_settings": {},
    }
    return PermissionService(config=service_config)
def test_detects_unelevated_root_required_segment_after_sudo_chain() -> None:
    """Only the first segment of ``sudo apt update && apt upgrade -y`` is elevated.

    The analyzer is expected to report a ``privilege_scope_error`` and to list
    ``apt upgrade -y`` as the root-required segment that lacks elevation.
    """
    report = CommandAnalyzer(_permission_service()).analyze(
        command="sudo apt update && apt upgrade -y",
        task_id="task-1",
        session_id="session-1",
    )

    assert report["type"] == "privilege_scope_error"
    assert report["root_required_segments"] == ["apt update", "apt upgrade -y"]
    assert report["elevated_segments"] == ["apt update"]
    assert report["unelevated_root_segments"] == ["apt upgrade -y"]
def test_accepts_each_root_required_segment_when_each_is_elevated() -> None:
    """A chain where every segment carries its own ``sudo`` is reported as ok."""
    report = CommandAnalyzer(_permission_service()).analyze(
        command="sudo apt update && sudo apt upgrade -y",
        task_id="task-1",
        session_id="session-1",
    )

    assert report["type"] == "ok"
    assert report["unelevated_root_segments"] == []

View File

@ -14,12 +14,25 @@ def test_runtime_loop_emits_basic_events() -> None:
def test_runtime_loop_routes_natural_language_shell_request_to_permission_flow() -> None: def test_runtime_loop_routes_natural_language_shell_request_to_permission_flow() -> None:
import os, shutil
# Clear permission cache to ensure clean state
cache_file = os.path.join(os.path.dirname(__file__), '..', 'data', 'runtime', 'allowed_commands.json')
if os.path.exists(cache_file):
os.remove(cache_file)
controller = RuntimeController() controller = RuntimeController()
result = controller.handle_task(UserTask(input="запусти sudo apt update")) result = controller.handle_task(UserTask(input="запусти sudo apt update"))
event_types = [event["type"] for event in result["events"]] event_types = [event["type"] for event in result["events"]]
# sudo commands require both permission and password
# First step: permission request
assert result["status"] == "awaiting_permission" assert result["status"] == "awaiting_permission"
assert result["directive"]["type"] == "tool" assert result["directive"]["type"] == "tool"
assert result["directive"]["payload"]["tool"] == "shell_exec" assert result["directive"]["payload"]["tool"] == "shell_exec"
assert "permission_requested" in event_types assert "permission_requested" in event_types
assert "task_awaiting_permission" in event_types assert "task_awaiting_permission" in event_types
assert result["result"]["error"] == "Permission required before execution." assert result["result"]["error"] == "Permission required before execution."
# After granting permission, should request sudo password
resumed = controller.resolve_permission(task_id=result["task_id"], decision="allow_once")
assert resumed["status"] == "awaiting_input"
assert resumed["result"]["secret_request"]["kind"] == "sudo_password"

View File

@ -2,7 +2,11 @@ import json
from pathlib import Path from pathlib import Path
from app.core.contracts import ExecutionDirective, UserTask from app.core.contracts import ExecutionDirective, UserTask
from app.core.contracts import PermissionDecision
from app.core.contracts import ToolResult
from app.events.event_types import TOOL_OUTPUT_CHUNK
from app.runtime.runtime_controller import RuntimeController from app.runtime.runtime_controller import RuntimeController
from app.tools.sandbox import ToolSandbox
def _write_config_tree(base_dir: Path) -> None: def _write_config_tree(base_dir: Path) -> None:
@ -27,9 +31,38 @@ def _write_config_tree(base_dir: Path) -> None:
"critic_prompt": "", "critic_prompt": "",
}, },
"permissions.json": { "permissions.json": {
"dangerous_commands": {"rm": "ask_always", "sudo": "ask_always"}, "settings": {
"sensitive_paths": ["/etc", "/usr", "/var"], "allow_caching": True,
"default_approval_behavior": "ask_always", "cache_file": str(base_dir / "data/runtime/allowed_commands.json"),
"normalize_commands": True,
"split_chained": True
},
"command_categories": {
"hard_stop": {
"commands": ["rm -rf /", "rm -rf /*", "dd if=/dev/zero of=/dev/sd*"]
},
"no_always": {
"allow_once": True,
"allow_always": False,
"commands": [
"rm -rf *", "rm -rf .*", "shutdown", "reboot", "halt",
"apt", "apt-get", "dpkg", "yum", "dnf", "pacman",
"systemctl stop", "systemctl start", "systemctl restart",
"service stop", "service start", "killall", "pkill -9"
]
},
"normal": {
"allow_once": True,
"allow_always": True,
"commands": ["shell_exec", "file_write"]
}
},
"path_settings": {
"allow_read_outside": True,
"allow_write_paths": [str(base_dir), "/tmp"],
"require_confirmation_for_write": True,
"require_confirmation_for_shell": True
}
}, },
"runtime.json": { "runtime.json": {
"step_timeout_ms": 5000, "step_timeout_ms": 5000,
@ -92,6 +125,8 @@ def test_shell_exec_requires_permission_for_dangerous_command(tmp_path: Path) ->
}, },
) )
) )
# rm -rf /tmp/nonexistent is not hard_stop (only exact "rm -rf /" is)
# but it matches "rm -rf *" in no_always category
assert result["status"] == "awaiting_permission" assert result["status"] == "awaiting_permission"
assert "permission_request" in result["result"] assert "permission_request" in result["result"]
@ -108,8 +143,87 @@ def test_shell_exec_allows_safe_command(tmp_path: Path) -> None:
}, },
) )
) )
# Even safe commands require permission in the new permission model
assert result["status"] == "awaiting_permission"
assert "permission_request" in result["result"]
# Grant permission and verify execution
resumed = controller.resolve_permission(task_id=result["task_id"], decision="allow_once")
assert resumed["status"] == "completed"
assert str(tmp_path) in resumed["result"]["output"]
def test_shell_exec_publishes_output_chunks_before_completion(tmp_path: Path) -> None:
_write_config_tree(tmp_path)
controller = RuntimeController(base_dir=tmp_path)
perm_override = PermissionDecision(
action_type="shell_command",
pattern="printf",
decision="allow_always",
)
task = UserTask(
input="stream shell output",
context={
"requested_tool": "shell_exec",
"tool_args": {"command": "printf 'first\\n'; sleep 0.1; printf 'second\\n'"},
},
)
result = controller.execution_engine.execute(
task,
ExecutionDirective(
type="tool",
payload={
"tool": "shell_exec",
"args": {"command": "printf 'first\\n'; sleep 0.1; printf 'second\\n'"},
},
),
permission_override=perm_override,
)
events = controller.event_bus.list_for_task(task.task_id)
chunk_events = [event for event in events if event.type == TOOL_OUTPUT_CHUNK]
completed_index = next(index for index, event in enumerate(events) if event.type == "tool_completed")
first_chunk_index = next(index for index, event in enumerate(events) if event.type == TOOL_OUTPUT_CHUNK)
assert result["status"] == "completed" assert result["status"] == "completed"
assert str(tmp_path) in result["result"]["output"] assert [event.payload["chunk"] for event in chunk_events] == ["first\n", "second\n"]
assert first_chunk_index < completed_index
def test_streaming_shell_uses_idle_timeout_not_step_timeout(tmp_path: Path) -> None:
    """A streaming command may pause for longer than ``timeout_ms``.

    The command goes quiet for 0.2 s between its two lines while
    ``timeout_ms`` is only 100; with a 500 ms idle timeout and a 2000 ms
    command timeout it is still expected to finish with exit code 0 and to
    deliver both chunks to the callback.
    """
    received: list[str] = []

    def collect(_stream, chunk):
        received.append(chunk)

    sandbox = ToolSandbox(
        allowed_root=tmp_path,
        timeout_ms=100,
        command_timeout_ms=2000,
        idle_timeout_ms=500,
    )
    outcome = sandbox.run_shell(
        command="printf 'first\\n'; sleep 0.2; printf 'second\\n'",
        output_callback=collect,
    )

    assert outcome.returncode == 0
    assert outcome.stdout == "first\nsecond\n"
    assert received == ["first\n", "second\n"]
def test_streaming_shell_timeout_kills_child_process_group(tmp_path: Path) -> None:
    """A command timeout must also stop the children of the shell.

    The command sleeps for one second and then creates a marker file, while
    the sandbox is configured with a 100 ms command timeout. The run is
    expected to end with return code -9, and the marker must never appear.

    The marker check keeps running until well past the one-second mark: a
    child that survived the timeout would only create the file at that point,
    so a single check right after ``run_shell`` returns could not fail.
    """
    import time

    marker = tmp_path / "child-survived"
    sandbox = ToolSandbox(
        allowed_root=tmp_path,
        timeout_ms=100,
        command_timeout_ms=100,
        idle_timeout_ms=1000,
    )

    started = time.monotonic()
    result = sandbox.run_shell(
        command=f"sh -c 'sleep 1; touch {marker}'",
        output_callback=lambda _stream, _chunk: None,
    )

    assert result.returncode == -9

    # Keep watching until after the child's ``sleep 1`` would have ended.
    deadline = started + 1.5
    while time.monotonic() < deadline:
        assert not marker.exists()
        time.sleep(0.05)
    assert not marker.exists()
class _RecoveryCritic: class _RecoveryCritic:
@ -122,6 +236,13 @@ def test_failed_shell_step_can_recover_and_continue(tmp_path: Path) -> None:
controller = RuntimeController(base_dir=tmp_path) controller = RuntimeController(base_dir=tmp_path)
controller.execution_engine.set_critic(_RecoveryCritic()) controller.execution_engine.set_critic(_RecoveryCritic())
controller.execution_engine._recovery_limit = 1 controller.execution_engine._recovery_limit = 1
# Bypass permission check for this test — we're testing recovery, not permissions
from app.core.contracts import PermissionDecision
perm_override = PermissionDecision(
action_type="shell_command",
pattern="grep",
decision="allow_always",
)
result = controller.execution_engine.execute( result = controller.execution_engine.execute(
UserTask( UserTask(
input="run grep with no matches and recover", input="run grep with no matches and recover",
@ -139,12 +260,177 @@ def test_failed_shell_step_can_recover_and_continue(tmp_path: Path) -> None:
] ]
}, },
), ),
permission_override=perm_override,
) )
assert result["status"] == "completed" assert result["status"] == "completed"
failed_result = result["result"]["step_results"][0]["result"]["result"] failed_result = result["result"]["step_results"][0]["result"]["result"]
assert failed_result["metadata"]["exit_code"] == 1 assert failed_result["metadata"]["exit_code"] == 1
def test_privilege_scope_failure_awaits_user_review_before_replan(tmp_path: Path) -> None:
    """A failed partially-elevated chain must stop at ``awaiting_review``.

    The shell tool is replaced by one that always fails with a dpkg lock
    error. After permission is granted and a secret supplied, the task is
    expected to wait for review, carrying a ``privilege_scope_error``
    diagnosis and a ``model_planning_error`` critic classification.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    shell_context = {
        "requested_tool": "shell_exec",
        "tool_args": {"command": "sudo apt update && apt upgrade -y"},
    }
    update_task = UserTask(input="обнови систему", context=shell_context)

    class FailingShellTool:
        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output="Error: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?",
                error="Command failed with exit code 100",
                metadata={"exit_code": 100},
            )

    controller.tool_registry._tools["shell_exec"] = FailingShellTool()

    first_response = controller.handle_task(update_task)
    assert first_response["status"] == "awaiting_permission"

    controller.resolve_permission(task_id=update_task.task_id, decision="allow_once")
    outcome = controller.resolve_secret(task_id=update_task.task_id, secret="secret")

    assert outcome["status"] == "awaiting_review"
    assert outcome["result"]["review"]["diagnosis"]["type"] == "privilege_scope_error"
    assert outcome["result"]["review"]["critic_assessment"]["classification"] == "model_planning_error"
def test_plan_pauses_on_privilege_scope_review_instead_of_completing(tmp_path: Path) -> None:
    """A plan whose shell step fails on privilege scope must await review.

    A single-step plan is executed directly through the execution engine with
    permission and secret overrides; the always-failing shell tool should
    leave the result at ``awaiting_review`` with a ``privilege_scope_error``
    diagnosis.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    class FailingShellTool:
        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output="Error: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?",
                error="Command failed with exit code 100",
                metadata={"exit_code": 100},
            )

    controller.tool_registry._tools["shell_exec"] = FailingShellTool()

    update_task = UserTask(input="обнови систему")
    single_step = {
        "id": "1",
        "tool": "shell_exec",
        "args": {"command": "sudo apt update && apt upgrade -y"},
        "depends_on": [],
    }
    plan = ExecutionDirective(type="plan", payload={"steps": [single_step]})
    apt_once = PermissionDecision(
        action_type="shell_command",
        pattern="apt",
        decision="allow_once",
    )

    outcome = controller.execution_engine.execute(
        update_task,
        plan,
        permission_override=apt_once,
        secret_override="secret",
    )

    assert outcome["status"] == "awaiting_review"
    assert outcome["result"]["review"]["diagnosis"]["type"] == "privilege_scope_error"
def test_sudo_auth_failure_requests_secret_retry_not_review(tmp_path: Path) -> None:
    """A rejected sudo password must lead to a new password prompt.

    The shell tool reports ``sudo_auth_failed`` in its metadata; the engine
    is expected to answer with ``awaiting_input``, a ``sudo_password`` secret
    request carrying the retry prompt, and ``attempt_failed`` set.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    class BadPasswordShellTool:
        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output="Sorry, try again.\nsudo: no password was provided\nsudo: 1 incorrect password attempt\n",
                error="Command failed with exit code 1",
                metadata={"exit_code": 1, "sudo_auth_failed": True},
            )

    controller.tool_registry._tools["shell_exec"] = BadPasswordShellTool()

    update_task = UserTask(input="обнови систему")
    single_step = {
        "id": "1",
        "tool": "shell_exec",
        "args": {"command": "sudo apt update && apt upgrade -y"},
        "depends_on": [],
    }
    plan = ExecutionDirective(type="plan", payload={"steps": [single_step]})
    apt_once = PermissionDecision(
        action_type="shell_command",
        pattern="apt",
        decision="allow_once",
    )

    outcome = controller.execution_engine.execute(
        update_task,
        plan,
        permission_override=apt_once,
        secret_override="wrong",
    )

    assert outcome["status"] == "awaiting_input"
    assert outcome["result"]["secret_request"]["kind"] == "sudo_password"
    assert outcome["result"]["secret_request"]["prompt"] == "Sudo password incorrect. Try again"
    assert outcome["result"]["attempt_failed"] is True
def test_runtime_keeps_secret_state_after_bad_sudo_password(tmp_path: Path) -> None:
    """After a bad sudo password the task must still accept a second secret.

    The stand-in shell tool fails its first call with ``sudo_auth_failed``
    and succeeds on every later call. The flow under test is: permission
    request, password prompt, failed attempt with a new prompt, then
    completion with the tool's output.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    class RetryPasswordShellTool:
        calls = 0

        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
            self.calls += 1
            if self.calls != 1:
                return ToolResult(
                    tool="shell_exec",
                    ok=True,
                    output="root\n",
                    metadata={"exit_code": 0},
                )
            # Only the very first call reports a rejected password.
            return ToolResult(
                tool="shell_exec",
                ok=False,
                output="Sorry, try again.\nsudo: no password was provided\nsudo: 1 incorrect password attempt\n",
                error="Command failed with exit code 1",
                metadata={"exit_code": 1, "sudo_auth_failed": True},
            )

    controller.tool_registry._tools["shell_exec"] = RetryPasswordShellTool()

    whoami_task = UserTask(
        input="кто root",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "sudo whoami"},
        },
    )

    first_response = controller.handle_task(whoami_task)
    assert first_response["status"] == "awaiting_permission"

    after_permission = controller.resolve_permission(task_id=whoami_task.task_id, decision="allow_once")
    assert after_permission["status"] == "awaiting_input"

    after_wrong_secret = controller.resolve_secret(task_id=whoami_task.task_id, secret="wrong")
    assert after_wrong_secret["status"] == "awaiting_input"
    assert after_wrong_secret["result"]["attempt_failed"] is True

    after_right_secret = controller.resolve_secret(task_id=whoami_task.task_id, secret="correct")
    assert after_right_secret["status"] == "completed"
    assert after_right_secret["result"]["output"] == "root\n"
def test_permission_resolution_can_resume_task(tmp_path: Path) -> None: def test_permission_resolution_can_resume_task(tmp_path: Path) -> None:
_write_config_tree(tmp_path) _write_config_tree(tmp_path)
controller = RuntimeController(base_dir=tmp_path) controller = RuntimeController(base_dir=tmp_path)
@ -169,12 +455,35 @@ def test_sudo_permission_resolution_requests_secret_input(tmp_path: Path) -> Non
assert resumed["result"]["secret_request"]["kind"] == "sudo_password" assert resumed["result"]["secret_request"]["kind"] == "sudo_password"
def test_implicit_sudo_command_requests_password(tmp_path: Path) -> None:
    """A command without a leading ``sudo`` can still trigger a password prompt.

    ``apt list --upgradable`` is submitted as a shell task; once permission
    is granted, the controller is expected to ask for a ``sudo_password``
    secret, just as it does for commands that start with ``sudo``.
    """
    _write_config_tree(tmp_path)
    controller = RuntimeController(base_dir=tmp_path)

    # The command does not begin with "sudo".
    check_updates = UserTask(
        input="проверь обновления",
        context={
            "requested_tool": "shell_exec",
            "tool_args": {"command": "apt list --upgradable"},
        },
    )
    first_response = controller.handle_task(check_updates)
    assert first_response["status"] == "awaiting_permission"

    # Granting permission should be followed by the sudo password request.
    after_permission = controller.resolve_permission(
        task_id=first_response["task_id"],
        decision="allow_once",
    )
    assert after_permission["status"] == "awaiting_input"
    assert after_permission["result"]["secret_request"]["kind"] == "sudo_password"
def test_secret_resolution_continues_after_pending_secret_saved(tmp_path: Path) -> None: def test_secret_resolution_continues_after_pending_secret_saved(tmp_path: Path) -> None:
_write_config_tree(tmp_path) _write_config_tree(tmp_path)
controller = RuntimeController(base_dir=tmp_path) controller = RuntimeController(base_dir=tmp_path)
initial = controller.handle_task(UserTask(input="запусти sudo apt update")) initial = controller.handle_task(UserTask(input="запусти sudo apt update"))
assert initial["status"] == "awaiting_permission"
resumed = controller.resolve_permission(task_id=initial["task_id"], decision="allow_once") resumed = controller.resolve_permission(task_id=initial["task_id"], decision="allow_once")
assert resumed["status"] == "awaiting_input" assert resumed["status"] == "awaiting_input"
final = controller.resolve_secret(task_id=initial["task_id"], secret="wrongpass") final = controller.resolve_secret(task_id=initial["task_id"], secret="wrongpass")
assert final["status"] in {"completed", "failed"} assert final["status"] in {"completed", "failed", "awaiting_input"}
assert "error" in final["result"] or "output" in final["result"] assert "error" in final["result"] or "output" in final["result"]