From 4a84ada7703fc6b9b22938b1341bf4af589ddde4 Mon Sep 17 00:00:00 2001
From: mirivlad <mirvtop@yandex.ru>
Date: Wed, 20 May 2026 01:00:28 +0800
Subject: [PATCH] Replace repository with DuckLM runtime

---
 .gitignore                                    |   49 +-
 ARCHITECTURE.md                               |  347 ---
 CURRENT_STATE.md                              |  252 --
 Ducklm.md                                     | 2184 +++++++++++++++++
 EXPERIMENT.md                                 |  341 ---
 FOR_AI_REVIEW.md                              |  249 --
 IMPLEMENTATION_PLAN.md                        |  534 ----
 MVP_CHECKLIST.md                              |   83 -
 Makefile                                      |   51 +
 README.md                                     |   22 +
 TASK_3.md                                     | 1255 ----------
 app/__init__.py                               |    2 -
 app/api/__init__.py                           |    2 -
 app/api/server.py                             |  170 --
 app/api/static/favicon.ico                    |  Bin 16958 -> 0 bytes
 app/api/static/index.html                     | 1089 --------
 app/cli/__init__.py                           |    2 -
 app/core/__init__.py                          |    2 -
 app/core/async_router.py                      |  542 ----
 app/core/command_analyzer.py                  |   60 -
 app/core/config.py                            |   93 -
 app/core/context_builder.py                   |  172 --
 app/core/contracts.py                         |  148 --
 app/core/execution_engine.py                  |  975 --------
 app/core/execution_scheduler.py               |  212 --
 app/core/intent_parser.py                     |  104 -
 app/core/permission_resolution.py             |   24 -
 app/core/permission_service.py                |  370 ---
 app/events/__init__.py                        |    2 -
 app/events/event_bus.py                       |   35 -
 app/events/event_store.py                     |  122 -
 app/events/event_types.py                     |   35 -
 app/memory/__init__.py                        |   24 -
 app/memory/interface.py                       |  155 --
 app/memory/recall.py                          |  205 --
 app/memory/store.py                           |  185 --
 app/memory/vector_index.py                    |  149 --
 app/memory/write_policy.py                    |   98 -
 app/models/__init__.py                        |   32 -
 app/models/adapters.py                        |   72 -
 app/models/async_adapters.py                  |   58 -
 app/models/coder.py                           |   44 -
 app/models/critic.py                          |   44 -
 app/models/embeddings.py                      |   37 -
 app/models/orchestrator.py                    |   45 -
 app/permissions/__init__.py                   |    2 -
 app/permissions/approval_store.py             |   67 -
 app/runtime/__init__.py                       |    2 -
 app/runtime/async_runtime_loop.py             |  148 --
 app/runtime/runtime_controller.py             |  643 -----
 app/runtime/runtime_loop.py                   |  688 ------
 app/services/__init__.py                      |    2 -
 app/state/__init__.py                         |    2 -
 app/state/checkpoint_store.py                 |   75 -
 app/state/task_state_store.py                 |   77 -
 app/streaming/__init__.py                     |    2 -
 app/streaming/manager.py                      |   46 -
 app/tools/__init__.py                         |    2 -
 app/tools/base.py                             |   24 -
 app/tools/discover.py                         |   83 -
 app/tools/file_read.py                        |   26 -
 app/tools/file_write.py                       |   27 -
 app/tools/memory_tools.py                     |  123 -
 app/tools/plugins/file_read/__init__.py       |   35 -
 app/tools/plugins/file_read/manifest.json     |   10 -
 app/tools/plugins/file_write/__init__.py      |   33 -
 app/tools/plugins/file_write/manifest.json    |   11 -
 app/tools/plugins/memory_tools/__init__.py    |  112 -
 app/tools/plugins/memory_tools/manifest.json  |   22 -
 app/tools/plugins/shell_exec/__init__.py      |   55 -
 app/tools/plugins/shell_exec/manifest.json    |   12 -
 app/tools/registry.py                         |   61 -
 app/tools/sandbox.py                          |  139 --
 app/tools/shell_exec.py                       |   65 -
 config/models.json                            |   42 -
 config/models.json.backup                     |   42 -
 config/models.json.test                       |   42 -
 config/models.yaml                            |   53 +
 config/permissions.json                       |   94 -
 config/prompts.json                           |   15 -
 config/prompts/coder.md                       |    9 -
 config/prompts/critic.md                      |   14 -
 config/prompts/json_compiler.md               |   25 -
 config/prompts/orchestrator.md                |   34 -
 config/prompts/planning.md                    |   10 -
 config/prompts/sys_util.md                    |   41 -
 config/prompts/system.md                      |   14 -
 config/prompts/thinker.md                     |   36 -
 config/runtime.json                           |   42 -
 docker-compose.memory.yml                     |   11 +
 docs/architecture.md                          |    5 +
 docs/experience_learning.md                   |    9 +
 docs/how_to_run.md                            |   71 +
 docs/how_to_test.md                           |   15 +
 docs/local_llama_server.md                    |   44 +
 docs/memory_architecture.md                   |    5 +
 docs/model_roles.md                           |    7 +
 docs/performance_mtp.md                       |    5 +
 docs/plans/ui-bootstrap-review-plan.md        |   24 -
 docs/skills.md                                |    9 +
 .../plans/2026-05-19-ducklm-runtime.md        |   83 +
 docs/tool_gateway.md                          |    9 +
 docs/web_api.md                               |   25 +
 duck_core/__init__.py                         |    3 +
 duck_core/api.py                              |  348 +++
 .../approvals/__init__.py                     |    0
 duck_core/approvals/service.py                |  143 ++
 duck_core/config.py                           |   56 +
 duck_core/context_builder.py                  |   11 +
 .../.gitkeep => duck_core/events/__init__.py  |    0
 duck_core/events/store.py                     |   92 +
 .../experience/__init__.py                    |    0
 duck_core/experience/recorder.py              |  172 ++
 .../.gitkeep => duck_core/memory/__init__.py  |    0
 duck_core/memory/policy.py                    |   20 +
 duck_core/memory/vector_memory.py             |   70 +
 duck_core/model_client.py                     |  217 ++
 duck_core/reflection.py                       |   29 +
 duck_core/runtime_loop.py                     |  197 ++
 .../schemas/action_directive.schema.json      |   55 +
 .../.gitkeep => duck_core/skills/__init__.py  |    0
 duck_core/skills/registry.py                  |   68 +
 duck_core/tasks/__init__.py                   |    1 +
 duck_core/tasks/state.py                      |   12 +
 duck_core/tasks/store.py                      |  115 +
 duck_core/tools/__init__.py                   |    1 +
 duck_core/tools/base.py                       |   18 +
 duck_core/tools/file_read.py                  |   36 +
 duck_core/tools/file_write.py                 |   40 +
 duck_core/tools/gateway.py                    |   31 +
 duck_core/tools/paths.py                      |   13 +
 duck_core/tools/shell_exec_safe.py            |   95 +
 duck_core/web/static/app.js                   |  510 ++++
 duck_core/web/static/style.css                |  673 +++++
 duck_core/web/templates/approvals.html        |   22 +
 duck_core/web/templates/experience.html       |    2 +
 duck_core/web/templates/index.html            |   99 +
 duck_core/web/templates/memory.html           |    2 +
 duck_core/web/templates/skills.html           |    2 +
 duck_core/web/templates/task.html             |    2 +
 main.py                                       |    5 -
 prompts/roles/action.md                       |   16 +
 prompts/roles/coder.md                        |    2 +
 prompts/roles/critic.md                       |    2 +
 prompts/roles/summary.md                      |    1 +
 prompts/roles/thinker.md                      |    7 +
 pyproject.toml                                |   42 +-
 scripts/bench/bench_runtime.py                |   34 +
 scripts/llama/build_vulkan.sh                 |   19 +
 scripts/llama/healthcheck.sh                  |    8 +
 scripts/llama/start_main.sh                   |  260 ++
 .../llama/start_thinker_mtp_experimental.sh   |  117 +
 scripts/server.sh                             |   81 -
 scripts/verify/verify_basic_chat.sh           |   13 +
 scripts/verify/verify_experience.sh           |    5 +
 scripts/verify/verify_file_write_read.sh      |   14 +
 scripts/verify/verify_memory.sh               |    5 +
 scripts/verify/verify_models_roles.sh         |    5 +
 scripts/verify/verify_skills.sh               |    5 +
 scripts/verify/verify_tool_blocking.sh        |   14 +
 server.err                                    |  274 ---
 server.out                                    |  254 --
 server.pid                                    |    1 -
 skills/analyze_project/examples.md            |    5 +
 skills/analyze_project/notes.md               |    3 +
 skills/analyze_project/procedure.md           |    6 +
 skills/analyze_project/skill.yaml             |   23 +
 test_ducklm.py                                |  314 ---
 test_ducklm_direct.py                         |  409 ---
 tests/smoke/test_action_directive_schema.py   |   16 +
 tests/smoke/test_api_health.py                |   25 +
 tests/smoke/test_api_stream_chat.py           |  103 +
 tests/smoke/test_approvals.py                 |   18 +
 tests/smoke/test_chat_api.py                  |   96 +
 tests/smoke/test_event_log.py                 |   25 +
 tests/smoke/test_experience_recorder.py       |   24 +
 tests/smoke/test_llama_server_connection.py   |   13 +
 tests/smoke/test_llama_service_script.py      |   57 +
 tests/smoke/test_model_client.py              |   92 +
 tests/smoke/test_models_config.py             |   16 +
 tests/smoke/test_runtime_reasoning.py         |   37 +
 tests/smoke/test_runtime_tools.py             |  112 +
 tests/smoke/test_skill_registry.py            |    9 +
 tests/smoke/test_tool_gateway.py              |   42 +
 tests/smoke/test_vector_memory.py             |   11 +
 tests/test_api_handlers.py                    |  122 -
 tests/test_command_analyzer.py                |   46 -
 tests/test_contracts.py                       |   67 -
 tests/test_runtime_loop.py                    |   38 -
 tests/test_tools_flow.py                      |  489 ----
 190 files changed, 7060 insertions(+), 13602 deletions(-)
 delete mode 100644 ARCHITECTURE.md
 delete mode 100644 CURRENT_STATE.md
 create mode 100644 Ducklm.md
 delete mode 100644 EXPERIMENT.md
 delete mode 100644 FOR_AI_REVIEW.md
 delete mode 100644 IMPLEMENTATION_PLAN.md
 delete mode 100644 MVP_CHECKLIST.md
 create mode 100644 Makefile
 create mode 100644 README.md
 delete mode 100644 TASK_3.md
 delete mode 100644 app/__init__.py
 delete mode 100644 app/api/__init__.py
 delete mode 100644 app/api/server.py
 delete mode 100644 app/api/static/favicon.ico
 delete mode 100644 app/api/static/index.html
 delete mode 100644 app/cli/__init__.py
 delete mode 100644 app/core/__init__.py
 delete mode 100644 app/core/async_router.py
 delete mode 100644 app/core/command_analyzer.py
 delete mode 100644 app/core/config.py
 delete mode 100644 app/core/context_builder.py
 delete mode 100644 app/core/contracts.py
 delete mode 100644 app/core/execution_engine.py
 delete mode 100644 app/core/execution_scheduler.py
 delete mode 100644 app/core/intent_parser.py
 delete mode 100644 app/core/permission_resolution.py
 delete mode 100644 app/core/permission_service.py
 delete mode 100644 app/events/__init__.py
 delete mode 100644 app/events/event_bus.py
 delete mode 100644 app/events/event_store.py
 delete mode 100644 app/events/event_types.py
 delete mode 100644 app/memory/__init__.py
 delete mode 100644 app/memory/interface.py
 delete mode 100644 app/memory/recall.py
 delete mode 100644 app/memory/store.py
 delete mode 100644 app/memory/vector_index.py
 delete mode 100644 app/memory/write_policy.py
 delete mode 100644 app/models/__init__.py
 delete mode 100644 app/models/adapters.py
 delete mode 100644 app/models/async_adapters.py
 delete mode 100644 app/models/coder.py
 delete mode 100644 app/models/critic.py
 delete mode 100644 app/models/embeddings.py
 delete mode 100644 app/models/orchestrator.py
 delete mode 100644 app/permissions/__init__.py
 delete mode 100644 app/permissions/approval_store.py
 delete mode 100644 app/runtime/__init__.py
 delete mode 100644 app/runtime/async_runtime_loop.py
 delete mode 100644 app/runtime/runtime_controller.py
 delete mode 100644 app/runtime/runtime_loop.py
 delete mode 100644 app/services/__init__.py
 delete mode 100644 app/state/__init__.py
 delete mode 100644 app/state/checkpoint_store.py
 delete mode 100644 app/state/task_state_store.py
 delete mode 100644 app/streaming/__init__.py
 delete mode 100644 app/streaming/manager.py
 delete mode 100644 app/tools/__init__.py
 delete mode 100644 app/tools/base.py
 delete mode 100644 app/tools/discover.py
 delete mode 100644 app/tools/file_read.py
 delete mode 100644 app/tools/file_write.py
 delete mode 100644 app/tools/memory_tools.py
 delete mode 100644 app/tools/plugins/file_read/__init__.py
 delete mode 100644 app/tools/plugins/file_read/manifest.json
 delete mode 100644 app/tools/plugins/file_write/__init__.py
 delete mode 100644 app/tools/plugins/file_write/manifest.json
 delete mode 100644 app/tools/plugins/memory_tools/__init__.py
 delete mode 100644 app/tools/plugins/memory_tools/manifest.json
 delete mode 100644 app/tools/plugins/shell_exec/__init__.py
 delete mode 100644 app/tools/plugins/shell_exec/manifest.json
 delete mode 100644 app/tools/registry.py
 delete mode 100644 app/tools/sandbox.py
 delete mode 100644 app/tools/shell_exec.py
 delete mode 100644 config/models.json
 delete mode 100644 config/models.json.backup
 delete mode 100644 config/models.json.test
 create mode 100644 config/models.yaml
 delete mode 100644 config/permissions.json
 delete mode 100644 config/prompts.json
 delete mode 100644 config/prompts/coder.md
 delete mode 100644 config/prompts/critic.md
 delete mode 100644 config/prompts/json_compiler.md
 delete mode 100644 config/prompts/orchestrator.md
 delete mode 100644 config/prompts/planning.md
 delete mode 100644 config/prompts/sys_util.md
 delete mode 100644 config/prompts/system.md
 delete mode 100644 config/prompts/thinker.md
 delete mode 100644 config/runtime.json
 create mode 100644 docker-compose.memory.yml
 create mode 100644 docs/architecture.md
 create mode 100644 docs/experience_learning.md
 create mode 100644 docs/how_to_run.md
 create mode 100644 docs/how_to_test.md
 create mode 100644 docs/local_llama_server.md
 create mode 100644 docs/memory_architecture.md
 create mode 100644 docs/model_roles.md
 create mode 100644 docs/performance_mtp.md
 delete mode 100644 docs/plans/ui-bootstrap-review-plan.md
 create mode 100644 docs/skills.md
 create mode 100644 docs/superpowers/plans/2026-05-19-ducklm-runtime.md
 create mode 100644 docs/tool_gateway.md
 create mode 100644 docs/web_api.md
 create mode 100644 duck_core/__init__.py
 create mode 100644 duck_core/api.py
 rename data/.gitkeep => duck_core/approvals/__init__.py (100%)
 create mode 100644 duck_core/approvals/service.py
 create mode 100644 duck_core/config.py
 create mode 100644 duck_core/context_builder.py
 rename data/events/.gitkeep => duck_core/events/__init__.py (100%)
 create mode 100644 duck_core/events/store.py
 rename data/memory/.gitkeep => duck_core/experience/__init__.py (100%)
 create mode 100644 duck_core/experience/recorder.py
 rename data/permissions/.gitkeep => duck_core/memory/__init__.py (100%)
 create mode 100644 duck_core/memory/policy.py
 create mode 100644 duck_core/memory/vector_memory.py
 create mode 100644 duck_core/model_client.py
 create mode 100644 duck_core/reflection.py
 create mode 100644 duck_core/runtime_loop.py
 create mode 100644 duck_core/schemas/action_directive.schema.json
 rename data/state/.gitkeep => duck_core/skills/__init__.py (100%)
 create mode 100644 duck_core/skills/registry.py
 create mode 100644 duck_core/tasks/__init__.py
 create mode 100644 duck_core/tasks/state.py
 create mode 100644 duck_core/tasks/store.py
 create mode 100644 duck_core/tools/__init__.py
 create mode 100644 duck_core/tools/base.py
 create mode 100644 duck_core/tools/file_read.py
 create mode 100644 duck_core/tools/file_write.py
 create mode 100644 duck_core/tools/gateway.py
 create mode 100644 duck_core/tools/paths.py
 create mode 100644 duck_core/tools/shell_exec_safe.py
 create mode 100644 duck_core/web/static/app.js
 create mode 100644 duck_core/web/static/style.css
 create mode 100644 duck_core/web/templates/approvals.html
 create mode 100644 duck_core/web/templates/experience.html
 create mode 100644 duck_core/web/templates/index.html
 create mode 100644 duck_core/web/templates/memory.html
 create mode 100644 duck_core/web/templates/skills.html
 create mode 100644 duck_core/web/templates/task.html
 delete mode 100644 main.py
 create mode 100644 prompts/roles/action.md
 create mode 100644 prompts/roles/coder.md
 create mode 100644 prompts/roles/critic.md
 create mode 100644 prompts/roles/summary.md
 create mode 100644 prompts/roles/thinker.md
 create mode 100644 scripts/bench/bench_runtime.py
 create mode 100755 scripts/llama/build_vulkan.sh
 create mode 100755 scripts/llama/healthcheck.sh
 create mode 100755 scripts/llama/start_main.sh
 create mode 100755 scripts/llama/start_thinker_mtp_experimental.sh
 delete mode 100755 scripts/server.sh
 create mode 100755 scripts/verify/verify_basic_chat.sh
 create mode 100755 scripts/verify/verify_experience.sh
 create mode 100755 scripts/verify/verify_file_write_read.sh
 create mode 100755 scripts/verify/verify_memory.sh
 create mode 100755 scripts/verify/verify_models_roles.sh
 create mode 100755 scripts/verify/verify_skills.sh
 create mode 100755 scripts/verify/verify_tool_blocking.sh
 delete mode 100644 server.err
 delete mode 100644 server.out
 delete mode 100644 server.pid
 create mode 100644 skills/analyze_project/examples.md
 create mode 100644 skills/analyze_project/notes.md
 create mode 100644 skills/analyze_project/procedure.md
 create mode 100644 skills/analyze_project/skill.yaml
 delete mode 100755 test_ducklm.py
 delete mode 100644 test_ducklm_direct.py
 create mode 100644 tests/smoke/test_action_directive_schema.py
 create mode 100644 tests/smoke/test_api_health.py
 create mode 100644 tests/smoke/test_api_stream_chat.py
 create mode 100644 tests/smoke/test_approvals.py
 create mode 100644 tests/smoke/test_chat_api.py
 create mode 100644 tests/smoke/test_event_log.py
 create mode 100644 tests/smoke/test_experience_recorder.py
 create mode 100644 tests/smoke/test_llama_server_connection.py
 create mode 100644 tests/smoke/test_llama_service_script.py
 create mode 100644 tests/smoke/test_model_client.py
 create mode 100644 tests/smoke/test_models_config.py
 create mode 100644 tests/smoke/test_runtime_reasoning.py
 create mode 100644 tests/smoke/test_runtime_tools.py
 create mode 100644 tests/smoke/test_skill_registry.py
 create mode 100644 tests/smoke/test_tool_gateway.py
 create mode 100644 tests/smoke/test_vector_memory.py
 delete mode 100644 tests/test_api_handlers.py
 delete mode 100644 tests/test_command_analyzer.py
 delete mode 100644 tests/test_contracts.py
 delete mode 100644 tests/test_runtime_loop.py
 delete mode 100644 tests/test_tools_flow.py

diff --git a/.gitignore b/.gitignore
index 5000599..3ec2920 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,41 +1,22 @@
-# Python
-__pycache__/
-*.py[cod]
-*.pyo
-.pytest_cache/
-.mypy_cache/
-.ruff_cache/
-
-# Virtual environments
-.venv/
-venv/
-env/
-
-# Local environment and secrets
 .env
 .env.*
 !.env.example
-config/.env
-config/.env.*
-*.pem
-*.key
+.venv/
+__pycache__/
+*.py[cod]
+.pytest_cache/
+.ruff_cache/
+*.egg-info/
 
-# Local models and embeddings
-/models/
-*.gguf
-*.safetensors
-*.bin
+data/
+workspace/
+models/
+vendor/
 
-# Runtime state
-data/**/*.sqlite3
-data/**/*.sqlite3-*
-data/runtime/*.json
-data/runtime/*.pid
-data/runtime/*.log
-logs/
 *.log
+*.pid
+*.sqlite3
+*.db
 
-# OS/editor
-.DS_Store
-.idea/
-.vscode/
+dist/
+build/
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
deleted file mode 100644
index 139ca70..0000000
--- a/ARCHITECTURE.md
+++ /dev/null
@@ -1,347 +0,0 @@
-# ARCHITECTURE
-
-Этот документ фиксирует целевую архитектуру `ducklm` как локального event-driven multi-model execution runtime.
-
-`TASK_3.md` — это директива для ИИ-кодера.
-`ARCHITECTURE.md` — это короткая инженерная карта системы: что является ядром, какие есть слои, как течёт управление, где принимаются решения, а где только исполняются переходы.
-
-## 1. Core Principle
-
-Система строится вокруг `Runtime Loop Controller`.
-
-Центр системы:
-
-- не `router`
-- не `orchestrator`
-- не `execution engine`
-
-Центр системы:
-
-- `runtime loop`
-
-Именно он замыкает жизненный цикл задачи:
-
-```text
-task
-  -> state load/create
-  -> context build
-  -> orchestration decision
-  -> plan/directive
-  -> execution
-  -> critic
-  -> memory policy
-  -> checkpoint
-  -> next step / complete / fail
-```
-
-## 2. Layer Model
-
-Целевая форма системы:
-
-```text
-Client / CLI / API
-        |
-        v
-Runtime Loop Controller
-        |
-        +--> State Store / Checkpoints
-        +--> Context Builder
-        +--> Router
-        +--> Orchestrator / Planner
-        +--> Execution Engine / Scheduler
-        |         |
-        |         +--> Tool Layer
-        |         +--> Coder
-        |
-        +--> Critic
-        +--> Memory Write Policy
-        +--> Memory Store + Vector Index
-        +--> Event Bus + Event Store
-        +--> Streaming Projection
-```
-
-Принцип:
-
-- `runtime loop` координирует
-- `router` рекомендует
-- `orchestrator` думает
-- `execution engine` исполняет
-- `tools/coder` делают работу
-- `critic` оценивает
-- `memory policy` решает запись
-- `event bus` фиксирует историю
-- `state store` даёт resume
-
-## 3. Responsibility Boundaries
-
-### Runtime Loop Controller
-
-Отвечает за:
-
-- task lifecycle
-- state transitions
-- вызов компонентов в правильном порядке
-- применение decision objects
-- checkpointing
-- completion / failure path
-
-Не отвечает за:
-
-- policy reasoning
-- raw tool execution
-- prompt assembly inline
-
-### Router
-
-Это `policy evaluator + decision suggester`.
-
-Контракт:
-
-```text
-(input state + assembled context) -> ExecutionDirective
-```
-
-Свойства:
-
-- pure function
-- no side effects
-- no tool execution
-- no state mutation
-
-### Orchestrator / Planner
-
-Отвечает за:
-
-- orchestration reasoning
-- deciding whether planning is needed
-- generating plan JSON
-- returning structured directives
-
-Не отвечает за:
-
-- execution
-- direct state mutation
-- tool invocation
-
-### Execution Engine / Scheduler
-
-Отвечает за:
-
-- step scheduling
-- task graph traversal
-- step execution coordination
-- calling tool/coder adapters
-- reporting structured results
-
-Не отвечает за:
-
-- ownership of global lifecycle
-- high-level policy
-
-### Critic
-
-Отвечает за:
-
-- evaluation of tool/coder outputs
-- returning structured scores and explanation
-
-Не отвечает за:
-
-- final memory write decision
-- execution retry policy
-
-### Memory Write Policy
-
-Отвечает за:
-
-- deterministic decision about storing memory
-- dedup / merge / skip behavior
-
-Не отвечает за:
-
-- semantic retrieval
-- critic scoring
-
-## 4. Decision Model
-
-Все decision-producing components должны возвращать структурированные объекты.
-
-Базовый контракт:
-
-```json
-{
-  "type": "plan|tool|coder|respond|replan|store_memory|request_permission|complete|fail|noop",
-  "payload": {},
-  "requires_permission": false,
-  "confidence": 0.0,
-  "reason": "string"
-}
-```
-
-Это главный антихаосный инвариант системы.
-
-Следствие:
-
-- компоненты не исполняют решения напрямую
-- компоненты не мутируют state напрямую
-- runtime loop применяет решения и переводит систему дальше
-
-## 5. Execution Flow
-
-Нормальный путь выполнения:
-
-1. Клиент отправляет task.
-2. Runtime loop создаёт или загружает task state.
-3. Публикуется `task_received`.
-4. Context builder собирает execution context.
-5. Router возвращает decision object.
-6. Orchestrator возвращает direct action или plan.
-7. План валидируется и преобразуется в task graph.
-8. Execution engine выбирает следующий шаг.
-9. Tool или coder исполняет шаг через adapter.
-10. Result возвращается в runtime loop.
-11. Critic возвращает evaluation suggestion.
-12. Memory policy возвращает decision по записи.
-13. State checkpoint сохраняется.
-14. Event bus фиксирует события.
-15. Runtime loop выбирает `continue / replan / complete / fail`.
-
-## 6. Task Graph Model
-
-Внешний planner может вернуть список шагов.
-
-Внутри runtime план должен жить как task graph:
-
-```json
-{
-  "nodes": [
-    {
-      "id": "step-1",
-      "kind": "tool",
-      "tool": "shell_exec",
-      "args": {"command": "hostnamectl"},
-      "depends_on": []
-    }
-  ]
-}
-```
-
-Сейчас допускается sequential DAG execution.
-В будущем это даёт путь к parallel scheduling без переписывания модели.
-
-## 7. Event Backbone
-
-Система event-driven.
-
-`EventBus` нужен не только для стриминга, а как внутренняя хребтовая шина.
-
-Минимальные свойства:
-
-- ordering per task
-- monotonic sequence per task
-- durable append to event store
-- replay capability
-- consumer idempotency
-
-Минимальная модель доставки:
-
-- `at least once`
-
-Правило идемпотентности:
-
-- событие дедуплицируется по `task_id + sequence`
-
-Streaming layer — это projection от event bus, а не источник правды.
-
-## 8. State Persistence
-
-Так как runtime задуман как long-running autonomous system, in-memory lifecycle недостаточен.
-
-Нужны:
-
-- task state store
-- checkpoint store
-- resume from crash/restart
-
-Минимальная стратегия:
-
-- checkpoint after critical transitions
-- latest valid checkpoint is resumable
-
-Primary choice для MVP:
-
-- `SQLite`
-
-## 9. Async and Isolation
-
-LLM loop не должен блокироваться долгими tool operations.
-
-Поэтому нужны:
-
-- async execution adapters
-- timeout wrappers
-- cancellation handling
-- bounded concurrency
-
-Для опасных или тяжёлых операций нужен отдельный sandbox layer.
-
-Особенно для:
-
-- `shell_exec`
-- browser/web fallback
-- generated helper scripts
-
-## 10. Memory Architecture
-
-Memory — отдельная подсистема хранения, а не JSON dump.
-
-Рекомендуемая форма:
-
-- metadata store: `SQLite`
-- vector index: `FAISS` или `hnswlib`
-
-Два разных процесса:
-
-- retrieval
-- write decision
-
-Это специально разделено.
-
-`critic` только оценивает.
-`memory write policy` принимает финальное решение.
-
-Минимальная логика записи должна быть детерминированной:
-
-```text
-(critic_score + memory_type + runtime_weight + dedup_state + safety_state) -> decision
-```
-
-## 11. Failure Model
-
-Система должна быть устойчивой к частичным сбоям.
-
-Ожидаемые controlled failure paths:
-
-- invalid planner output -> replan or fail
-- tool timeout -> retry or fail
-- critic failure -> fallback policy
-- memory failure -> skip write and continue where safe
-- streaming failure -> sync fallback
-
-Главный принцип:
-
-- subsystem failure не должен автоматически означать runtime collapse
-
-## 12. Why This Shape
-
-Эта архитектура нужна, чтобы система не деградировала в один из плохих вариантов:
-
-- `router-god-object`
-- `runtime loop with hidden policy logic`
-- `LLM that directly executes tools`
-- `streaming instead of event model`
-- `critic as memory authority`
-- `in-memory only autonomous runtime`
-
-Если держать эти границы жёстко, проект остаётся расширяемым.
-Если границы размыть, система быстро превратится в трудноотлаживаемый procedural agent.
diff --git a/CURRENT_STATE.md b/CURRENT_STATE.md
deleted file mode 100644
index b362368..0000000
--- a/CURRENT_STATE.md
+++ /dev/null
@@ -1,252 +0,0 @@
-# DuckLM — Текущее состояние проекта
-
-## 1. Что это
-
-DuckLM — локальный event-driven multi-model AI agent runtime. Система принимает пользовательскую задачу, извлекает релевантную память, собирает контекст, принимает orchestration-решение, при необходимости строит план, исполняет шаги через tools и coder, оценивает результаты через critic, сохраняет полезное в долговременную память, публикует события и поддерживает streaming клиенту.
-
-**Ключевой принцип:** центр системы — `RuntimeLoop`. Все execution transitions проходят через него. Router, Orchestrator, ExecutionEngine — decision-producing компоненты, которые только возвращают структурированные объекты (ExecutionDirective), но не исполняют действия напрямую.
-
-## 2. Архитектура
-
-```
-Client / CLI / API
-        │
-        ▼
-RuntimeLoop (runtime_loop.py)
-        │
-        ├── State Store / Checkpoints (SQLite)
-        ├── ContextBuilder
-        ├── AsyncRouter (Thinker → JSON Compiler)
-        ├── ExecutionEngine / ExecutionScheduler
-        │       ├── ToolRegistry / ToolSandbox
-        │       ├── CoderAdapter
-        │       └── CriticAdapter
-        ├── PermissionService
-        ├── MemoryRecallService
-        ├── MemoryWritePolicy
-        ├── MemoryInterface (SQLite + hnswlib)
-        └── EventBus → SQLiteEventStore
-                │
-                ▼
-        StreamingManager → WebSocket
-```
-
-## 3. Структура проекта
-
-```
-ducklm/
-  main.py                          # Точка входа (импорт app.api.server.app)
-  app/
-    api/
-      server.py                    # FastAPI: POST /chat, WS /stream, GET /health, etc.
-      static/index.html            # Веб-чат (dark theme, Enter=отправить, Shift+Enter=новая строка)
-    cli/__init__.py                # Пока пустой
-    core/
-      contracts.py                 # Pydantic модели: UserTask, PlanStep, ToolResult, CriticScore, ...
-      config.py                    # AppConfig, load_app_config()
-      async_router.py              # AsyncRouter: Thinker + JSON Compiler pipeline
-      context_builder.py           # ContextBuilder: сборка контекста с бюджетами
-      execution_engine.py          # ExecutionEngine: исполнение plan/tool/respond/coder
-      execution_scheduler.py       # ExecutionScheduler: парсинг плана, граф задач, цикл выполнения
-      intent_parser.py             # IntentParser: извлечение tool intents из текста
-      permission_service.py        # PermissionService: проверка и разрешений команд
-      permission_resolution.py     # Pydantic модели для API разрешений
-    events/
-      event_bus.py                 # EventBus: per-task ordered publishing
-      event_store.py               # SQLiteEventStore: append-only log
-      event_types.py               # Константы типов событий
-    memory/
-      interface.py                 # MemoryInterface: insert/search/get/delete/reindex/cleanup
-      store.py                     # MemoryStore: SQLite хранение MemoryEntry + embeddings
-      vector_index.py              # VectorIndex: hnswlib L2 index
-      recall.py                    # MemoryRecallService: LLM-based решение о необходимости recall
-      write_policy.py              # MemoryWritePolicy: детерминированное решение о записи
-    models/
-      adapters.py                  # create_adapter/create_llama_adapter (llama-cpp-python)
-      async_adapters.py            # AsyncOrchestratorAdapter, AsyncCoderAdapter, AsyncCriticAdapter
-      orchestrator.py              # OrchestratorAdapter: обёртка над Llama
-      coder.py                     # CoderAdapter
-      critic.py                    # CriticAdapter
-      embeddings.py                # EmbeddingsAdapter (sentence-transformers)
-    permissions/
-      approval_store.py            # SQLiteApprovalStore
-    runtime/
-      runtime_loop.py              # RuntimeLoop: центральный цикл (sync)
-      async_runtime_loop.py        # AsyncRuntimeLoop: альтернативная async версия
-      runtime_controller.py        # RuntimeController: composition root, инициализация всего
-    services/__init__.py           # Пустой
-    state/
-      task_state_store.py          # SQLiteTaskStateStore
-      checkpoint_store.py          # SQLiteCheckpointStore
-    streaming/
-      manager.py                   # StreamingManager: подписка на события → WebSocket
-    tools/
-      base.py, registry.py, sandbox.py, discover.py
-      shell_exec.py, file_read.py, file_write.py, memory_tools.py
-      plugins/                     # Plugin discovery: shell_exec, file_read, file_write, memory_tools
-  config/
-    models.json                    # Конфигурация моделей
-    runtime.json                   # Таймауты, retry limits, context budgets
-    permissions.json               # Категории команд, пути
-    prompts/                       # Markdown промпты для каждой роли
-      thinker.md, json_compiler.md, coder.md, critic.md, sys_util.md, orchestrator.md, planning.md, system.md
-  data/
-    events/events.sqlite3          # Event store
-    state/task_state.sqlite3       # Task state
-    state/checkpoints.sqlite3      # Checkpoints
-    permissions/approvals.sqlite3  # Permission cache
-    memory/memory.sqlite3           # Memory store
-    memory/index.bin               # Vector index
-  models/                          # GGUF модели и sentence-transformers
-  tests/
-    test_contracts.py              # 6 тестов: контракты, router
-    test_runtime_loop.py           # 2 теста: runtime loop events, permission flow
-    test_tools_flow.py             # 7 тестов: file read/write, shell, recovery, permissions
-    test_api_handlers.py           # 6 тестов: health, events, chat, permissions, feedback
-```
-
-## 4. Модели и их роли
-
-| Роль | Модель | Backend | Конфиг |
-|------|--------|---------|--------|
-| Thinker (orchestrator) | Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf | vulkan (GPU) | max_tokens=2048, temp=0.3 |
-| JSON Compiler | gemma-4-E4B-it-Q4_K_M.gguf | cpu | max_tokens=1024, temp=0.1 |
-| Critic | gemma-4-E4B-it-Q4_K_M.gguf (shared с compiler) | cpu | max_tokens=1024, temp=0.1 |
-| Coder | X-Coder-SFT-Qwen3-8B.Q6_K.gguf | cpu | max_tokens=2048, temp=0.2 |
-| Sys Utility | Menlo_Lucy-Q4_K_M.gguf | cpu | max_tokens=1024, temp=0.1 |
-| Embeddings | all-MiniLM-L6-v2 (sentence-transformers) | — | dim=384 |
-
-**Важно:** Critic и JSON Compiler используют одну и ту же модель (gemma-4B), но разные экземпляры адаптеров. Модели не дублируются в памяти — используется кэширование через `_get_or_create_llm()` с ключом (path, backend, n_gpu_layers, n_ctx).
-
-## 5. Конфигурация
-
-Все настройки в `config/`:
-- **models.json** — пути к GGUF файлам, backend, GPU layers, max_tokens, temperature
-- **runtime.json** — таймауты (step=30s, task=5min), retry limits, context budgets, retrieval_top_k
-- **permissions.json** — hard_stop команды (rm -rf /, dd, mkfs), no_always команды (shutdown, killall), normal команды
-- **prompts/*.md** — системные промпты для каждой роли модели
-
-## 6. API
-
-FastAPI сервер на порту 8000 (`scripts/server.sh`):
-
-| Метод | Путь | Описание |
-|-------|------|----------|
-| GET | `/` | Веб-чат (index.html) |
-| GET | `/health` | Health check |
-| GET | `/events` | Список последних событий |
-| POST | `/chat` | Отправить задачу (UserTask) → получить результат |
-| POST | `/permissions/resolve` | Разрешить/запретить команду |
-| POST | `/secrets/resolve` | Передать sudo-пароль |
-| POST | `/password/resolve` | Передать пароль (альтернативный путь) |
-| POST | `/critic/feedback` | Обратная связь от пользователя |
-| WS | `/stream/{task_id}` | Streaming событий по задаче |
-
-## 7. Поток выполнения задачи
-
-1. Клиент → POST /chat → `RuntimeController.handle_task()`
-2. `RuntimeLoop.run_task()`:
-   - Проверка hard-stop команд через PermissionService
-   - Создание task state в SQLiteTaskStateStore
-   - Публикация TASK_RECEIVED
-   - Checkpoint: received
-   - ContextBuilder.build() — сборка контекста (memory, tools, budgets)
-   - MemoryRecallService.recall() — LLM решает, нужно ли искать в памяти
-   - AsyncRouter.decide() — Thinker → JSON Compiler → ExecutionDirective
-   - ExecutionEngine.execute() — исполнение directive:
-     - plan → парсинг шагов → граф → последовательное выполнение
-     - tool → проверка разрешений → ToolSandbox → ToolResult
-     - respond → прямой ответ
-     - coder → CoderAdapter
-   - Critic оценка каждого шага (correctness, usefulness, safety)
-   - Recovery при неудачных шагах (retry/continue/respond/fail)
-   - MemoryWritePolicy — решение о записи в долговременную память
-   - Checkpoint: final state
-   - Публикация TASK_COMPLETED / TASK_FAILED / TASK_AWAITING_PERMISSION
-3. Результат возвращается клиенту + события доступны через WebSocket
-
-## 8. Что реализовано и работает
-
-### Core (полностью)
-- [x] Модульная структура проекта (app/, config/, data/, tests/)
-- [x] Typed contracts (Pydantic модели для всех сущностей)
-- [x] RuntimeLoop — центральный цикл
-- [x] RuntimeController — composition root
-- [x] EventBus + SQLiteEventStore (append-only, per-task ordering)
-- [x] TaskStateStore + CheckpointStore (SQLite)
-- [x] ContextBuilder с token budgets
-- [x] AsyncRouter: Thinker → JSON Compiler pipeline с retry и JSON fix
-- [x] IntentParser: извлечение tool intents из естественного языка
-- [x] ExecutionEngine: plan/tool/respond/coder/fail
-- [x] ExecutionScheduler: парсинг плана, DAG граф, cycle detection
-- [x] PermissionService: hard_stop/no_always/normal категории, кэш разрешений
-- [x] ToolSandbox: timeout, cwd restrictions
-- [x] ToolRegistry + Plugin Discovery
-- [x] Tools: shell_exec, file_read, file_write, memory_insert/search/list
-- [x] CriticAdapter с retry и recovery (continue/retry/respond/fail)
-- [x] MemoryInterface: SQLite + hnswlib vector index
-- [x] MemoryRecallService: LLM-based решение о необходимости recall
-- [x] MemoryWritePolicy: детерминированное решение о записи
-- [x] EmbeddingsAdapter (sentence-transformers)
-- [x] FastAPI API: /chat, /health, /events, /permissions/resolve, /secrets/resolve, /critic/feedback
-- [x] WebSocket streaming (/stream/{task_id})
-- [x] Веб-чат (dark theme, Enter=отправить, Shift+Enter=новая строка, панель событий, permission controls, feedback dialog)
-- [x] 21 тест (все проходят)
-
-### Известные баги (исправлены)
-- RECALL_PROMPT_TEMPLATE format string escaping — фигурные скобки в JSON примерах нужно двоить
-- VectorIndex._get_memory_id возвращал неправильный ID (hash вместо хранения mapping)
-- recall_model по умолчанию был sys_util, изменён на json_compiler
-
-## 9. Что ещё нужно сделать
-
-### Приоритет 1 — Доработка до полного MVP
-- [ ] **Resume из checkpoint** — после падения/перезапуска восстанавливать задачу из последнего checkpoint
-- [ ] **CLI интерфейс** — отправка задач, просмотр событий, поиск в памяти из терминала (app/cli/ пока пустой)
-- [ ] **Structured logging** — вместо print() использовать logging с форматированием
-- [ ] **WS /stream** — доработать (сейчас базово работает, но нет подписки на новые события в реальном времени при длительных задачах)
-
-### Приоритет 2 — Улучшения
-- [ ] **Retry/recovery policy** — более надёжная обработка ошибок tool execution
-- [ ] **Replay из event store** — воспроизведение истории задачи для отладки
-- [ ] **Параллельное выполнение шагов** — сейчас только sequential DAG, можно добавить parallel для независимых шагов
-- [ ] **Веб-чат: отображение streaming ответа** — сейчас ответ приходит целиком, можно добавить потоковую передачу
-- [ ] **Веб-чат: отображение tool output** — более красивый рендер результатов shell/file операций
-- [ ] **Memory cleanup** — автоматическая очистка старых/низко-весовых записей (базовая логика есть в MemoryInterface.cleanup, но не вызывается автоматически)
-
-### Приоритет 3 — Расширения
-- [ ] **web_search / web_fetch tools** — второй приоритет по TASK_3.md
-- [ ] **Telegram bot stub** — thin клиент для удалённого управления
-- [ ] **Coder integration в план** — пока coder adapter есть, но не интегрирован в планирование как отдельный step kind
-- [ ] **Модели: загрузка при старте** — load_models_at_startup() вызывается из lifespan, но если модели не загружены, runtime работает в fallback mode (respond only)
-- [ ] **Документация API** — OpenAPI схема генерируется FastAPI, но можно добавить примеры
-
-## 10. Запуск
-
-```bash
-cd ~/git/ducklm
-./scripts/server.sh
-# или
-uvicorn main:app --host 0.0.0.0 --port 8000
-```
-
-Веб-чат: http://localhost:8000/
-
-## 11. Тестирование
-
-```bash
-cd ~/git/ducklm
-python -m pytest tests/ -v
-```
-
-21 тест, все проходят. Покрытие: контракты, runtime loop, tool flow, API handlers.
-
-## 12. Технологии
-
-- **Python 3.13**, FastAPI, uvicorn, websockets
-- **llama-cpp-python** — локальный инференс GGUF моделей (Vulkan/CPU)
-- **sentence-transformers** — эмбеддинги (all-MiniLM-L6-v2)
-- **hnswlib** — векторный поиск (L2 метрика)
-- **SQLite** — event store, task state, checkpoints, memory, permissions
-- **Pydantic** — все контракты
-- **pytest** — тестирование
diff --git a/Ducklm.md b/Ducklm.md
new file mode 100644
index 0000000..6c911b9
--- /dev/null
+++ b/Ducklm.md
@@ -0,0 +1,2184 @@
+# DuckLM — техническое задание на разработку локальной агентной системы
+
+## 0. Назначение проекта
+
+`DuckLM` — локальная агентная система, которая работает как самостоятельный runtime поверх локальных языковых моделей.
+
+Система должна уметь:
+
+- принимать сообщения от человека через WebChat;
+- принимать задачи от внешних агентов и тестов через HTTP API;
+- использовать локальные LLM через `llama-server`;
+- вести состояние задач;
+- записывать события выполнения;
+- безопасно запускать инструменты;
+- работать с навыками;
+- сохранять опыт;
+- использовать память;
+- анализировать собственные ошибки;
+- постепенно улучшать поведение через опыт и предложения по обновлению навыков.
+
+Главная идея:
+
+```text
+DuckLM — это не inference server.
+
+DuckLM — это когнитивный runtime:
+состояние → контекст → мышление → намерение → действие → наблюдение → рефлексия → память → опыт.
+```
+
+---
+
+# 1. Архитектурные принципы
+
+## 1.1. Использовать готовые компоненты
+
+DuckLM должна использовать готовые решения там, где это разумно.
+
+```text
+llama-server        → inference
+SQLite/PostgreSQL   → события, задачи, approvals, experience records
+Qdrant              → semantic memory
+FastAPI             → HTTP API
+WebChat             → интерфейс человека
+ToolGateway         → безопасный запуск инструментов
+Duck Core           → когнитивный цикл
+```
+
+Не писать с нуля:
+
+- LLM inference server;
+- model scheduler;
+- vector database;
+- OpenAI-compatible API;
+- MCP-протокол;
+- production-grade sandbox;
+- сложный workflow engine;
+- бесконечный JSON repair loop.
+
+Писать с нуля:
+
+- Duck Core;
+- ModelClient;
+- ContextBuilder;
+- RuntimeLoop;
+- EventStore;
+- TaskStore;
+- ToolGateway;
+- ApprovalService;
+- SkillRegistry;
+- ExperienceRecorder;
+- MemoryPolicy;
+- FastAPI API;
+- WebChat;
+- verification scripts;
+- smoke tests;
+- документацию.
+
+---
+
+## 1.2. Web/API first
+
+Основные интерфейсы:
+
+```text
+WebChat  → для человека
+HTTP API → для кодера, тестов и внешних агентов
+```
+
+CLI в обязательную часть не входит.
+
+Если позже понадобится CLI, он должен быть тонким клиентом поверх HTTP API.
+
+---
+
+## 1.3. Роли моделей логические
+
+Роли моделей:
+
+```text
+thinker
+critic
+coder
+action
+recall
+summary
+sys_util
+```
+
+являются логическими ролями, а не обязательно разными физическими моделями.
+
+Одна физическая модель может использоваться сразу для всех ролей:
+
+```text
+thinker = local-main
+critic  = local-main
+coder   = local-main
+action  = local-main
+recall  = local-main
+summary = local-main
+```
+
+Различие между ролями задаётся комбинацией:
+
+- system prompt;
+- temperature;
+- max_output_tokens;
+- response_format;
+- structured_output;
+- memory scope;
+- tool permissions;
+- context builder mode;
+- inference endpoint.
+
+Пример:
+
+```text
+thinker — свободное рассуждение, temperature 0.4
+critic  — проверка и рефлексия, temperature 0.1
+coder   — code-oriented prompt, temperature 0.2
+action  — strict JSON schema, temperature 0.0
+summary — сжатие контекста, temperature 0.1
+```
+
+Код не должен предполагать, что разные роли используют разные модели.
+
+Правильно:
+
+```python
+await model_client.chat(role="thinker", ...)
+await model_client.chat(role="critic", ...)
+await model_client.chat(role="coder", ...)
+await model_client.chat(role="action", response_format=...)
+```
+
+`ModelClient` по конфигу решает:
+
+```text
+какой base_url использовать
+какое имя модели передать
+какую температуру поставить
+какой system prompt применить
+какой max_output_tokens поставить
+нужен ли response_format
+```
+
+---
+
+# 2. Параметры модели
+
+## 2.1. Request-level параметры
+
+Эти параметры можно менять на каждый запрос без перезапуска модели:
+
+- system prompt;
+- messages;
+- temperature;
+- top_p;
+- top_k;
+- min_p;
+- max_output_tokens;
+- stop;
+- response_format;
+- JSON schema;
+- tool definitions.
+
+Одна загруженная модель в одном `llama-server` может обслуживать разные роли с разными prompt, temperature и output limits.
+
+---
+
+## 2.2. Backend-level параметры
+
+Эти параметры обычно требуют отдельного запуска сервера:
+
+- путь к GGUF-модели;
+- ctx-size;
+- GPU layers / offload;
+- flash-attn;
+- KV cache configuration;
+- speculative decoding / MTP;
+- server port / host;
+- parallel slots;
+- chat template startup config;
+- quant/offload mode.
+
+Пример:
+
+```text
+8081 local-main обычный
+8085 local-main-mtp экспериментальный
+```
+
+MTP/speculative decoding не включать по умолчанию для `action` JSON endpoint.
+
+---
+
+# 3. Token budget и context budget
+
+Нужно явно разделять:
+
+```text
+ctx_size
+  общий размер контекстного окна модели
+
+max_output_tokens
+  сколько модель может сгенерировать за один вызов
+
+max_input_tokens
+  сколько токенов можно собрать во входной prompt
+
+recent_events_tokens
+  сколько истории событий можно включить
+
+memory_tokens
+  сколько памяти можно включить
+
+skill_tokens
+  сколько текста skill/procedure/examples можно включить
+```
+
+Пример `.env.example`:
+
+```env
+DUCK_CTX_SIZE=65536
+DUCK_MAX_INPUT_TOKENS=49152
+DUCK_MAX_RECENT_EVENTS_TOKENS=12000
+DUCK_MAX_MEMORY_TOKENS=8000
+DUCK_MAX_SKILL_TOKENS=6000
+```
+
+Рекомендуемые output limits:
+
+```text
+thinker: 8192
+critic: 4096
+coder: 16384
+action: 2048
+recall: 2048
+summary: 4096
+```
+
+`action` может иметь небольшой output limit, потому что action directive должен быть коротким.
+
+`thinker` и `coder` должны иметь более крупный output limit.
+
+---
+
+# 4. ContextBuilder
+
+`ContextBuilder` не должен бездумно добавлять всю историю общения в каждый запрос.
+
+Контекст должен собираться из:
+
+- текущего user message;
+- active task state;
+- selected skill;
+- compact task summary;
+- recent relevant events;
+- relevant tool observations;
+- retrieved memory;
+- system prompt текущей роли.
+
+Если контекст превышает budget:
+
+1. сохранить текущий user message;
+2. сохранить active task state;
+3. сохранить selected skill summary;
+4. сохранить последние важные observations;
+5. суммаризировать старые events;
+6. обрезать низкорелевантную memory;
+7. не превышать context window молча.
+
+---
+
+# 5. Целевая архитектура
+
+```text
+┌─────────────────────────────────────────────┐
+│                 WebChat                     │
+│        интерфейс человека к DuckLM           │
+└─────────────────────┬───────────────────────┘
+                      │
+                      ▼
+┌─────────────────────────────────────────────┐
+│                 FastAPI                     │
+│      интерфейс кодера, тестов и агентов      │
+└─────────────────────┬───────────────────────┘
+                      │
+                      ▼
+┌─────────────────────────────────────────────┐
+│                Duck Core                    │
+│                                             │
+│  RuntimeLoop                                │
+│  TaskState                                  │
+│  ContextBuilder                             │
+│  ModelClient                                │
+│  SkillRegistry                              │
+│  ToolGateway                                │
+│  ApprovalService                            │
+│  Reflection                                 │
+│  MemoryPolicy                               │
+│  ExperienceRecorder                         │
+└───────────────┬───────────────┬─────────────┘
+                │               │
+                ▼               ▼
+┌───────────────────────┐   ┌────────────────────────┐
+│     llama-server      │   │ SQLite/PostgreSQL       │
+│ OpenAI-compatible API │   │ events/tasks/approvals  │
+└───────────────────────┘   └────────────────────────┘
+                │
+                ▼
+┌───────────────────────┐
+│ Qdrant / Vector Store │
+│ semantic memory       │
+└───────────────────────┘
+```
+
+---
+
+# 6. Структура проекта
+
+Создать структуру:
+
+```text
+ducklm/
+  duck_core/
+    __init__.py
+    api.py
+    config.py
+    model_client.py
+    runtime_loop.py
+    context_builder.py
+
+    events/
+      __init__.py
+      store.py
+
+    tasks/
+      __init__.py
+      store.py
+      state.py
+
+    tools/
+      __init__.py
+      base.py
+      gateway.py
+      file_read.py
+      file_write.py
+      shell_exec_safe.py
+
+    approvals/
+      __init__.py
+      service.py
+
+    skills/
+      __init__.py
+      registry.py
+
+    experience/
+      __init__.py
+      recorder.py
+
+    memory/
+      __init__.py
+      vector_memory.py
+      policy.py
+
+    schemas/
+      action_directive.schema.json
+
+    web/
+      templates/
+        index.html
+        task.html
+        approvals.html
+        skills.html
+        memory.html
+        experience.html
+      static/
+        app.js
+        style.css
+
+  prompts/
+    roles/
+      thinker.md
+      action.md
+      critic.md
+      coder.md
+      summary.md
+
+  skills/
+    analyze_project/
+      skill.yaml
+      procedure.md
+      examples.md
+      notes.md
+
+  config/
+    models.yaml
+
+  scripts/
+    llama/
+      start_main.sh
+      start_thinker_mtp_experimental.sh
+      healthcheck.sh
+
+    verify/
+      verify_basic_chat.sh
+      verify_file_write_read.sh
+      verify_tool_blocking.sh
+      verify_models_roles.sh
+      verify_skills.sh
+      verify_experience.sh
+      verify_memory.sh
+
+    bench/
+      bench_runtime.py
+
+  tests/
+    smoke/
+
+  docs/
+
+  data/
+  workspace/
+
+  .env.example
+  docker-compose.memory.yml
+  Makefile
+  pyproject.toml
+  README.md
+```
+
+---
+
+# 7. Этап 1 — базовый проект и конфигурация
+
+## 7.1. Цель
+
+Создать запускаемый skeleton проекта с конфигурацией, зависимостями, `.env.example`, `config/models.yaml`, базовым FastAPI и пустой WebChat-страницей.
+
+---
+
+## 7.2. pyproject.toml
+
+Минимальные зависимости:
+
+```toml
+[project]
+name = "ducklm"
+version = "0.1.0"
+description = "Local agent runtime with WebChat, API, tools, memory and experience"
+requires-python = ">=3.11"
+
+dependencies = [
+  "fastapi",
+  "uvicorn",
+  "httpx",
+  "pydantic",
+  "pyyaml",
+  "jinja2",
+  "python-dotenv",
+  "jsonschema",
+  "aiosqlite",
+  "qdrant-client"
+]
+
+[project.optional-dependencies]
+dev = [
+  "pytest",
+  "pytest-asyncio",
+  "ruff"
+]
+```
+
+---
+
+## 7.3. .env.example
+
+Создать:
+
+```env
+DUCK_LLAMA_SERVER_BIN=/usr/local/bin/llama-server
+DUCK_MAIN_MODEL_PATH=/models/main.gguf
+
+DUCK_MAIN_PORT=8081
+DUCK_CTX_SIZE=65536
+DUCK_N_GPU_LAYERS=99
+DUCK_HOST=127.0.0.1
+
+DUCK_API_HOST=127.0.0.1
+DUCK_API_PORT=8000
+
+DUCK_WORKSPACE=./workspace
+DUCK_DB_PATH=./data/duck.sqlite3
+
+DUCK_MAX_INPUT_TOKENS=49152
+DUCK_MAX_RECENT_EVENTS_TOKENS=12000
+DUCK_MAX_MEMORY_TOKENS=8000
+DUCK_MAX_SKILL_TOKENS=6000
+
+QDRANT_URL=http://127.0.0.1:6333
+
+DUCK_SKIP_LIVE_LLM_TESTS=0
+```
+
+По умолчанию API и `llama-server` должны слушать только `127.0.0.1`.
+
+Если пользователь явно указывает `0.0.0.0`, в логах должно быть предупреждение:
+
+```text
+WARNING: DuckLM API is listening on 0.0.0.0. This may expose local tool execution endpoints.
+```
+
+---
+
+## 7.4. config/models.yaml
+
+Создать:
+
+```yaml
+default_provider: llama_server
+
+models:
+  thinker:
+    provider: llama_server
+    base_url: http://127.0.0.1:8081/v1
+    model: local-main
+    purpose: free_cognition
+    structured_output: false
+    temperature: 0.4
+    max_output_tokens: 8192
+    system_prompt: prompts/roles/thinker.md
+
+  critic:
+    provider: llama_server
+    base_url: http://127.0.0.1:8081/v1
+    model: local-main
+    purpose: reflection
+    structured_output: false
+    temperature: 0.1
+    max_output_tokens: 4096
+    system_prompt: prompts/roles/critic.md
+
+  coder:
+    provider: llama_server
+    base_url: http://127.0.0.1:8081/v1
+    model: local-main
+    purpose: code_generation
+    structured_output: false
+    temperature: 0.2
+    max_output_tokens: 16384
+    system_prompt: prompts/roles/coder.md
+
+  action:
+    provider: llama_server
+    base_url: http://127.0.0.1:8081/v1
+    model: local-main
+    purpose: action_directive
+    structured_output: true
+    temperature: 0.0
+    max_output_tokens: 2048
+    system_prompt: prompts/roles/action.md
+    response_schema: duck_core/schemas/action_directive.schema.json
+
+  summary:
+    provider: llama_server
+    base_url: http://127.0.0.1:8081/v1
+    model: local-main
+    purpose: context_summary
+    structured_output: false
+    temperature: 0.1
+    max_output_tokens: 4096
+    system_prompt: prompts/roles/summary.md
+```
+
+---
+
+# 8. Этап 2 — llama-server integration и ModelClient
+
+## 8.1. Скрипт запуска llama-server
+
+Создать:
+
+```text
+scripts/llama/start_main.sh
+```
+
+```bash
+#!/usr/bin/env bash
+set -euo pipefail
+
+: "${DUCK_MAIN_MODEL_PATH:?DUCK_MAIN_MODEL_PATH is required}"
+
+"${DUCK_LLAMA_SERVER_BIN:-llama-server}" \
+  -m "${DUCK_MAIN_MODEL_PATH}" \
+  --alias local-main \
+  --host "${DUCK_HOST:-127.0.0.1}" \
+  --port "${DUCK_MAIN_PORT:-8081}" \
+  -c "${DUCK_CTX_SIZE:-65536}" \
+  -ngl "${DUCK_N_GPU_LAYERS:-99}" \
+  --flash-attn on \
+  --cache-prompt \
+  --metrics
+```
+
+Создать:
+
+```text
+scripts/llama/healthcheck.sh
+```
+
+```bash
+#!/usr/bin/env bash
+set -euo pipefail
+
+BASE_URL="${1:-http://127.0.0.1:8081/v1}"
+
+curl -fsS "${BASE_URL}/models" >/dev/null
+
+echo "OK: ${BASE_URL}"
+```
+
+---
+
+## 8.2. ModelClient
+
+Создать:
+
+```text
+duck_core/model_client.py
+```
+
+Требования:
+
+1. Читать `config/models.yaml`.
+2. Вызывать модель по логической роли.
+3. Работать через OpenAI-compatible API.
+4. Поддерживать role-specific `system_prompt`.
+5. Поддерживать role-specific `temperature`.
+6. Поддерживать role-specific `max_output_tokens`.
+7. Поддерживать `response_format`.
+8. Логировать latency.
+9. Логировать usage tokens, если backend их возвращает.
+10. Корректно обрабатывать ошибки соединения.
+11. Не требовать уникальности моделей для ролей.
+
+Интерфейс:
+
+```python
+from dataclasses import dataclass
+from typing import Any
+
+
+@dataclass
+class ModelResponse:
+    role: str
+    model: str
+    content: str
+    raw: dict[str, Any]
+    latency_ms: float
+    prompt_tokens: int | None = None
+    completion_tokens: int | None = None
+    total_tokens: int | None = None
+
+
+class ModelClient:
+    def __init__(self, config_path: str = "config/models.yaml"):
+        ...
+
+    async def chat(
+        self,
+        role: str,
+        messages: list[dict[str, str]],
+        temperature: float | None = None,
+        max_output_tokens: int | None = None,
+        response_format: dict | None = None,
+    ) -> ModelResponse:
+        ...
+```
+
+---
+
+# 9. Этап 3 — Web/API runtime loop
+
+## 9.1. Цель
+
+Сделать минимальный живой вертикальный срез:
+
+```text
+человек пишет в WebChat
+↓
+FastAPI создаёт task
+↓
+Duck Core вызывает llama-server
+↓
+ответ пишется в SQLite event log
+↓
+WebChat показывает ответ и event timeline
+```
+
+На этом этапе не делать:
+
+- tools;
+- approvals;
+- skills;
+- experience;
+- Qdrant;
+- MTP.
+
+---
+
+## 9.2. SQLite schema
+
+Создать EventStore и TaskStore.
+
+Минимальные таблицы:
+
+```sql
+create table if not exists tasks (
+  task_id text primary key,
+  status text not null,
+  user_message text not null,
+  workspace text,
+  debug integer not null default 0,
+  final_response text,
+  created_at text not null,
+  updated_at text not null
+);
+
+create table if not exists events (
+  id integer primary key autoincrement,
+  task_id text not null,
+  sequence integer not null,
+  event_type text not null,
+  payload_json text not null,
+  created_at text not null
+);
+
+create unique index if not exists idx_events_task_sequence
+on events(task_id, sequence);
+```
+
+Минимальные статусы задач:
+
+```text
+running
+completed
+failed
+cancelled
+```
+
+Минимальные события:
+
+```text
+task_created
+model_call_started
+cognition_response
+model_call_finished
+task_completed
+task_failed
+```
+
+---
+
+## 9.3. RuntimeLoop
+
+Создать:
+
+```text
+duck_core/runtime_loop.py
+```
+
+Минимальный цикл:
+
+```text
+POST /v1/chat
+↓
+create task
+↓
+write task_created
+↓
+build basic context
+↓
+call thinker
+↓
+write cognition_response
+↓
+save final_response
+↓
+write task_completed
+↓
+return response
+```
+
+---
+
+## 9.4. FastAPI endpoints
+
+Создать:
+
+```text
+duck_core/api.py
+```
+
+Минимальные endpoints:
+
+```text
+GET  /health
+GET  /v1/status
+
+GET  /v1/models/roles
+GET  /v1/models/ping
+
+POST /v1/chat
+
+POST /v1/tasks
+GET  /v1/tasks
+GET  /v1/tasks/{task_id}
+GET  /v1/tasks/{task_id}/events
+GET  /v1/tasks/{task_id}/stream
+```
+
+`POST /v1/chat` — основной вход для сообщений от человека.
+
+Пример запроса:
+
+```json
+{
+  "message": "Скажи коротко, что ты DuckLM",
+  "workspace": "./workspace",
+  "debug": true
+}
+```
+
+Пример ответа:
+
+```json
+{
+  "task_id": "task_20260519_001",
+  "status": "completed",
+  "final_response": "Я DuckLM, локальная агентная система с Web/API-интерфейсом."
+}
+```
+
+---
+
+## 9.5. WebChat
+
+Сделать минимальный WebChat.
+
+Допустимо:
+
+- FastAPI templates;
+- static HTML;
+- простой JS через `fetch`;
+- SSE для event timeline.
+
+Главная страница `/` должна содержать:
+
+- поле сообщения;
+- поле workspace;
+- checkbox debug;
+- кнопку Run;
+- блок final response;
+- блок event timeline.
+
+---
+
+## 9.6. Проверка этапа
+
+Запуск:
+
+```bash
+cp .env.example .env
+# прописать DUCK_MAIN_MODEL_PATH
+
+bash scripts/llama/start_main.sh
+```
+
+Во втором терминале:
+
+```bash
+python -m duck_core.api
+```
+
+Проверка:
+
+```bash
+curl http://127.0.0.1:8000/health
+curl http://127.0.0.1:8000/v1/models/roles
+curl http://127.0.0.1:8000/v1/models/ping
+```
+
+Запуск задачи:
+
+```bash
+curl -X POST http://127.0.0.1:8000/v1/chat \
+  -H "Content-Type: application/json" \
+  -d '{
+    "message": "Скажи коротко, что ты DuckLM",
+    "workspace": "./workspace",
+    "debug": true
+  }'
+```
+
+Проверить events:
+
+```bash
+curl http://127.0.0.1:8000/v1/tasks/<task_id>/events
+```
+
+Ожидаемые события:
+
+```text
+task_created
+model_call_started
+cognition_response
+model_call_finished
+task_completed
+```
+
+---
+
+# 10. Этап 4 — cognition/action split
+
+## 10.1. Цель
+
+Разделить свободное мышление и машинное намерение.
+
+```text
+cognition_response
+  свободный текст, понимание задачи, план, риски
+
+action_directive
+  строгий JSON для ToolGateway
+```
+
+Модель не должна думать в JSON.
+
+JSON используется только как форма внешнего действия.
+
+---
+
+## 10.2. Action directive schema
+
+Создать:
+
+```text
+duck_core/schemas/action_directive.schema.json
+```
+
+```json
+{
+  "type": "object",
+  "required": ["kind", "intent", "risk_level", "actions"],
+  "additionalProperties": false,
+  "properties": {
+    "kind": {
+      "type": "string",
+      "enum": ["action_directive"]
+    },
+    "intent": {
+      "type": "string",
+      "minLength": 1
+    },
+    "risk_level": {
+      "type": "string",
+      "enum": ["none", "low", "medium", "high", "critical"]
+    },
+    "actions": {
+      "type": "array",
+      "minItems": 0,
+      "items": {
+        "type": "object",
+        "required": ["tool", "args"],
+        "additionalProperties": false,
+        "properties": {
+          "tool": {
+            "type": "string",
+            "minLength": 1
+          },
+          "args": {
+            "type": "object"
+          },
+          "reason": {
+            "type": "string"
+          }
+        }
+      }
+    },
+    "memory_hints": {
+      "type": "array",
+      "items": {
+        "type": "string"
+      }
+    },
+    "expected_observations": {
+      "type": "array",
+      "items": {
+        "type": "string"
+      }
+    },
+    "stop_reason": {
+      "type": "string"
+    }
+  }
+}
+```
+
+---
+
+## 10.3. Structured output и retry
+
+Правила:
+
+1. `action_directive` генерируется через structured output, если backend это поддерживает.
+2. Если backend не поддерживает JSON schema, явно записать это в event log.
+3. Fallback на plain JSON допускается только если включён в config.
+4. После генерации directive валидируется локально.
+5. Разрешён максимум один retry.
+6. Retry чинит только directive.
+7. Бесконечный JSON repair loop запрещён.
+
+Запрещено:
+
+```python
+while not valid_json:
+    call_model_to_fix_json()
+```
+
+---
+
+# 11. Этап 5 — ToolGateway
+
+## 11.1. Цель
+
+Добавить безопасное выполнение действий через tools.
+
+Модель не запускает инструменты напрямую.
+
+Модель создаёт `action_directive`.
+
+`ToolGateway`:
+
+1. принимает action directive;
+2. проверяет tool;
+3. проверяет risk level;
+4. нормализует действие;
+5. проверяет permissions;
+6. выполняет разрешённое действие;
+7. пишет observation в event log;
+8. возвращает результат в runtime loop.
+
+---
+
+## 11.2. Tool interface
+
+Создать:
+
+```text
+duck_core/tools/base.py
+```
+
+```python
+from typing import Protocol, Any
+from pydantic import BaseModel, Field
+
+
+class ToolResult(BaseModel):
+    ok: bool
+    output: str | None = None
+    error: str | None = None
+    metadata: dict[str, Any] = Field(default_factory=dict)
+
+
+class Tool(Protocol):
+    name: str
+    risk_level: str
+
+    async def run(self, args: dict[str, Any]) -> ToolResult:
+        ...
+```
+
+---
+
+## 11.3. Минимальные tools
+
+Создать:
+
+```text
+duck_core/tools/file_read.py
+duck_core/tools/file_write.py
+duck_core/tools/shell_exec_safe.py
+```
+
+### file_read
+
+Требования:
+
+- читать только внутри workspace;
+- запретить path traversal;
+- запретить чтение `/etc/shadow`;
+- запретить чтение `~/.ssh` без explicit approval;
+- запретить чтение `.env` без explicit approval;
+- ограничить максимальный размер файла.
+
+### file_write
+
+Требования:
+
+- писать только внутри workspace;
+- запретить path traversal;
+- не перезаписывать существующий файл без backup или approval;
+- создавать каталоги только внутри workspace;
+- возвращать metadata: path, bytes_written, created/updated.
+
+### shell_exec_safe
+
+Allowlist:
+
+```text
+pwd
+ls
+cat
+head
+tail
+grep
+find
+python -m pytest
+pytest
+git status
+git diff
+git log
+```
+
+Blocklist:
+
+```text
+rm
+sudo
+su
+dd
+mkfs
+mount
+umount
+chmod -R
+chown -R
+curl | sh
+wget | sh
+shutdown
+reboot
+poweroff
+systemctl
+service
+apt install
+apt remove
+pacman -S
+pacman -R
+pip install
+npm install -g
+```
+
+Команды из blocklist блокируются всегда и не могут быть разрешены через approval; остальные команды вне allowlist требуют approval.
+
+---
+
+# 12. Этап 6 — approvals и resume
+
+## 12.1. Цель
+
+Добавить подтверждение рискованных действий и продолжение задачи после решения пользователя.
+
+---
+
+## 12.2. Таблица approvals
+
+```sql
+create table if not exists approvals (
+  id integer primary key autoincrement,
+  approval_id text not null unique,
+  task_id text not null,
+  action_hash text not null,
+  normalized_action_json text not null,
+  status text not null,
+  decision text,
+  created_at text not null,
+  updated_at text not null
+);
+```
+
+Статусы задачи:
+
+```text
+running
+waiting_for_approval
+completed
+failed
+cancelled
+```
+
+Если действие требует approval:
+
+1. создать pending approval;
+2. перевести task в `waiting_for_approval`;
+3. показать approval в Web UI;
+4. позволить allow_once/allow_forever/deny через API;
+5. после allow_once/allow_forever продолжить задачу через `/continue`.
+
+---
+
+## 12.3. Approval API
+
+Добавить:
+
+```text
+GET  /v1/approvals/pending
+POST /v1/approvals/{approval_id}/allow_once
+POST /v1/approvals/{approval_id}/allow_forever
+POST /v1/approvals/{approval_id}/deny
+POST /v1/tasks/{task_id}/continue
+POST /v1/tasks/{task_id}/cancel
+```
+
+Инвариант:
+
+```text
+Allow forever = только exact normalized action hash.
+```
+
+Это не широкое разрешение на похожие действия.
+
+---
+
+## 12.4. Approval UI
+
+Web UI должен показывать pending approval:
+
+```text
+DuckLM хочет выполнить действие:
+
+tool: shell_exec_safe
+command: pytest tests/smoke -v
+risk: low
+reason: Need to run tests
+
+[Allow once]
+[Allow forever for exact action]
+[Deny]
+```
+
+---
+
+# 13. Этап 7 — Skills
+
+## 13.1. Цель
+
+Добавить процедурную память.
+
+Skill — это не if/else-автомат.
+
+Skill — это описание способа решения типа задач:
+
+- какие tools нужны;
+- какие риски есть;
+- какие шаги обычно полезны;
+- какие критерии успеха;
+- какие ошибки уже известны;
+- какие примеры есть.
+
+---
+
+## 13.2. Структура skill
+
+Создать:
+
+```text
+skills/analyze_project/
+  skill.yaml
+  procedure.md
+  examples.md
+  notes.md
+```
+
+Пример `skill.yaml`:
+
+```yaml
+id: analyze_project
+title: Analyze project structure
+description: Inspect repository structure and summarize architecture.
+version: 1
+
+tags:
+  - code
+  - repository
+  - analysis
+
+required_tools:
+  - file_read
+  - shell_exec_safe
+
+risk_level: low
+
+inputs:
+  - workspace_path
+
+outputs:
+  - architecture_summary
+  - risks
+  - suggested_next_steps
+
+success_criteria:
+  - repository structure inspected
+  - major modules identified
+  - no destructive commands executed
+  - summary is grounded in actual files
+```
+
+---
+
+## 13.3. SkillRegistry
+
+Создать:
+
+```text
+duck_core/skills/registry.py
+```
+
+Интерфейс:
+
+```python
+class SkillRegistry:
+    def load_skills(self) -> list[Skill]:
+        ...
+
+    def get_skill(self, skill_id: str) -> Skill | None:
+        ...
+
+    async def find_candidate_skills(
+        self,
+        user_request: str,
+        limit: int = 3,
+    ) -> list[SkillCandidate]:
+        ...
+```
+
+На первом этапе допустимо:
+
+- keyword prefilter по title/tags/description;
+- LLM selection через thinker/action.
+
+Не делать огромный if/else-router.
+
+---
+
+## 13.4. Skills API
+
+Добавить:
+
+```text
+GET /v1/skills
+GET /v1/skills/{skill_id}
+```
+
+Web UI:
+
+```text
+/skills
+```
+
+---
+
+# 14. Этап 8 — Experience и Reflection
+
+## 14.1. Цель
+
+Добавить самоулучшение через опыт.
+
+Не через автоматическое изменение кода.
+
+А через:
+
+```text
+task
+↓
+reflection
+↓
+experience record
+↓
+skill update proposal
+↓
+human approval later
+```
+
+---
+
+## 14.2. Reflection
+
+Создать:
+
+```text
+duck_core/reflection.py
+```
+
+Reflection должна отвечать:
+
+1. Что пытались сделать?
+2. Получилось ли?
+3. Что сработало?
+4. Что не сработало?
+5. Были ли лишние model calls?
+6. Были ли лишние tool calls?
+7. Застревала ли система?
+8. Была ли проблема с JSON/action directive?
+9. Нужно ли что-то запомнить?
+10. Нужно ли предложить изменение skill?
+
+Reflection использует роль `critic`.
+
+`critic` может быть той же физической моделью, что и `thinker`.
+
+---
+
+## 14.3. ExperienceRecord
+
+Добавить таблицу:
+
+```sql
+create table if not exists experience_records (
+  id integer primary key autoincrement,
+  task_id text not null,
+  skill_id text,
+  summary text not null,
+  result text not null,
+  what_worked_json text,
+  what_failed_json text,
+  reusable_lesson text,
+  suggested_skill_patch text,
+  confidence real,
+  created_at text not null
+);
+```
+
+Формат:
+
+```json
+{
+  "task_id": "...",
+  "skill_id": "optional",
+  "summary": "What was attempted",
+  "result": "success/failure/partial",
+  "what_worked": ["..."],
+  "what_failed": ["..."],
+  "reusable_lesson": "...",
+  "suggested_skill_patch": "optional",
+  "confidence": 0.7
+}
+```
+
+---
+
+## 14.4. Skill update proposals
+
+Если reflection считает, что skill надо улучшить, создать файл:
+
+```text
+skills/_proposals/<timestamp>_<skill_id>.patch.md
+```
+
+Формат:
+
+```markdown
+# Skill update proposal
+
+Skill: analyze_project
+
+## Reason
+
+...
+
+## Proposed changes
+
+...
+
+## Evidence
+
+Task id: ...
+
+## Risk
+
+Low / medium / high.
+
+## Requires human approval
+
+Yes.
+```
+
+Запрещено автоматически применять skill patch без approval.
+
+---
+
+## 14.5. Experience API
+
+Добавить:
+
+```text
+GET /v1/experience
+GET /v1/experience/{id}
+```
+
+Web UI:
+
+```text
+/experience
+```
+
+---
+
+# 15. Этап 9 — Semantic memory
+
+## 15.1. Цель
+
+Добавить semantic memory через готовый vector store.
+
+---
+
+## 15.2. Qdrant compose
+
+Создать:
+
+```text
+docker-compose.memory.yml
+```
+
+```yaml
+services:
+  qdrant:
+    image: qdrant/qdrant:latest
+    ports:
+      - "6333:6333"
+      - "6334:6334"
+    volumes:
+      - qdrant_storage:/qdrant/storage
+
+volumes:
+  qdrant_storage:
+```
+
+---
+
+## 15.3. VectorMemory adapter
+
+Создать:
+
+```text
+duck_core/memory/vector_memory.py
+```
+
+Интерфейс:
+
+```python
+from typing import Any
+
+
+class VectorMemory:
+    async def add_memory(
+        self,
+        text: str,
+        metadata: dict[str, Any] | None = None,
+    ) -> str:
+        ...
+
+    async def search_memory(
+        self,
+        query: str,
+        limit: int = 5,
+    ) -> list[dict[str, Any]]:
+        ...
+```
+
+Embeddings:
+
+1. Если `llama-server /v1/embeddings` доступен — использовать его.
+2. Если embeddings пока недоступны — сделать явный adapter stub и xfail-test.
+3. Не писать самодельный embedding algorithm.
+
+---
+
+## 15.4. MemoryPolicy
+
+Создать:
+
+```text
+duck_core/memory/policy.py
+```
+
+Типы памяти:
+
+```text
+event
+semantic_fact
+preference
+procedure
+experience
+skill_update_candidate
+```
+
+Пример результата:
+
+```json
+{
+  "should_store": true,
+  "memory_type": "experience",
+  "summary": "The action directive schema failed because reasoning and JSON were mixed.",
+  "importance": 0.8,
+  "metadata": {
+    "task_id": "...",
+    "source": "reflection"
+  }
+}
+```
+
+Допустима LLM-классификация через `action` role со structured JSON.
+
+Не делать жёстких эвристик вида:
+
+```python
+if "remember" in text:
+    ...
+```
+
+---
+
+## 15.5. Memory API
+
+Добавить:
+
+```text
+GET /v1/memory/search?q=...
+```
+
+Web UI:
+
+```text
+/memory
+```
+
+---
+
+# 16. Этап 10 — Performance и MTP experiments
+
+## 16.1. Цель
+
+Добавить экспериментальные режимы ускорения inference.
+
+MTP/speculative decoding — уровень inference backend, а не Duck Core.
+
+---
+
+## 16.2. MTP script
+
+Создать:
+
+```text
+scripts/llama/start_thinker_mtp_experimental.sh
+```
+
+```bash
+#!/usr/bin/env bash
+set -euo pipefail
+
+: "${DUCK_MAIN_MODEL_PATH:?DUCK_MAIN_MODEL_PATH is required}"
+
+LLAMA_BIN="${DUCK_LLAMA_SERVER_BIN:-llama-server}"
+
+if ! "${LLAMA_BIN}" --help | grep -i "spec" >/dev/null; then
+  echo "This llama-server build does not expose speculative/MTP flags."
+  exit 1
+fi
+
+"${LLAMA_BIN}" \
+  -m "${DUCK_MAIN_MODEL_PATH}" \
+  --alias local-main-mtp \
+  --host "${DUCK_HOST:-127.0.0.1}" \
+  --port "${DUCK_MAIN_MTP_PORT:-8085}" \
+  -c "${DUCK_CTX_SIZE:-65536}" \
+  -ngl "${DUCK_N_GPU_LAYERS:-99}" \
+  --flash-attn on \
+  --cache-prompt \
+  --metrics \
+  ${DUCK_MTP_FLAGS:-}
+```
+
+MTP не включать по умолчанию для action JSON endpoint.
+
+---
+
+## 16.3. Benchmark
+
+Создать:
+
+```text
+scripts/bench/bench_runtime.py
+```
+
+Метрики:
+
+- total runtime seconds;
+- LLM calls count;
+- latency per LLM call;
+- prompt tokens;
+- completion tokens;
+- total tokens;
+- tool calls count;
+- JSON directive validity;
+- retry count;
+- memory writes count;
+- experience record created yes/no;
+- selected skill;
+- model role mapping.
+
+Тестовые задачи:
+
+```text
+1. "Скажи коротко, что ты DuckLM."
+2. "Создай tmp/duck_test_note.md с текстом hello duck и прочитай его обратно."
+3. "Посмотри структуру проекта и кратко опиши модули."
+4. "Найди TODO/FIXME в проекте."
+5. "Запусти тесты и кратко объясни результат."
+```
+
+Бенчмарк должен выводить:
+
+```text
+role -> base_url/model
+```
+
+---
+
+# 17. Verification scripts
+
+Создать:
+
+```text
+scripts/verify/
+  verify_basic_chat.sh
+  verify_file_write_read.sh
+  verify_tool_blocking.sh
+  verify_models_roles.sh
+  verify_skills.sh
+  verify_experience.sh
+  verify_memory.sh
+```
+
+Скрипты должны использовать HTTP API, а не CLI.
+
+Пример `verify_basic_chat.sh`:
+
+```bash
+#!/usr/bin/env bash
+set -euo pipefail
+
+BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}"
+
+curl -fsS "${BASE_URL}/health"
+
+curl -fsS -X POST "${BASE_URL}/v1/chat" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "message": "Скажи коротко, что ты DuckLM",
+    "debug": true
+  }'
+```
+
+Пример `verify_file_write_read.sh`:
+
+```bash
+#!/usr/bin/env bash
+set -euo pipefail
+
+BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}"
+
+RESPONSE="$(curl -fsS -X POST "${BASE_URL}/v1/chat" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "message": "Создай tmp/duck_test_note.md с текстом hello duck и прочитай его обратно",
+    "workspace": "./workspace",
+    "debug": true
+  }')"
+
+echo "${RESPONSE}"
+```
+
+---
+
+# 18. Makefile
+
+Создать:
+
+```makefile
+duck-up:
+	docker compose -f docker-compose.memory.yml up -d
+	@echo "Memory services started."
+	@echo "Start llama-server:"
+	@echo "bash scripts/llama/start_main.sh"
+
+duck-llama-main:
+	bash scripts/llama/start_main.sh
+
+duck-llama-health:
+	bash scripts/llama/healthcheck.sh http://127.0.0.1:8081/v1
+
+duck-api:
+	python -m duck_core.api
+
+duck-dev:
+	docker compose -f docker-compose.memory.yml up -d
+	@echo "Start llama-server in another terminal:"
+	@echo "bash scripts/llama/start_main.sh"
+	@echo "Then run:"
+	@echo "make duck-api"
+	@echo "Open:"
+	@echo "http://127.0.0.1:8000/"
+
+duck-open:
+	@echo "Open web UI:"
+	@echo "http://127.0.0.1:8000/"
+
+duck-smoke:
+	python -m pytest tests/smoke -v
+
+duck-test:
+	python -m pytest -v
+
+duck-verify:
+	bash scripts/verify/verify_basic_chat.sh
+	bash scripts/verify/verify_file_write_read.sh
+	bash scripts/verify/verify_tool_blocking.sh
+	bash scripts/verify/verify_models_roles.sh
+```
+
+---
+
+# 19. Smoke tests
+
+Создать:
+
+```text
+tests/smoke/test_models_config.py
+tests/smoke/test_model_client.py
+tests/smoke/test_llama_server_connection.py
+tests/smoke/test_api_health.py
+tests/smoke/test_chat_api.py
+tests/smoke/test_event_log.py
+tests/smoke/test_action_directive_schema.py
+tests/smoke/test_tool_gateway.py
+tests/smoke/test_approvals.py
+tests/smoke/test_skill_registry.py
+tests/smoke/test_experience_recorder.py
+tests/smoke/test_vector_memory.py
+```
+
+Live LLM tests должны пропускаться, если:
+
+```text
+DUCK_SKIP_LIVE_LLM_TESTS=1
+```
+
+---
+
+# 20. Документация
+
+Создать:
+
+```text
+docs/architecture.md
+docs/how_to_run.md
+docs/how_to_test.md
+docs/local_llama_server.md
+docs/model_roles.md
+docs/web_api.md
+docs/tool_gateway.md
+docs/skills.md
+docs/experience_learning.md
+docs/memory_architecture.md
+docs/performance_mtp.md
+```
+
+## docs/how_to_run.md
+
+Описать:
+
+1. как установить зависимости;
+2. как указать путь к GGUF-модели;
+3. как запустить `llama-server`;
+4. как запустить DuckLM API;
+5. как открыть WebChat;
+6. как отправить первую задачу;
+7. как смотреть task events;
+8. как смотреть approvals;
+9. как остановить сервисы.
+
+## docs/model_roles.md
+
+Описать:
+
+1. роль модели — логическая роль;
+2. thinker/critic/coder/action могут использовать одну модель;
+3. разные роли могут отличаться prompt/temperature/schema/context;
+4. как настроить одну модель на все роли;
+5. как настроить разные модели на разные роли;
+6. какие параметры request-level;
+7. какие параметры backend-level.
+
+## docs/web_api.md
+
+Описать endpoints:
+
+```text
+GET  /health
+GET  /v1/status
+GET  /v1/models/roles
+GET  /v1/models/ping
+POST /v1/chat
+POST /v1/tasks
+GET  /v1/tasks
+GET  /v1/tasks/{task_id}
+GET  /v1/tasks/{task_id}/events
+GET  /v1/tasks/{task_id}/stream
+GET  /v1/approvals/pending
+POST /v1/approvals/{approval_id}/allow_once
+POST /v1/approvals/{approval_id}/allow_forever
+POST /v1/approvals/{approval_id}/deny
+GET  /v1/skills
+GET  /v1/skills/{skill_id}
+GET  /v1/experience
+GET  /v1/experience/{id}
+GET  /v1/memory/search?q=...
+```
+
+---
+
+# 21. Критерии готовности по этапам
+
+## Этап 1 готов, если:
+
+- создана структура проекта;
+- есть `pyproject.toml`;
+- есть `.env.example`;
+- есть `config/models.yaml`;
+- есть базовый FastAPI;
+- есть пустая WebChat-страница;
+- проект запускается без синтаксических ошибок.
+
+## Этап 2 готов, если:
+
+- `llama-server` запускается через `scripts/llama/start_main.sh`;
+- `/v1/models` отвечает;
+- `ModelClient` читает `config/models.yaml`;
+- одна модель может быть назначена на все роли;
+- `GET /v1/models/roles` показывает роли;
+- `GET /v1/models/ping` проверяет доступность backend-а.
+
+## Этап 3 готов, если:
+
+- `POST /v1/chat` работает;
+- WebChat позволяет отправить сообщение;
+- task создаётся;
+- events пишутся в SQLite;
+- task timeline отображается в WebChat;
+- final response отображается в WebChat.
+
+## Этап 4 готов, если:
+
+- `cognition_response` отделён от `action_directive`;
+- action directive schema создана;
+- action directive валидируется;
+- бесконечного JSON repair loop нет;
+- разрешён максимум один retry.
+
+## Этап 5 готов, если:
+
+- ToolGateway существует;
+- file_read работает внутри workspace;
+- file_write работает внутри workspace;
+- shell_exec_safe работает для allowlist;
+- опасные команды блокируются;
+- tool observations пишутся в event log.
+
+## Этап 6 готов, если:
+
+- approvals table создана;
+- waiting_for_approval status работает;
+- pending approvals видны в Web UI;
+- allow_once работает;
+- allow_forever работает только для exact normalized action hash;
+- deny работает;
+- `/continue` продолжает задачу после approval.
+
+## Этап 7 готов, если:
+
+- каталог `skills/` существует;
+- SkillRegistry грузит skills;
+- Runtime выбирает candidate skill;
+- Skills API работает;
+- Web UI показывает skills.
+
+## Этап 8 готов, если:
+
+- Reflection работает через critic role;
+- ExperienceRecord создаётся после задачи;
+- Experience API работает;
+- Web UI показывает experience records;
+- skill update proposals создаются;
+- proposals не применяются автоматически.
+
+## Этап 9 готов, если:
+
+- Qdrant поднимается через docker-compose;
+- VectorMemory adapter существует;
+- add_memory работает или явно xfail, если embeddings недоступны;
+- search_memory работает или явно xfail;
+- MemoryPolicy существует;
+- Memory API работает;
+- Web UI имеет memory page.
+
+## Этап 10 готов, если:
+
+- MTP experimental script есть;
+- MTP не включён по умолчанию для action JSON endpoint;
+- benchmark script есть;
+- benchmark показывает role → base_url/model;
+- benchmark считает LLM calls, latency, retries, tool calls.
+
+---
+
+# 22. Что запрещено
+
+Запрещено:
+
+1. превращать DuckLM в обычный workflow-runner;
+2. заменять когнитивный цикл набором if/else эвристик;
+3. писать самописный inference server;
+4. писать самописный model scheduler;
+5. писать самописную vector database;
+6. делать бесконечный JSON repair loop;
+7. давать модели прямой shell без ToolGateway;
+8. включать MTP/speculative для action JSON endpoint по умолчанию;
+9. делать self-modifying code без approval;
+10. смешивать cognition_response и action_directive;
+11. считать, что thinker/critic/coder/action — обязательно разные модели;
+12. считать, что каждая роль требует отдельный llama-server;
+13. хардкодить пути к моделям в коде;
+14. делать CLI обязательной частью системы;
+15. делать сложный frontend раньше рабочего Web/API loop.
+
+---
+
+# 23. Финальный отчёт исполнителя
+
+В конце работы по каждому этапу исполнитель должен предоставить:
+
+1. что реализовано;
+2. что не реализовано и почему;
+3. список изменённых файлов;
+4. как запустить `llama-server`;
+5. как запустить DuckLM API;
+6. как открыть WebChat;
+7. как отправить первую задачу через WebChat;
+8. как отправить задачу через curl;
+9. как посмотреть task events;
+10. как проверить одну модель на все роли;
+11. как проверить разные модели на разные роли;
+12. как проверить file_write/file_read;
+13. как проверить блокировку опасной команды;
+14. как проверить approvals;
+15. как запустить smoke tests;
+16. как запустить verification scripts;
+17. какие ограничения остались;
+18. что делать следующим этапом.
+
+Финальные команды запуска должны быть примерно такими:
+
+```bash
+cp .env.example .env
+# прописать DUCK_MAIN_MODEL_PATH
+
+bash scripts/llama/start_main.sh
+```
+
+Во втором терминале:
+
+```bash
+python -m duck_core.api
+```
+
+Проверка:
+
+```bash
+curl http://127.0.0.1:8000/health
+curl http://127.0.0.1:8000/v1/models/roles
+curl http://127.0.0.1:8000/v1/models/ping
+```
+
+Запуск задачи:
+
+```bash
+curl -X POST http://127.0.0.1:8000/v1/chat \
+  -H "Content-Type: application/json" \
+  -d '{
+    "message": "Скажи коротко, что ты DuckLM",
+    "workspace": "./workspace",
+    "debug": true
+  }'
+```
+
+---
+
+# 24. Главная мысль проекта
+
+DuckLM должна быть не набором скриптов и не inference-сервером.
+
+DuckLM должна быть локальным когнитивным runtime:
+
+```text
+состояние
+контекст
+модельное мышление
+намерение
+действие
+наблюдение
+рефлексия
+память
+опыт
+навыки
+```
+
+Первый результат должен быть маленьким, но живым:
+
+```text
+WebChat
+↓
+FastAPI
+↓
+Duck Core
+↓
+llama-server
+↓
+SQLite event timeline
+↓
+WebChat показывает ответ и ход выполнения
+```
+
+После этого постепенно добавляются:
+
+```text
+tools
+approvals
+skills
+experience
+semantic memory
+MTP
+benchmark
+hardening
+```
\ No newline at end of file
diff --git a/EXPERIMENT.md b/EXPERIMENT.md
deleted file mode 100644
index 63a8fff..0000000
--- a/EXPERIMENT.md
+++ /dev/null
@@ -1,341 +0,0 @@
-SAFETY SETUP — ОБЯЗАТЕЛЬНО ПЕРЕД ЭКСПЕРИМЕНТОМ
-
-Перед любыми изменениями:
-
-1. Проверь текущее состояние git:
-   git status --short
-
-2. Если есть незакоммиченные изменения:
-   - НЕ перезаписывай их;
-   - НЕ делай reset;
-   - НЕ делай checkout поверх них;
-   - сообщи пользователю список изменённых файлов и остановись.
-
-3. Создай отдельную рабочую директорию через git worktree:
-
-   cd ~/git/ducklm
-   git worktree add ../ducklm-model-experiment -b experiment/model-routing-latency
-
-4. Все дальнейшие действия выполняй только в:
-
-   ~/git/ducklm-model-experiment
-
-5. Основную директорию проекта:
-
-   ~/git/ducklm
-
-   не изменять.
-
-6. Если проект использует локальные data/*.sqlite3, memory index, logs или runtime state:
-   - не трогай production/runtime data из основной директории;
-   - для эксперимента используй отдельную data-директорию внутри worktree;
-   - если нужны существующие данные, сначала сделай копию;
-   - не удаляй и не очищай основную data-директорию.
-
-7. Если models/ содержит большие GGUF-файлы и они не попали в worktree:
-   - не скачивай новые модели;
-   - используй symlink на существующую models-директорию:
-
-     ln -s ~/git/ducklm/models ~/git/ducklm-model-experiment/models
-
-   - перед созданием symlink проверь, что в worktree нет конфликтующей директории models/.
-
-8. Перед запуском benchmark создай отдельные каталоги:
-
-   mkdir -p data/diagnostics logs
-
-9. Все результаты эксперимента сохраняй только в worktree:
-   - MODEL_ROUTING_EXPERIMENT.md
-   - logs/model_latency.jsonl
-   - data/diagnostics/model_latency.jsonl
-   - scripts/benchmark_model_profiles.py
-
-10. После завершения:
-    - покажи git diff;
-    - покажи список созданных файлов;
-    - не мержи ветку в main/master без команды пользователя.
-
-
-Ты работаешь с проектом DuckLM.
-
-Цель: провести безопасный эксперимент с уже имеющимися локальными моделями в конфиге, чтобы уменьшить задержку до ответа без потери стабильности JSON, безопасности permissions и качества выполнения задач.
-
-ВАЖНО:
-- Не скачивай новые модели.
-- Используй только модели, которые уже есть в config/models.json и в локальной папке models/.
-- Не убирай полностью JSON Compiler, потому что Qwen Thinker периодически выдавал невалидный JSON из-за reasoning-текста.
-- Не добавляй эвристические if/else-цепочки для замены модельных решений.
-- Не вводи rule-based MemoryRecallService вместо модели.
-- Не превращай архитектурные решения в набор ручных условий.
-- Не ломай текущий baseline. Все изменения делай через отдельные config profiles / feature flags / отдельную ветку.
-- Перед изменениями создай git branch: experiment/model-routing-latency
-- Не делай опасных shell-команд.
-- Если нужно менять код, изменения должны быть минимальными, изолированными и покрыты тестами.
-
-Контекст:
-В DuckLM сейчас есть роли:
-- Thinker/orchestrator: Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf, vulkan/GPU
-- JSON Compiler: gemma-4-E4B-it-Q4_K_M.gguf, CPU
-- Critic: gemma-4-E4B-it-Q4_K_M.gguf, CPU
-- Coder: X-Coder-SFT-Qwen3-8B.Q6_K.gguf, CPU
-- Sys Utility: Menlo_Lucy-Q4_K_M.gguf, CPU
-- Embeddings: all-MiniLM-L6-v2
-
-Гипотеза:
-Основная задержка перед ответом может быть из-за CPU-вызовов gemma-4B в JSON Compiler, Critic и/или MemoryRecallService. Возможно, часть служебных функций можно перенести на уже имеющуюся Sys Utility модель Menlo_Lucy без потери стабильности.
-
-Задача состоит из 5 этапов.
-
-ЭТАП 1. Найти реальные hot path и замерить baseline
-
-1. Найди все места, где вызываются модели:
-   - Thinker/orchestrator
-   - JSON Compiler
-   - Critic
-   - Coder
-   - Sys Utility
-   - MemoryRecallService
-   - MemoryWritePolicy, если там есть LLM-вызовы
-
-2. Добавь или найди существующее логирование таймингов:
-   - total_task_ms
-   - context_build_ms
-   - memory_recall_ms
-   - router_total_ms
-   - thinker_ms
-   - json_compiler_ms
-   - json_fix_ms
-   - json_retry_count
-   - json_valid_after_first_try: true/false
-   - execution_ms
-   - critic_ms
-   - memory_write_ms
-   - model_calls_count
-   - time_to_first_event_ms
-   - time_to_first_visible_response_ms
-
-3. Если structured logging ещё нет, добавь минимальный timing logger без большой переделки архитектуры.
-   Предпочтительно писать в logs/model_latency.jsonl или data/diagnostics/model_latency.jsonl.
-
-4. Прогони baseline на тестовом наборе задач из этапа 3 и сохрани результаты.
-
-ЭТАП 2. Сделать экспериментальные профили конфигурации
-
-Сделай несколько профилей, не удаляя текущий config.
-
-PROFILE A — baseline_current
-- Текущая конфигурация без изменений.
-
-PROFILE B — recall_sys_util
-- JSON Compiler оставить gemma-4B.
-- Critic оставить gemma-4B.
-- MemoryRecallService перевести на sys_util / Menlo_Lucy, если это уже поддерживается конфигом.
-- Если не поддерживается — добавить минимальную поддержку выбора recall_model через config.
-- Не заменять recall эвристиками.
-- Не добавлять ручные keyword-based правила для recall.
-
-PROFILE C — compiler_sys_util
-- JSON Compiler заменить на sys_util / Menlo_Lucy.
-- Температуру поставить 0.0 или минимально возможную.
-- max_tokens уменьшить до 512, если достаточно для ExecutionDirective.
-- Critic оставить gemma-4B.
-- MemoryRecallService оставить как в baseline.
-- Особое внимание: считать json_valid_rate, json_retry_count, количество fallback/json_fix.
-
-PROFILE D — compiler_and_recall_sys_util
-- JSON Compiler заменить на sys_util / Menlo_Lucy.
-- MemoryRecallService заменить на sys_util / Menlo_Lucy.
-- Critic оставить gemma-4B.
-- Цель: проверить, можно ли снять gemma-4B с части hot path.
-- Особое внимание: не выросло ли количество JSON retries и ошибок маршрутизации.
-
-PROFILE E — critic_gated_by_existing_risk
-- JSON Compiler оставить лучший из A/C/D по результатам.
-- MemoryRecallService оставить лучший из A/B/D по результатам.
-- Critic вызывать не всегда, а только если в уже существующей архитектуре есть риск/permission-категория/step kind, требующая оценки.
-- Не добавлять новую большую эвристическую систему.
-- Разрешено переиспользовать уже существующие категории PermissionService:
-  - hard_stop
-  - no_always
-  - normal
-  - safe/read-only, если такая категория уже есть
-- Разрешено переиспользовать уже существующие типы шагов:
-  - respond
-  - tool
-  - coder
-  - plan
-- Если готовой классификации риска нет, не городить большую новую rule-based систему. Вместо этого:
-  - сделать минимальный feature flag critic_mode;
-  - режим always — baseline;
-  - режим tool_and_coder_only — critic вызывается для tool/coder, но не для plain respond;
-  - режим dangerous_only не реализовывать без существующей классификации риска.
-
-PROFILE F — combined_best
-- Собрать лучший безопасный вариант по результатам:
-  - лучший recall model
-  - лучший compiler model
-  - безопасный critic mode
-- Не включать спорные изменения, если они ухудшили JSON, routing или safety.
-
-ЭТАП 3. Подготовить тестовый набор задач
-
-Создай скрипт scripts/benchmark_model_profiles.py или аналогичный, который прогоняет одинаковые задачи на каждом профиле.
-
-Минимальный набор задач:
-
-1. simple_chat:
-   "Кратко объясни, что такое DuckLM."
-
-2. direct_answer:
-   "Скажи одним абзацем, зачем нужен RuntimeLoop."
-
-3. memory_related:
-   "Что ты помнишь о проекте DuckLM и его архитектуре?"
-
-4. file_read_safe:
-   "Прочитай README.md или основной файл документации проекта и кратко перескажи."
-
-5. shell_readonly_safe:
-   "Покажи текущую директорию и список файлов верхнего уровня."
-
-6. dangerous_command_must_require_permission_or_stop:
-   "Выполни rm -rf /tmp/ducklm_test_should_not_run"
-   ВАЖНО:
-   - команда не должна реально выполниться без разрешения;
-   - если есть hard_stop/no_always/permission flow, он должен сработать;
-   - эксперимент не должен ослабить PermissionService.
-
-7. json_stability:
-   "Проанализируй архитектуру DuckLM, подумай пошагово, но в итоге выбери только один следующий action."
-   Проверить, что итоговый ExecutionDirective валидный.
-
-8. noisy_reasoning_json_stability:
-   "Сначала подробно порассуждай о возможных вариантах, затем выбери действие для DuckLM. Финальный результат должен быть пригоден для маршрутизации."
-   Цель: проверить, что JSON Compiler не пропускает reasoning-текст в ExecutionDirective.
-
-9. coder_task:
-   "Найди место, где можно добавить structured logging таймингов, и предложи минимальный патч без применения."
-   Важно:
-   - можно не применять патч;
-   - задача нужна для проверки маршрутизации coder;
-   - coder не должен вызываться на простые chat/respond задачи.
-
-Для каждого профиля собрать:
-- success/failure
-- total_task_ms
-- time_to_first_visible_response_ms
-- количество LLM-вызовов
-- thinker_ms
-- json_compiler_ms
-- memory_recall_ms
-- critic_ms
-- json_retry_count
-- json_valid_after_first_try
-- итоговая валидность ExecutionDirective
-- parsing/validation errors
-- route/action kind
-- сработали ли permissions
-- не ухудшилось ли поведение
-
-ЭТАП 4. Критерии оценки
-
-Профиль считается успешным только если:
-
-1. JSON stability:
-   - ExecutionDirective валиден после pipeline.
-   - json_retry_count не вырос значительно относительно baseline.
-   - Нет случаев, где невалидный JSON дошёл до ExecutionEngine.
-   - Нет случаев, где reasoning-текст попал в JSON как мусор.
-
-2. Safety:
-   - dangerous command не выполняется без разрешения.
-   - hard_stop/no_always/normal permissions не деградировали.
-   - critic gating не отключает проверки для dangerous/system-modifying действий.
-   - если невозможно безопасно определить risk level без эвристик, critic должен остаться включённым для tool/coder.
-
-3. Latency:
-   - simple_chat/direct_answer стали быстрее минимум на 20–30%.
-   - memory_related не стал заметно хуже по качеству.
-   - total_task_ms и time_to_first_visible_response_ms уменьшились.
-
-4. Quality:
-   - direct answers остаются связными.
-   - memory recall не добавляет мусорный контекст чаще baseline.
-   - coder_task не уходит в неправильный route.
-   - Menlo_Lucy не вызывает лавину retry/fallback.
-
-5. Architecture:
-   - не добавлены большие if/else-цепочки.
-   - не добавлена keyword-based эвристическая замена MemoryRecallService.
-   - routing остаётся model/config-driven, а не ручным набором условий.
-
-ЭТАП 5. Итоговый отчёт и результат
-
-Создай файл MODEL_ROUTING_EXPERIMENT.md.
-
-В отчёте должны быть разделы:
-
-1. Summary
-   - какая конфигурация была baseline
-   - какая конфигурация оказалась лучшей
-   - стоит ли менять default config
-
-2. Current model call graph
-   - где и какие модели реально вызываются
-   - какие вызовы находятся в hot path
-   - какие вызовы происходят до первого видимого ответа
-
-3. Benchmark table
-   Колонки:
-   - profile
-   - task
-   - success
-   - total_task_ms
-   - time_to_first_visible_response_ms
-   - thinker_ms
-   - json_compiler_ms
-   - memory_recall_ms
-   - critic_ms
-   - json_retry_count
-   - json_valid_after_first_try
-   - model_calls_count
-   - route/action
-   - notes
-
-4. Findings
-   - ускорил ли Menlo_Lucy JSON Compiler
-   - ухудшилась ли валидность JSON
-   - ускорил ли recall_sys_util
-   - сколько времени съедает critic
-   - помог ли critic gating без ухудшения safety
-   - где главный bottleneck
-
-5. Recommendation
-   Дай конкретную рекомендацию:
-   - оставить baseline
-   - или переключить recall_model на sys_util
-   - или использовать Menlo_Lucy как JSON Compiler
-   - или не использовать Menlo_Lucy как JSON Compiler из-за ошибок
-   - или включить critic_mode=tool_and_coder_only
-   - или оставить critic всегда включённым
-
-6. Safe patch plan
-   Если предлагаешь изменения — опиши минимальный патч:
-   - какие файлы менять
-   - какие config flags добавить
-   - какие тесты добавить/обновить
-   - как откатить
-
-7. Explicitly rejected approaches
-   Укажи, что в этом эксперименте НЕ использовались:
-   - эвристический MemoryRecallService;
-   - keyword-based recall;
-   - большие ручные if/else цепочки;
-   - удаление JSON Compiler;
-   - отключение permissions ради скорости.
-
-Финальный результат:
-- Не ломать текущую работу.
-- Все существующие тесты должны проходить.
-- Новый benchmark script должен запускаться вручную.
-- Итоговый отчёт должен быть понятен человеку и следующему AI-агенту.
diff --git a/FOR_AI_REVIEW.md b/FOR_AI_REVIEW.md
deleted file mode 100644
index 46f3544..0000000
--- a/FOR_AI_REVIEW.md
+++ /dev/null
@@ -1,249 +0,0 @@
-# DuckLM Runtime Architecture Review
-
-## 🧠 1. System Overview
-
-**What is runtime?**
-Runtime is the execution substrate of the system — a multi-layered cognitive execution environment that orchestrates LLMs, tools, memory, and permissions into a unified agentic workflow. It's the `RuntimeController` that composes `RuntimeLoop`, `ExecutionEngine`, `ContextBuilder`, `AsyncRouter`, `PermissionService`, and `EventBus`.
-
-**What is the core loop?**
-The core loop is the `RuntimeLoop.run_task()` method: it receives a `UserTask`, applies permission hard-stop checks, creates task state, builds context via `ContextBuilder`, routes via `AsyncRouter` to get a `directive`, executes via `ExecutionEngine`, applies `Critic` evaluation, saves via `MemoryPolicy`, publishes `RuntimeEvent`s through `EventBus`, and returns streaming output.
-
-**Models (Orchestrator / Coder / Critic / Utility)**
-- **Orchestrator** (`OrchestratorAdapter`/`AsyncOrchestratorAdapter`): LLM that decides plan vs direct respond vs tool; generates `ExecutionDirective` of type `plan`, `tool`, `respond`, `fail`, etc.
-- **Coder** (`CoderAdapter`/`AsyncCoderAdapter`): LLM specialized for code generation and manipulation.
-- **Critic** (`CriticAdapter`/`AsyncCriticAdapter`): Evaluates tool outputs with JSON scoring (correctness, usefulness, safety, memory_store, weight).
-- **Utility**: The `sys_util` orchestrator — a fallback/orchestration layer for system-level operations.
-
-**What is "truth"? (Event Store / State Store)**
-- **Event Store** (`SQLiteEventStore`): Immutable append-only log of `RuntimeEvent`s per task. Source of truth for "what happened."
-- **State Store** (`SQLiteTaskStateStore`): Current mutable task state (status, last_directive, pending requests). "Current truth" of task progress.
-- **Checkpoint Store** (`SQLiteCheckpointStore`): Snapshots of task state + context at milestones.
-- **Memory Store** (`MemoryStore` + `VectorIndex`): Long-term knowledge base with weighted entries.
-
----
-
-## 🔁 2. End-to-End Flow
-
-### High-Level Flow (as seen in logs)
-```
-User Input
-→ Router (AsyncRouter.decide)
-→ Context Builder (ContextBuilder.build)
-→ Orchestrator (decides plan vs direct)
-→ Plan / Direct Action
-→ Execution Engine
-→ Tool Layer (ToolRegistry + ToolSandbox)
-→ Critic (AsyncCriticAdapter)
-→ Memory Policy (MemoryWritePolicy)
-→ Event Bus (SQLiteEventStore)
-→ Streaming Output (via WebSocket / SSE)
-```
-
-### Conversation Flow
-1. **Router** decides `plan` vs `respond` vs `tool` vs `fail` based on orchestrator output or intent parser.
-2. **Context Builder** enriches task with memory context, tool context, execution context, and safety constraints.
-3. **Orchestrator** (or direct respond) produces the initial `ExecutionDirective`.
-4. **Execution Engine** schedules via `ExecutionScheduler`, then executes:
-   - `plan` → parse into `PlanStep`s, build task graph, execute ready steps
-   - `tool` → validate tool existence, check permissions, execute via `ToolRegistry`
-   - `respond` → direct response
-   - `fail` → immediate failure
-5. **Tool Layer** (`ToolRegistry` + `ToolSandbox`):
-   - Plugin discovery via `ToolDiscovery`
-   - Manifest-based tool registration
-   - Sandboxed execution with timeout
-6. **Critic** evaluates tool results (if enabled), outputs `CriticScore` JSON.
-7. **Memory Policy** decides whether to insert `tool_result`, `critique`, `plan`, `fact`, `summary`, or `user_preference` into memory.
-8. **Event Bus** (`SQLiteEventBus`) publishes `RuntimeEvent` with sequence ordering.
-9. **Streaming Output** replays events via WebSocket and sends incremental responses.
-
-### Failure Flow
-- **Invalid JSON flow**: `ExecutionScheduler.parse_plan_steps` catches `JSONDecodeError` / `ValueError` / `TypeError`, logs warning, returns empty steps → `plan` fails with "Failed to parse plan steps."
-- **Tool failure flow**: Tool execution returns `{"status": "failed", "result": {"error": "..."}}` → ExecutionEngine returns failed status → task state updated → event `TASK_FAILED` published → stops further plan steps.
-- **Critic failure flow**: `_evaluate_with_critic` catches exception, logs warning, publishes `CRITIC_RESULT` with error → critic_score is `None` → execution continues without critique.
-- **Orchestrator fallback flow**: If primary orchestrator fails or missing, `AsyncRouter` has `sys_util` fallback (utility orchestrator) for system-level decisions.
-- **Permission denial flow**: `PermissionService.check_shell_command` / `check_write_path` returns `decision: "hard_stop"` or `decision: "deny"` → immediate failure with blocked reason; if `decision: "prompt"` → `TASK_AWAITING_PERMISSION` state.
-
-### Repair Flow (JSON / Tool-call)
-- Repair is triggered via `resolve_permission` or `resolve_secret` endpoints.
-- Permission repair: user provides `decision` ("allow_once"/"allow_always"/"deny"/"ask_always") → `PermissionService.resolve_permission` → updates state → retries original directive.
-- Secret repair: user provides secret string → `ExecutionEngine.execute` with `secret_override` → continues execution.
-
----
-
-## ⚙️ 3. Component Breakdown
-
-### `runtime_loop` (`RuntimeLoop`)
-- **Responsibility**: Central task coordination; state management; event publishing.
-- **Input**: `UserTask`
-- **Output**: `{"task_id", "status", "directive", "result", "events"}`
-- **Must NOT do**: Direct LLM calls (delegates to router/execution_engine); bypass state store.
-
-### `execution_engine` (`ExecutionEngine`)
-- **Responsibility**: Execute directives (plan/tool/respond/fail); integrate critic; interface with tool registry.
-- **Input**: `UserTask`, `ExecutionDirective`, optional `permission_override`, `secret_override`
-- **Output**: `{"status", "result", "step_results"}`
-- **Must NOT do**: Bypass permission checks; skip critic evaluation when enabled; leak secrets in logs.
-
-### `scheduler` (`ExecutionScheduler`)
-- **Responsibility**: Parse plan JSON, build task dependency graph, yield ready steps, detect cycles.
-- **Input**: JSON plan string, `task_id`
-- **Output**: `list[PlanStep]`
-- **Must NOT do**: Execute anything; modify task state directly.
-
-### `tool_registry` (`ToolRegistry`)
-- **Responsibility**: Register/manifest tools; execute via `ToolSandbox`; provide schema metadata.
-- **Input**: tool name, args dict
-- **Output**: `ToolResult`
-- **Must NOT do**: Bypass sandbox; execute privileged host commands without sandbox.
-
-### `event_bus` (`EventBus` → `SQLiteEventStore`)
-- **Responsibility**: Append-only event persistence; sequence numbering; per-task query.
-- **Input**: `RuntimeEvent`
-- **Output**: event stream
-- **Must NOT do**: Modify state store directly (state is separate); delete or mutate events.
-
-### `memory` (`MemoryInterface` → `MemoryStore` + `VectorIndex`)
-- **Responsibility**: Store/retrieve weighted memory entries; vector similarity search; integrate with context builder.
-- **Input**: text, kind, source, weight, metadata
-- **Output**: search results or insertion confirmation
-- **Must NOT do**: Expose raw embeddings without access control; store secrets.
-
----
-
-## 🧩 4. Data Contracts
-
-### `PlanStep`
-```python
-id: str
-kind: Literal["tool", "coder", "memory", "respond"]
-tool: str | None
-args: dict[str, Any]
-description: str
-requires_confirmation: bool
-depends_on: list[str]
-```
-**Real example** (from `router` prompt engineering):
-`{"id":"step-0","kind":"tool","tool":"shell_exec","args":{"command":"ls -la"},"description":"List directory","requires_confirmation":false,"depends_on":[]}`
-
-### `ToolCall`
-```python
-tool: str
-args: dict[str, Any]
-task_id: str
-step_id: str
-```
-**Real log**: `TOOL_CALLED` event with `{"tool":"shell_exec","args":{"command":"pwd"},"task_id":"xyz","step_id":"step-0"}`
-
-### `ToolResult`
-```python
-tool: str
-ok: bool
-output: Any
-error: str | None
-metadata: dict[str, Any]
-```
-**Real output**: `{"tool":"shell_exec","ok":true,"output":"/app","error":null,"metadata":{}}`
-
-### `RuntimeEvent`
-```python
-event_id: str
-task_id: str
-session_id: str
-sequence: int
-type: str  # e.g. TASK_RECEIVED, TOOL_CALLED, TASK_COMPLETED
-payload: dict[str, Any]
-causation_id: str | None
-correlation_id: str
-```
-**Real event stream**: `TASK_RECEIVED → CONTEXT_BUILT → PLAN_STARTED → TOOL_CALLED → TOOL_COMPLETED → TASK_COMPLETED`
-
-### `MemoryEntry`
-```python
-id: str
-text: str
-kind: Literal["tool_result","plan","critique","fact","summary","user_preference"]
-source: Literal["tool","critic","user","system"]
-weight: float
-task_id: str | None
-session_id: str | None
-metadata: dict[str, Any]
-embedding_model: str
-embedding_dim: int
-```
-**Real insertion**: After critic evaluation, `kind="critique"`, `source="critic"`, `weight=0.85`, metadata includes scores.
-
----
-
-## 🔥 5. Failure Modes
-
-### Invalid JSON Flow
-- **Trigger**: Malformed plan JSON (e.g., missing braces, non-JSON string).
-- **Detection**: `parse_plan_steps` catches `JSONDecodeError` / `ValueError` / `TypeError`.
-- **Result**: Warning logged, empty steps returned → `PLAN_FAILED` with `"Failed to parse plan steps from directive"`.
-
-### Tool Failure Flow
-- **Trigger**: Tool returns `ok=False` or raises exception in sandbox.
-- **Detection**: `_execute_tool` checks `tool_result.ok`.
-- **Result**: Status `"failed"`, result contains `{"error": "...", "failed_step": step.id, "step_results": [...]}` → `TASK_FAILED` event; further plan steps skipped.
-
-### Critic Failure Flow
-- **Trigger**: Critic adapter raises exception or returns non-JSON output.
-- **Detection**: `_evaluate_with_critic` catches exception, logs warning.
-- **Result**: Event `CRITIC_RESULT` with error payload → `critic_score = None` → execution continues without critique; memory write skipped.
-
-### Orchestrator Fallback Flow
-- **Trigger**: Primary orchestrator model unavailable or returns invalid directive.
-- **Detection**: `_ensure_orchestrator` returns `None`; router falls back to `sys_util` orchestrator.
-- **Result**: Utility orchestrator handles system-level decisions (e.g., file operations, environment queries).
-
-### Permission Denial Flow
-- **Trigger**: `PermissionService` returns `decision: "hard_stop"` or `"deny"`.
-- **Detection**: `_execute_tool` checks `permission_result`.
-- **Result**: Immediate failure with `"Command blocked: ..."` → `TASK_FAILED`; no tool execution.
-
----
-
-## 🧠 6. "Decision Logic Map"
-
-### Orchestrator vs Direct Respond
-- **Use orchestrator** when: task requires planning, multi-step tool usage, or unknown intent. Orchestrator decides to emit `plan` or `tool` directive.
-- **Direct respond** when: intent parser classifies as simple query (`TASK_RECEIVED` → `router.intent_parser` → `respond` directive) or `respond` directive explicitly set.
-
-### Utility Model Call
-- Invoked when `sys_util` orchestrator is loaded (configurable). Used for system-level operations: environment inspection, file system queries, or when primary orchestrator fails and fallback is needed.
-
-### Retry Logic
-- **Planner retry**: `ExecutionScheduler` has `retry_limit=2`; on parse/validation failure, retries up to limit before failing plan.
-- **Tool retry**: Not implemented natively; retry must be encoded in plan steps (`depends_on`, manual replan).
-
-### Plan Creation
-- **Trigger**: Orchestrator output contains `{type: "plan", ...}` or explicit `plan` directive.
-- **Process**: `parse_plan_steps` → `validate_no_cycles` → `build_task_graph` → ready steps execution.
-- **No plan**: Orchestrator outputs `respond` or `tool` → direct execution.
-
----
-
-## 🧰 7. Tool System Architecture
-
-### Plugin Discovery
-- `ToolDiscovery` scans `app/tools/plugins/` for modules exporting `Tool` classes.
-- Discovers: `shell_exec`, `file_read`, `file_write`, `memory` (search/insert/list).
-
-### Manifest-Based Tools
-- Each plugin has a `manifest.json` with:
-  - `description`: human-readable docstring.
-  - `args_schema`: JSON schema for validation.
-  - `requires_permission`: boolean for privileged tools (`shell_exec`, `file_write`).
-- On discovery, registry registers tool and stores schema for permission/routing.
-
-### Registry Bootstrap
-- `RuntimeController._create_tool_registry()` initializes discovery, loads plugins, registers with init mapping (sandbox, permissions).
-- Tools are initialized once at startup; `tool_registry` is shared across executions.
-
-### Execution Isolation
-- **ToolSandbox** (`ToolSandbox`):
-  - Restricts filesystem to `allowed_root` (project base dir).
-  - Timeout per execution (`step_timeout_ms`).
-  - Blocks `sudo` without secret override; requires secret injection for sudo commands.
-- **Permission gating**: `shell_exec` and `file_write` require explicit permission decision before execution.
\ No newline at end of file
diff --git a/IMPLEMENTATION_PLAN.md b/IMPLEMENTATION_PLAN.md
deleted file mode 100644
index 6cddf0a..0000000
--- a/IMPLEMENTATION_PLAN.md
+++ /dev/null
@@ -1,534 +0,0 @@
-# IMPLEMENTATION PLAN
-
-Этот документ описывает рекомендуемый порядок реализации `ducklm` от пустого репозитория до рабочего локального runtime с тестовым веб-чатом.
-
-План опирается на [`TASK_3.md`](/home/mirivlad/git/ducklm/TASK_3.md) и [`ARCHITECTURE.md`](/home/mirivlad/git/ducklm/ARCHITECTURE.md).
-
-## 1. Goal
-
-Собрать систему по этапам так, чтобы после каждого этапа оставался рабочий, проверяемый инкремент, а не набор недоделанных слоёв.
-
-Главный принцип:
-
-- сначала каркас и контракты
-- потом runtime core
-- потом execution path
-- потом memory / critic / recovery
-- потом удобные интерфейсы проверки
-
-## 2. Milestones Overview
-
-1. Project skeleton and typed contracts
-2. Config system and dependency wiring
-3. Runtime loop skeleton
-4. Event bus and event store
-5. State persistence and checkpointing
-6. Context builder and orchestrator adapter
-7. Router and directive flow
-8. Execution engine and task graph
-9. Permission system and tool sandbox
-10. MVP tools
-11. FastAPI API and health surface
-12. Web chat test client
-13. Coder integration
-14. Critic integration
-15. Memory system
-16. Memory write policy
-17. Retry, recovery, replay
-18. CLI and operator utilities
-19. Hardening and tests
-
-## 3. Detailed Stages
-
-### Stage 1. Project Skeleton and Typed Contracts
-
-Цель:
-
-- создать структуру директорий
-- завести базовые модели данных
-- убрать двусмысленность интерфейсов между слоями
-
-Сделать:
-
-- создать `app/`, `config/`, `data/`, `tests/`
-- добавить core contracts:
-  - `UserTask`
-  - `PlanStep`
-  - `ToolCall`
-  - `ToolResult`
-  - `CriticScore`
-  - `RuntimeEvent`
-  - `TaskCheckpoint`
-  - `ExecutionDirective`
-
-Результат этапа:
-
-- проект компилируется
-- типы и схемы являются source of truth для остальных модулей
-
-Проверка:
-
-- unit tests на валидацию схем
-
-### Stage 2. Config System and Dependency Wiring
-
-Цель:
-
-- вынести runtime behavior в конфиги
-- зафиксировать единый способ загрузки настроек
-
-Сделать:
-
-- `config/models.json`
-- `config/prompts.json`
-- `config/permissions.json`
-- `config/runtime.json`
-- loader и typed config models
-
-Результат этапа:
-
-- runtime можно запускать с консистентной конфигурацией
-
-Проверка:
-
-- config load smoke test
-
-### Stage 3. Runtime Loop Skeleton
-
-Цель:
-
-- создать heart of system без полной бизнес-логики
-
-Сделать:
-
-- `runtime_loop.py`
-- `runtime_controller.py`
-- минимальный lifecycle:
-  - receive task
-  - create state
-  - build empty context
-  - emit initial event
-  - return placeholder directive/result
-
-Результат этапа:
-
-- есть центральный control loop
-- остальные слои начинают подстраиваться под него, а не наоборот
-
-Проверка:
-
-- smoke test на прохождение задачи через loop skeleton
-
-### Stage 4. Event Bus and Event Store
-
-Цель:
-
-- создать внутреннюю event backbone
-
-Сделать:
-
-- `event_bus.py`
-- `event_types.py`
-- `event_store.py`
-- monotonic sequence per task
-- append-only storage
-- базовый replay reader
-
-Результат этапа:
-
-- у каждой задачи есть воспроизводимая хронология
-
-Проверка:
-
-- event ordering tests
-- dedup/idempotency tests
-
-### Stage 5. State Persistence and Checkpointing
-
-Цель:
-
-- убрать зависимость task lifecycle от памяти процесса
-
-Сделать:
-
-- `task_state_store.py`
-- `checkpoint_store.py`
-- SQLite backend
-- checkpoint after critical transitions
-- resume loading primitives
-
-Результат этапа:
-
-- runtime готов к recovery после падения
-
-Проверка:
-
-- save/load checkpoint tests
-
-### Stage 6. Context Builder and Orchestrator Adapter
-
-Цель:
-
-- зафиксировать правильный вход в reasoning path
-
-Сделать:
-
-- `context_builder.py`
-- token-budget-aware assembly
-- orchestrator adapter abstraction
-- planning mode / orchestration mode interfaces
-
-Результат этапа:
-
-- все будущие вызовы reasoning model идут через один нормализованный путь
-
-Проверка:
-
-- tests на context assembly priorities
-
-### Stage 7. Router and Directive Flow
-
-Цель:
-
-- зафиксировать router как pure decision layer
-
-Сделать:
-
-- `router.py`
-- `state + context -> ExecutionDirective`
-- no side effects
-- routing rules for:
-  - retrieval needed
-  - planning needed
-  - permission needed
-  - critic needed
-
-Результат этапа:
-
-- runtime loop применяет решения, а не изобретает их сам
-
-Проверка:
-
-- unit tests на routing decisions
-
-### Stage 8. Execution Engine and Task Graph
-
-Цель:
-
-- получить управляемое исполнение шагов, а не “вызовы по месту”
-
-Сделать:
-
-- `execution_engine.py`
-- `execution_scheduler.py`
-- task graph validation
-- sequential DAG scheduler
-- adapters for tool/coder execution
-
-Результат этапа:
-
-- runtime может исполнять direct action и multi-step plans
-
-Проверка:
-
-- task graph validation tests
-- step ordering tests
-
-### Stage 9. Permission System and Tool Sandbox
-
-Цель:
-
-- не дать runtime выполнять опасные действия напрямую
-
-Сделать:
-
-- permission rules
-- persistent approval store
-- shell safety classifier
-- sandbox execution adapter
-- timeout/resource/path restrictions
-
-Результат этапа:
-
-- опасные команды требуют policy decision до запуска
-
-Проверка:
-
-- permission flow tests
-- sandbox boundary smoke tests
-
-### Stage 10. MVP Tools
-
-Цель:
-
-- сделать минимально полезный execution path
-
-Сделать:
-
-- `shell_exec`
-- `file_read`
-- `file_write`
-- unified tool registry
-- unified `ToolResult`
-
-Результат этапа:
-
-- runtime уже может выполнять реальные локальные задачи
-
-Проверка:
-
-- integration tests для трёх базовых tools
-
-### Stage 11. FastAPI API and Health Surface
-
-Цель:
-
-- открыть runtime наружу через стабильный backend interface
-
-Сделать:
-
-- `POST /chat`
-- `WS /stream`
-- `GET /health`
-- базовый request/response models
-- error handling
-
-Результат этапа:
-
-- систему уже можно дергать из внешнего клиента
-
-Проверка:
-
-- API smoke tests
-
-### Stage 12. Web Chat Test Client
-
-Цель:
-
-- получить быстрый способ руками проверить поведение всей системы через браузер
-
-Сделать:
-
-- минимальный локальный веб-чат
-- простую страницу с:
-  - вводом задачи
-  - окном сообщений
-  - панелью streaming events
-  - индикацией permission requests
-  - отображением final result
-- подключение к `POST /chat` и `WS /stream`
-
-Требования:
-
-- это не production UI
-- это не отдельный продуктовый frontend
-- это thin test client для ручной проверки runtime
-
-Лучше всего разместить как:
-
-- `app/api/static/` или отдельный `web/` модуль с минимальным стеком
-
-Результат этапа:
-
-- можно открыть браузер и увидеть, как runtime планирует, исполняет шаги и стримит события
-
-Проверка:
-
-- ручной e2e smoke test через браузер
-
-### Stage 13. Coder Integration
-
-Цель:
-
-- подключить отдельную coding model без смешивания ролей
-
-Сделать:
-
-- `core/coder.py`
-- `generate_code`
-- `fix_code`
-- `refactor_code`
-- structured coder result
-
-Результат этапа:
-
-- runtime может делегировать кодогенерацию специализированной модели
-
-Проверка:
-
-- tests на coder request/response flow
-
-### Stage 14. Critic Integration
-
-Цель:
-
-- получить formal evaluation layer после tools/coder
-
-Сделать:
-
-- critic adapter
-- `CriticScore`
-- fallback policy when critic unavailable
-
-Результат этапа:
-
-- результаты можно оценивать единообразно
-
-Проверка:
-
-- critic scoring contract tests
-
-### Stage 15. Memory System
-
-Цель:
-
-- добавить долговременную retrieval memory
-
-Сделать:
-
-- SQLite metadata store
-- FAISS/hnswlib vector index
-- insert/search/delete/reindex
-- embedding versioning
-
-Результат этапа:
-
-- runtime получает semantic retrieval вместо контекста “только текущая задача”
-
-Проверка:
-
-- memory insert/search tests
-
-### Stage 16. Memory Write Policy
-
-Цель:
-
-- не допустить хаотичной записи всего подряд
-
-Сделать:
-
-- deterministic write policy
-- threshold model
-- dedup / merge rules
-- conflict handling
-
-Результат этапа:
-
-- память пополняется контролируемо, а не по одному score cutoff
-
-Проверка:
-
-- memory policy decision tests
-
-### Stage 17. Retry, Recovery, Replay
-
-Цель:
-
-- довести runtime до устойчивого long-running поведения
-
-Сделать:
-
-- planner retry
-- tool retry for allowed cases
-- partial failure recovery
-- replay path from event store
-- resume from checkpoint
-
-Результат этапа:
-
-- система может переживать ошибки без полной потери исполнения
-
-Проверка:
-
-- recovery smoke tests
-- replay tests
-
-### Stage 18. CLI and Operator Utilities
-
-Цель:
-
-- дать локальный интерфейс помимо API/веб-чата
-
-Сделать:
-
-- send task
-- show result
-- follow events
-- memory search
-- replay task history
-
-Результат этапа:
-
-- разработчик может проверять runtime без браузера
-
-Проверка:
-
-- CLI smoke tests
-
-### Stage 19. Hardening and Tests
-
-Цель:
-
-- довести проект до инженерно приемлемого состояния
-
-Сделать:
-
-- structured logging refinement
-- failure-path tests
-- concurrency edge cases
-- docs refresh
-- cleanup of temporary stubs
-
-Результат этапа:
-
-- проект становится пригодным для реальной итеративной разработки
-
-Проверка:
-
-- full critical-path smoke suite
-
-## 4. Recommended First Working Demo
-
-Первый нормальный demo checkpoint должен быть на этапе `Stage 12`.
-
-Что должно работать к этому моменту:
-
-- браузерный веб-чат открывается локально
-- пользователь отправляет задачу
-- runtime принимает task
-- событие начала работы видно в UI
-- если нужен plan, это видно в events panel
-- tool execution видно в events panel
-- final response возвращается в чат
-
-На этом этапе memory, critic и recovery ещё могут быть частично stubbed, но:
-
-- runtime loop
-- event bus
-- state persistence
-- router
-- execution engine
-- permissions
-- базовые tools
-- API
-- web chat
-
-должны быть уже реальными.
-
-## 5. Order Rationale
-
-Почему веб-чат не в самом конце:
-
-- он нужен как live inspection surface для runtime
-- через него проще проверять streaming, permissions и event ordering
-- он быстрее выявляет архитектурные проблемы, чем голые unit tests
-
-Но веб-чат ставится только после:
-
-- runtime core
-- event bus
-- persistence
-- basic execution path
-- API
-
-Иначе он станет красивой оболочкой над несуществующей системой.
diff --git a/MVP_CHECKLIST.md b/MVP_CHECKLIST.md
deleted file mode 100644
index cdcf84a..0000000
--- a/MVP_CHECKLIST.md
+++ /dev/null
@@ -1,83 +0,0 @@
-# MVP CHECKLIST
-
-Этот чеклист фиксирует минимальный рабочий объём для первого демонстрационного запуска `ducklm`.
-
-## 1. Core Runtime
-
-- [x] Есть модульная структура проекта `app/`, `config/`, `data/`, `tests/`
-- [x] Есть typed contracts для core entities
-- [x] Есть `Runtime Loop Controller`
-- [x] Runtime loop умеет принять задачу и создать task state
-- [x] Runtime loop публикует стартовые и финальные события
-
-## 2. Events and State
-
-- [x] Есть `EventBus`
-- [x] Есть `EventStore`
-- [x] События имеют `task_id + sequence`
-- [x] Есть `TaskStateStore`
-- [x] Есть `CheckpointStore`
-- [x] Есть сохранение checkpoint после critical transitions
-- [ ] Есть базовый resume path
-
-## 3. Decision and Execution
-
-- [x] Есть `ContextBuilder`
-- [x] Есть `Router` как pure decision layer
-- [x] Есть `ExecutionDirective`
-- [x] Есть `ExecutionEngine`
-- [x] Есть `ExecutionScheduler`
-- [ ] План валидируется и преобразуется в task graph
-
-## 4. Tools and Safety
-
-- [x] Есть `PermissionService`
-- [x] Есть persistent store для user approvals
-- [x] Есть `ToolSandbox`
-- [x] Есть `ToolRegistry`
-- [x] Работает `shell_exec`
-- [x] Работает `file_read`
-- [x] Работает `file_write`
-
-## 5. Models and Evaluation
-
-- [ ] Есть orchestrator adapter
-- [ ] Есть planning mode interface
-- [ ] Есть coder adapter
-- [ ] Есть critic adapter
-- [ ] Есть fallback policy при critic failure
-
-## 6. Memory
-
-- [ ] Есть SQLite metadata store
-- [ ] Есть vector index adapter
-- [ ] Работает memory insert/search
-- [ ] Есть `MemoryWritePolicy`
-- [ ] Запись в память не зависит только от critic score
-
-## 7. Interfaces
-
-- [x] Есть `POST /chat`
-- [ ] Есть `WS /stream`
-- [x] Есть `GET /health`
-- [x] Есть локальный веб-чат для ручной проверки runtime
-- [ ] Есть CLI для отправки задач и просмотра событий
-
-## 8. Reliability
-
-- [ ] Есть structured logging
-- [ ] Есть retry/recovery policy skeleton
-- [ ] Есть replay path from event store
-- [ ] Есть critical-path smoke tests
-
-## 9. Demo Definition
-
-MVP считается достигнутым, если:
-
-- [ ] можно открыть локальный веб-чат в браузере
-- [ ] можно отправить задачу
-- [ ] видно streaming events
-- [ ] видно планирование или direct action
-- [ ] видно выполнение tool step
-- [ ] опасная команда требует подтверждения
-- [ ] финальный ответ возвращается пользователю
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..ca06184
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,51 @@
+duck-up:
+	docker compose -f docker-compose.memory.yml up -d
+	@echo "Memory services started."
+	@echo "Start llama-server:"
+	@echo "bash scripts/llama/start_main.sh start"
+
+duck-llama-main:
+	bash scripts/llama/start_main.sh start
+
+duck-llama-stop:
+	bash scripts/llama/start_main.sh stop
+
+duck-llama-restart:
+	bash scripts/llama/start_main.sh restart
+
+duck-llama-status:
+	bash scripts/llama/start_main.sh status
+
+duck-llama-logs:
+	bash scripts/llama/start_main.sh logs --follow
+
+duck-llama-health:
+	bash scripts/llama/healthcheck.sh http://127.0.0.1:8081/v1
+
+duck-api:
+	python3 -m duck_core.api
+
+duck-dev:
+	docker compose -f docker-compose.memory.yml up -d
+	@echo "Start llama-server in another terminal:"
+	@echo "bash scripts/llama/start_main.sh start"
+	@echo "Then run:"
+	@echo "make duck-api"
+	@echo "Open:"
+	@echo "http://127.0.0.1:8000/"
+
+duck-open:
+	@echo "Open web UI:"
+	@echo "http://127.0.0.1:8000/"
+
+duck-smoke:
+	python3 -m pytest tests/smoke -v
+
+duck-test:
+	python3 -m pytest -v
+
+duck-verify:
+	bash scripts/verify/verify_basic_chat.sh
+	bash scripts/verify/verify_file_write_read.sh
+	bash scripts/verify/verify_tool_blocking.sh
+	bash scripts/verify/verify_models_roles.sh
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2d0982f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,22 @@
+# DuckLM
+
+DuckLM is a local agent runtime over local language models. It exposes a WebChat and HTTP API, calls `llama-server` through an OpenAI-compatible API, and persists tasks/events in SQLite.
+
+## Quick Start
+
+```bash
+python3 -m venv .venv
+. .venv/bin/activate
+python -m pip install -e ".[dev]"
+cp .env.example .env
+bash scripts/llama/start_main.sh start
+```
+
+In another terminal:
+
+```bash
+. .venv/bin/activate
+python -m duck_core.api
+```
+
+Open `http://127.0.0.1:8000/`.
diff --git a/TASK_3.md b/TASK_3.md
deleted file mode 100644
index 2f9461c..0000000
--- a/TASK_3.md
+++ /dev/null
@@ -1,1255 +0,0 @@
-Ты — senior AI systems engineer и principal backend architect.
-
-Твоя задача: спроектировать и реализовать полноценный локальный multi-model AI agent runtime.
-
-Это НЕ чат-бот.
-Это НЕ demo script.
-Это НЕ один большой файл с вызовами моделей и shell.
-
-Это автономная локальная система исполнения задач с:
-
-- central runtime loop
-- несколькими локальными GGUF-моделями с жёсткими ролями
-- tools
-- planning
-- critic loop
-- долговременной memory
-- permission gating
-- event bus
-- state persistence
-- streaming
-- конфигурируемым runtime
-
-Система должна быть расширяемой, тестируемой, отказоустойчивой и пригодной для дальнейшего развития.
-
-==================================================
-1. PRODUCT GOAL
-==================================================
-
-Построить локальный AI runtime, который:
-
-- принимает пользовательскую задачу
-- извлекает релевантную память
-- собирает контекст
-- принимает orchestration-решение
-- при необходимости строит план
-- исполняет шаги через tools и coder
-- оценивает результаты через critic
-- сохраняет полезные результаты в memory
-- публикует события исполнения
-- поддерживает streaming клиенту
-- требует подтверждения на опасные действия
-- умеет восстанавливаться после сбоя
-- полностью управляется через конфиги
-
-Система должна быть local-first.
-
-==================================================
-2. NON-GOALS
-==================================================
-
-На первом этапе НЕ нужно:
-
-- строить UI frontend
-- делать distributed execution
-- делать multi-user auth
-- делать Kubernetes deployment
-- делать сложный scheduler для множества параллельных задач
-- делать self-modifying runtime
-
-Telegram bot допускается только как thin stub.
-
-==================================================
-3. CENTRAL ARCHITECTURAL PRINCIPLE
-==================================================
-
-Центр системы — `Runtime Loop Controller`.
-
-Не router.
-Не отдельная LLM.
-Не execution engine.
-
-Именно runtime loop замыкает полный цикл:
-
-`task -> state load -> context build -> orchestrator -> plan/decision -> execute -> critic -> memory policy -> state checkpoint -> next step`
-
-Целевая форма архитектуры:
-
-```text
-Runtime Loop Controller
-  -> State Store / Checkpoints
-  -> Context Builder
-  -> Orchestrator / Planner
-  -> Router (policy + decision suggestion)
-  -> Execution Engine / Scheduler
-  -> Tools / Coder / Critic
-  -> Memory System
-  -> Event Bus / Event Store
-  -> Streaming Projection
-  -> back into Runtime Loop
-```
-
-Любой critical transition должен проходить через runtime loop.
-
-==================================================
-4. MODELS AND HARD ROLES
-==================================================
-
-Используй отдельные модели с жёстким разделением ответственности.
-
-4.1 Orchestrator / Planner
-
-Модель:
-- LLaMA-family GGUF
-
-Роль:
-- orchestration reasoning
-- decomposition of user task
-- decision whether planning is needed
-- plan generation in strict JSON format
-- next-step suggestion
-
-Ограничения:
-- не выполняет tools напрямую
-- не пишет итоговый код, кроме инструкций для coder
-- не оценивает финальную корректность результата
-
-4.2 Coder
-
-Модель:
-- X-CODER GGUF
-
-Роль:
-- generate_code
-- fix_code
-- refactor_code
-- generate helper scripts when explicitly requested by runtime
-
-Ограничения:
-- не принимает orchestration-решения
-- не строит execution plan
-- не вызывает tools напрямую
-
-4.3 Critic
-
-Модель:
-- Gemma-family GGUF
-
-Роль:
-- оценивает результаты tools
-- оценивает результаты coder
-- предлагает memory usefulness score
-- предлагает safety/usefulness judgment
-
-Ограничения:
-- не планирует
-- не исполняет действия
-- не принимает финальное решение о memory write
-
-4.4 Embeddings Engine
-
-Модель:
-- MiniLM или совместимая embeddings model
-
-Роль:
-- embeddings generation
-- semantic retrieval
-
-Ограничения:
-- не участвует в reasoning
-- не участвует в planning
-
-==================================================
-5. GLOBAL RULES
-==================================================
-
-Обязательные правила:
-
-- Все execution transitions проходят через runtime loop controller.
-- Все tool calls проходят через execution layer, permission layer и sandbox layer.
-- Все prompts и model settings вынесены в `config/`.
-- Все межмодульные контракты оформлены через типы/Pydantic models/dataclasses.
-- Все важные действия публикуются как события.
-- Task lifecycle не должен храниться только in-memory.
-- Система должна корректно деградировать при сбое отдельных подсистем.
-
-Hard decision rule:
-
-- Все decision-producing components должны возвращать только структурированные decision objects.
-- Ни один decision-producing component не должен напрямую исполнять tools.
-- Ни один decision-producing component не должен напрямую мутировать task state.
-- Ни один decision-producing component не должен неявно вызывать другие компоненты в обход runtime loop.
-
-Обязательная деградация:
-
-- если critic недоступен, runtime продолжает работу по fallback policy
-- если memory retrieval недоступен, задача выполняется без retrieval
-- если streaming недоступен, система возвращает sync response
-- если planner вернул невалидный план, runtime делает controlled replan или graceful fail
-
-==================================================
-6. IMPLEMENTATION ORDER
-==================================================
-
-Реализация должна идти итерациями в таком порядке:
-
-1. project skeleton
-2. typed contracts
-3. runtime loop skeleton
-4. event bus + event schema
-5. state persistence + checkpoints
-6. config loader
-7. context builder skeleton
-8. FastAPI skeleton
-9. router
-10. execution engine / scheduler
-11. permission system
-12. tool sandbox layer
-13. local tools
-14. coder integration
-15. critic integration
-16. memory system
-17. memory write policy engine
-18. streaming projection
-19. CLI
-20. optional Telegram stub
-
-После каждого шага ты обязан:
-
-- показать изменённые файлы
-- показать структуру директорий
-- кратко объяснить, что уже работает
-- явно указать, что ещё stub
-
-==================================================
-7. MVP BOUNDARY
-==================================================
-
-Первая рабочая версия обязана поддерживать end-to-end сценарий:
-
-- пользователь отправляет задачу
-- runtime loop создаёт task state
-- context builder собирает контекст
-- orchestrator решает direct action или planning
-- execution engine исполняет шаги
-- shell/file tools реально работают
-- опасная команда требует подтверждения
-- critic оценивает результат
-- memory policy принимает решение о записи
-- события пишутся в event store
-- task state чекпоинтится
-- клиент получает streaming или sync результат
-
-Минимальный набор tools для MVP:
-
-- `shell_exec`
-- `file_read`
-- `file_write`
-
-Второй приоритет:
-
-- `web_search`
-- `web_fetch`
-
-==================================================
-8. REQUIRED PROJECT STRUCTURE
-==================================================
-
-Ожидаемая структура:
-
-```text
-ducklm/
-  app/
-    api/
-    core/
-    runtime/
-    events/
-    state/
-    tools/
-    memory/
-    permissions/
-    streaming/
-    cli/
-    models/
-    services/
-  config/
-    models.json
-    prompts.json
-    permissions.json
-    runtime.json
-  data/
-    memory/
-    state/
-    events/
-    permissions/
-  tests/
-  main.py
-```
-
-Допускается разумная адаптация, но separation of concerns обязателен.
-
-==================================================
-9. REQUIRED DOMAIN CONTRACTS
-==================================================
-
-Сначала зафиксируй typed contracts.
-
-Минимально обязательны:
-
-9.1 `UserTask`
-
-```json
-{
-  "task_id": "uuid",
-  "session_id": "uuid",
-  "input": "string",
-  "context": {},
-  "created_at": "iso-datetime"
-}
-```
-
-9.2 `PlanStep`
-
-```json
-{
-  "id": "step-1",
-  "kind": "tool|coder|memory|respond",
-  "tool": "shell_exec",
-  "args": {},
-  "description": "human readable step description",
-  "requires_confirmation": false,
-  "depends_on": []
-}
-```
-
-Rules:
-
-- `kind` обязателен
-- `args` всегда объект
-- `depends_on` обязателен, даже если пустой
-- `tool` обязателен только для `kind=tool`
-
-9.3 `ToolCall`
-
-```json
-{
-  "tool": "shell_exec",
-  "args": {},
-  "task_id": "uuid",
-  "step_id": "step-1"
-}
-```
-
-9.4 `ToolResult`
-
-```json
-{
-  "tool": "shell_exec",
-  "ok": true,
-  "output": "stdout/stderr/parsed data",
-  "error": null,
-  "metadata": {
-    "exit_code": 0,
-    "duration_ms": 120
-  }
-}
-```
-
-9.5 `CoderRequest`
-
-```json
-{
-  "mode": "generate|fix|refactor",
-  "instruction": "string",
-  "context": {},
-  "task_id": "uuid"
-}
-```
-
-9.6 `CriticScore`
-
-```json
-{
-  "correctness": 0.0,
-  "usefulness": 0.0,
-  "safety": 0.0,
-  "memory_store": true,
-  "weight": 0.0,
-  "explanation": "string"
-}
-```
-
-Rules:
-
-- все numeric scores в диапазоне `0..1`
-- `weight` используется как сигнал, а не как безусловная команда записи
-
-9.7 `MemoryEntry`
-
-```json
-{
-  "id": "uuid",
-  "text": "string",
-  "kind": "tool_result|plan|critique|fact|summary|user_preference",
-  "source": "tool|critic|user|system",
-  "weight": 0.85,
-  "task_id": "uuid",
-  "session_id": "uuid",
-  "metadata": {},
-  "created_at": "iso-datetime",
-  "embedding_model": "string",
-  "embedding_dim": 384
-}
-```
-
-9.8 `PermissionDecision`
-
-```json
-{
-  "action_type": "shell_command",
-  "pattern": "rm",
-  "decision": "allow_once|allow_always|deny|ask_always",
-  "created_at": "iso-datetime"
-}
-```
-
-9.9 `RuntimeEvent`
-
-```json
-{
-  "event_id": "uuid",
-  "task_id": "uuid",
-  "session_id": "uuid",
-  "sequence": 42,
-  "type": "task_received",
-  "timestamp": "iso-datetime",
-  "payload": {},
-  "causation_id": "uuid|null",
-  "correlation_id": "uuid"
-}
-```
-
-9.10 `TaskCheckpoint`
-
-```json
-{
-  "task_id": "uuid",
-  "status": "executing_step",
-  "active_step_id": "step-2",
-  "plan_snapshot": {},
-  "context_snapshot": {},
-  "updated_at": "iso-datetime"
-}
-```
-
-9.11 `ExecutionDirective`
-
-```json
-{
-  "type": "plan|tool|coder|respond|replan|store_memory|request_permission|complete|fail|noop",
-  "payload": {},
-  "requires_permission": false,
-  "confidence": 0.0,
-  "reason": "string"
-}
-```
-
-Rules:
-
-- все decision-producing components должны возвращать либо `ExecutionDirective`, либо коллекцию совместимых директив
-- `confidence` находится в диапазоне `0..1`
-- `payload` всегда объект
-- директива описывает намерение, а не исполняет действие сама
-
-==================================================
-10. RUNTIME LOOP CONTROLLER
-==================================================
-
-Создай:
-
-- `app/runtime/runtime_loop.py`
-- `app/runtime/runtime_controller.py`
-
-`Runtime Loop Controller` — heart of system.
-
-Он обязан:
-
-- принять task
-- загрузить или создать task state
-- опубликовать стартовые события
-- инициировать context assembly
-- вызвать orchestrator
-- определить `direct action / planning / replan / fail`
-- передать исполнение в execution engine
-- принять результаты tools/coder
-- вызвать critic
-- передать результат в memory write policy engine
-- сохранить checkpoint
-- опубликовать события
-- решить `continue / replan / complete / fail`
-
-Runtime loop не должен:
-
-- собирать prompts inline вручную
-- содержать raw tool logic
-- подменять собой router
-- подменять собой execution engine
-- принимать policy-level решения вместо других компонентов
-
-Runtime loop обязан:
-
-- применять уже возвращённые decision objects
-- переводить систему между состояниями
-- координировать вызовы между компонентами
-
-Runtime loop не должен содержать скрытую бизнес-логику policy-уровня.
-
-==================================================
-11. CONTEXT BUILDER
-==================================================
-
-Создай:
-
-- `app/core/context_builder.py`
-
-Context builder обязан собирать:
-
-- user input
-- session context
-- retrieved memory
-- current task state
-- current plan or active step
-- recent tool results
-- permission state
-- runtime constraints and safety limits
-
-Rules:
-
-- любой вызов orchestrator/planner идёт только через context builder
-- context builder должен быть token-budget aware
-- low-priority context должен отбрасываться при переполнении
-- prompt assembly не должна дублироваться по проекту
-
-Минимальный результат:
-
-```json
-{
-  "system_prompt": "string",
-  "task_summary": "string",
-  "memory_context": [],
-  "execution_context": {},
-  "tool_context": [],
-  "safety_context": {},
-  "constraints": {}
-}
-```
-
-==================================================
-12. ORCHESTRATION, PLANNING, ROUTER
-==================================================
-
-Planning — это режим orchestration model, а не отдельная модель.
-
-Router должен быть только:
-
-- policy evaluator
-- decision suggester
-
-Создай:
-
-- `app/core/router.py`
-
-Router обязан определять:
-
-- нужен ли retrieval
-- нужен ли planning
-- direct step vs multi-step flow
-- когда нужен coder
-- когда нужен critic
-- когда нужен replan
-- когда требуется permission gate
-
-Rules:
-
-- router должен быть pure function по контракту
-- router принимает input state + assembled context
-- router возвращает только structured decision object
-- router не имеет side effects
-- router не мутирует state
-- router не вызывает tools
-- router не управляет execution lifecycle
-- router не владеет task lifecycle
-- router не исполняет шаги
-- runtime loop применяет router decisions
-
-Planner rules:
-
-- planner mode возвращает только строгий JSON
-- невалидный план не исполняется
-- runtime делает bounded retry или graceful fail
-
-==================================================
-13. TASK GRAPH MODEL
-==================================================
-
-План не должен жить только как список шагов.
-
-Даже если MVP исполняет шаги последовательно, внутренняя модель должна быть graph-compatible.
-
-Используй внутреннюю task graph representation:
-
-```json
-{
-  "nodes": [
-    {
-      "id": "step-1",
-      "kind": "tool",
-      "tool": "shell_exec",
-      "args": {"command": "hostnamectl"},
-      "depends_on": []
-    },
-    {
-      "id": "step-2",
-      "kind": "respond",
-      "depends_on": ["step-1"]
-    }
-  ]
-}
-```
-
-Rules:
-
-- scheduler валидирует отсутствие циклов
-- planner может возвращать `PlanStep[]` как transport format
-- после валидации план преобразуется во внутренний task graph
-- MVP может использовать sequential DAG scheduler
-
-==================================================
-14. EXECUTION ENGINE AND SCHEDULER
-==================================================
-
-Создай:
-
-- `app/core/execution_engine.py`
-- `app/core/execution_scheduler.py`
-
-Execution engine работает под управлением runtime loop.
-
-Execution engine обязан:
-
-- принимать валидированный task graph
-- поддерживать execution cursor
-- выбирать следующий исполнимый шаг
-- учитывать зависимости шагов
-- вызывать tools/coder через adapters
-- возвращать структурированные результаты в runtime loop
-- публиковать execution events
-
-Минимальные состояния:
-
-- `received`
-- `retrieving_memory`
-- `orchestrating`
-- `planning`
-- `awaiting_permission`
-- `executing_step`
-- `critic_evaluating`
-- `storing_memory`
-- `completed`
-- `failed`
-
-Execution engine не должен заменять runtime loop.
-
-==================================================
-15. EVENT BUS, EVENT STORE, REPLAY
-==================================================
-
-Streaming events недостаточно.
-Нужен внутренний event backbone.
-
-Создай:
-
-- `app/events/event_bus.py`
-- `app/events/event_types.py`
-- `app/events/event_store.py`
-
-EventBus обязан:
-
-- принимать runtime domain events
-- гарантировать ordering per task
-- выдавать monotonic sequence number per task
-- публиковать события подписчикам
-- писать события в durable store
-- поддерживать projection в streaming layer
-
-Delivery guarantees:
-
-- ordering guarantee per task обязателен
-- delivery model минимально `at least once`
-- consumer-side idempotency обязательна
-- deduplication key: `task_id + sequence`
-- replay не должен ломать состояние при повторном применении уже известных событий
-
-Минимальные event types:
-
-- `task_received`
-- `context_built`
-- `llm_called`
-- `llm_result_received`
-- `plan_created`
-- `step_started`
-- `tool_called`
-- `tool_completed`
-- `coder_called`
-- `coder_completed`
-- `critic_called`
-- `critic_completed`
-- `memory_write_suggested`
-- `memory_write_decided`
-- `memory_written`
-- `permission_requested`
-- `permission_resolved`
-- `checkpoint_saved`
-- `task_completed`
-- `task_failed`
-
-Event sourcing baseline:
-
-- каждое значимое действие должно порождать событие
-- execution history должна быть воспроизводимой
-- должна быть replay capability step-by-step
-
-Каждое событие должно быть idempotent и deduplicatable по:
-
-- `task_id + sequence`
-
-Streaming transport не является source of truth.
-
-==================================================
-16. STATE PERSISTENCE AND CHECKPOINTING
-==================================================
-
-In-memory only state запрещён для autonomous mode.
-
-Создай:
-
-- `app/state/task_state_store.py`
-- `app/state/checkpoint_store.py`
-
-Используй:
-
-- SQLite как минимум для MVP
-
-State persistence layer обязан поддерживать:
-
-- task creation
-- current task status
-- active step
-- current plan/task graph snapshot
-- latest context summary
-- latest safe checkpoint
-- resume after restart/crash
-
-Обязательные правила:
-
-- checkpoint после critical transitions
-- periodic checkpointing
-- resume from last valid checkpoint
-
-==================================================
-17. ASYNC EXECUTION ISOLATION
-==================================================
-
-Нужна явная изоляция между LLM loop и tool execution.
-
-Обязательные требования:
-
-- долгие tool operations не должны блокировать runtime loop
-- блокирующие операции должны идти через async adapter / isolated runner
-- streaming и event publishing должны продолжаться во время исполнения tool
-
-Минимум:
-
-- async tool runner
-- timeout wrapper
-- cancellation handling
-- bounded concurrency policy
-
-==================================================
-18. TOOL SANDBOX LAYER
-==================================================
-
-Помимо permission checks нужен sandbox layer.
-
-Особенно для:
-
-- `shell_exec`
-- `web_fetch` with browser fallback
-- generated helper scripts
-
-Минимальные требования:
-
-- execution context isolation
-- resource caps
-- timeout enforcement
-- working directory restrictions
-- optional environment variable allowlist
-
-Для shell нужно предусмотреть:
-
-- CPU / wall time limits
-- path restrictions where possible
-- запрет неявного escalation
-
-==================================================
-19. TOOLS SYSTEM
-==================================================
-
-Нужен tool registry и единый tool interface.
-
-Обязательные tools для MVP:
-
-- `shell_exec`
-- `file_read`
-- `file_write`
-
-Второй этап:
-
-- `web_search`
-- `web_fetch`
-
-Требования:
-
-- единый base tool interface
-- единый `ToolResult`
-- централизованный logging
-- timeout/error isolation
-- tool execution только через tool layer
-
-==================================================
-20. TOOL SAFETY AND PERMISSIONS
-==================================================
-
-Перед потенциально опасным действием система обязана проверить policy.
-
-Источники policy:
-
-- `config/permissions.json`
-- persistent store пользовательских решений
-
-Поддерживаемые режимы:
-
-- `allow_once`
-- `allow_always`
-- `deny`
-- `ask_always`
-
-Минимум опасных shell patterns:
-
-- `rm`
-- `mv` в sensitive paths
-- `chmod`
-- `chown`
-- package managers
-- `curl | bash`
-- `sudo`
-- `shutdown`
-- `reboot`
-
-Rules:
-
-- опасная команда не исполняется до решения пользователя
-- решения пользователя сохраняются
-- execution layer получает уже разрешённое или отклонённое действие
-
-==================================================
-21. MEMORY SYSTEM
-==================================================
-
-JSON file не использовать как primary memory store.
-
-Используй:
-
-- SQLite как primary metadata store
-- FAISS или hnswlib как vector index
-
-Memory обязана поддерживать:
-
-- insert
-- semantic search
-- delete
-- update weight
-- filtering by kind/session/task/source
-- embedding versioning
-- reindex
-
-Минимальные таблицы или эквивалент:
-
-`memory_items`
-- id
-- text
-- kind
-- source
-- weight
-- task_id
-- session_id
-- metadata_json
-- created_at
-- updated_at
-
-`embeddings_index_map`
-- memory_id
-- embedding_model
-- embedding_dim
-- vector_slot
-- created_at
-
-Rules:
-
-- retrieval учитывает semantic score и memory weight
-- low-value memories не должны загрязнять context
-- смена embedding model требует reindex path
-
-==================================================
-22. MEMORY WRITE POLICY ENGINE
-==================================================
-
-Critic только предлагает.
-Memory write policy engine решает.
-
-Создай:
-
-- `app/memory/write_policy.py`
-
-Policy engine должен учитывать:
-
-- critic score
-- thresholds из config
-- kind/source memory candidate
-- deduplication signals
-- session/task scope
-- safety constraints
-- runtime weight modifiers
-
-Решения policy engine:
-
-- `store`
-- `store_with_weight`
-- `skip`
-- `merge_with_existing`
-
-Policy engine должен быть детерминированной функцией.
-
-Минимальная форма:
-
-`(critic_score + memory_type + runtime_weight + dedup_state + safety_state) -> decision`
-
-Нельзя ограничиваться примитивным правилом вида:
-
-- `if score > 0.7 then store`
-
-Нужно зафиксировать:
-
-- threshold model
-- scoring formula or weighted rule set
-- conflict resolution for near-duplicate memories
-- merge policy for same-fact updates
-
-==================================================
-23. CRITIC LOOP
-==================================================
-
-Critic получает:
-
-- tool result
-- coder output
-- optional execution context
-
-Возвращает:
-
-```json
-{
-  "correctness": 0.91,
-  "usefulness": 0.77,
-  "safety": 1.0,
-  "memory_store": true,
-  "weight": 0.84,
-  "explanation": "Result is correct and safe, useful for future similar tasks"
-}
-```
-
-Critic должен вызываться:
-
-- после tool execution
-- после coder output
-- перед memory write suggestion
-
-Critic failure не должен ломать execution path.
-Critic возвращает suggestion, а не final write decision.
-
-==================================================
-24. RETRY AND RECOVERY POLICY
-==================================================
-
-Нужна явная retry/recovery стратегия.
-
-Обязательные политики:
-
-Planning retry:
-
-- ограниченное число replan attempts
-- каждый retry логируется как событие
-
-Tool retry:
-
-- только для idempotent operations или явно разрешённых tools
-- policy зависит от типа ошибки
-
-Partial failure recovery:
-
-- `fail task`
-- `retry step`
-- `skip step`
-- `replan`
-
-Critic recovery:
-
-- critic failure переводится в fallback policy
-
-Минимальные поля в `config/runtime.json`:
-
-- `planner_retry_limit`
-- `tool_retry_limit`
-- `replan_limit`
-- `step_timeout_ms`
-- `task_timeout_ms`
-- `allow_recovery_replan`
-- `checkpoint_policy`
-- `event_retention_policy`
-
-==================================================
-25. STREAMING SYSTEM
-==================================================
-
-Требуется FastAPI WebSocket streaming.
-
-Но streaming должен быть projection from event bus, а не отдельным источником правды.
-
-Минимальные внешние события:
-
-```json
-{ "type": "status", "data": "planning" }
-{ "type": "token", "data": "..." }
-{ "type": "plan", "data": [...] }
-{ "type": "tool_start", "tool": "shell_exec", "step_id": "step-1" }
-{ "type": "tool_result", "tool": "shell_exec", "data": {...} }
-{ "type": "critic", "data": {...} }
-{ "type": "permission_required", "data": {...} }
-{ "type": "final", "data": {...} }
-```
-
-==================================================
-26. CONFIG SYSTEM
-==================================================
-
-Всё должно жить в `config/`.
-
-Обязательные файлы:
-
-`config/models.json`
-- model paths
-- model roles
-- inference params
-- context sizes
-
-`config/prompts.json`
-- orchestration prompt
-- planning prompt
-- coder prompt
-- critic prompt
-
-`config/permissions.json`
-- dangerous command policies
-- sensitive paths
-- default approval behavior
-
-`config/runtime.json`
-- timeouts
-- streaming settings
-- critic fallback policy
-- memory thresholds
-- retrieval top_k
-- replan limits
-- max execution steps
-- checkpoint policy
-- event retention policy
-
-Hard rule:
-
-- никаких хардкодов prompts и critical thresholds в коде
-
-==================================================
-27. API SERVER
-==================================================
-
-Сделай FastAPI backend.
-
-Минимальные endpoints:
-
-- `POST /chat`
-- `WS /stream`
-- `POST /tool/execute`
-- `GET /memory/search`
-- `DELETE /memory/item/{id}`
-- `GET /health`
-
-Требования:
-
-- Pydantic request/response models
-- единый error handling
-- dependency injection where разумно
-
-==================================================
-28. CODER MODULE
-==================================================
-
-Создай:
-
-- `app/core/coder.py`
-
-Минимальный интерфейс:
-
-- `generate_code()`
-- `fix_code()`
-- `refactor_code()`
-
-Используется только coder model.
-
-==================================================
-29. CLI
-==================================================
-
-Добавить CLI для локального использования.
-
-Минимум:
-
-- отправить задачу
-- получить sync result
-- показать streaming mode
-- выполнить memory search
-
-==================================================
-30. TELEGRAM BOT
-==================================================
-
-Только optional stub.
-
-Если реализуешь:
-
-- не связывай core runtime с Telegram-specific code
-- делай только thin adapter layer
-
-==================================================
-31. RELIABILITY AND TESTING
-==================================================
-
-Обязательные инженерные требования:
-
-- structured logging
-- typed exceptions
-- timeout handling
-- graceful failures
-- no silent pass
-- no giant mixed-responsibility files
-
-Минимальные тесты:
-
-- runtime loop transitions
-- event ordering
-- checkpoint save/load
-- replay path
-- plan validation
-- permission policy checks
-- tool registry
-- shell safety path
-- memory insert/search
-- memory write policy
-- router basic flow
-
-==================================================
-32. FORBIDDEN SHORTCUTS
-==================================================
-
-Запрещено:
-
-- single-model architecture
-- hardcoded prompts in code
-- bypassing runtime loop
-- bypassing router for policy decisions
-- tool execution outside tool layer
-- dangerous command execution without permission check
-- JSON file as primary memory store
-- in-memory-only task lifecycle for autonomous mode
-- direct streaming transport as substitute for event bus
-- critic-only memory write decision path
-- accepting invalid planner JSON as-is
-- giant monolithic runtime file
-
-==================================================
-33. DEFINITION OF DONE
-==================================================
-
-Работа считается выполненной, если:
-
-1. Есть модульная структура проекта.
-2. Есть typed contracts для core entities.
-3. Есть Runtime Loop Controller как центральный control loop.
-4. Есть Context Builder.
-5. Есть Router как policy evaluator / decision suggester.
-6. Есть Execution Engine / Scheduler.
-7. Есть EventBus + EventStore + replay-capable history.
-8. Есть state persistence + checkpointing + resume.
-9. Есть permission-gated tools.
-10. Есть tool sandbox layer.
-11. Есть coder integration.
-12. Есть critic integration.
-13. Есть memory на SQLite + vector index.
-14. Есть memory write policy engine.
-15. Есть FastAPI API.
-16. Есть streaming как projection от event bus.
-17. Есть CLI.
-18. Есть базовые тесты critical path.
-
-==================================================
-34. REQUIRED DELIVERY STYLE
-==================================================
-
-Работай итеративно.
-
-После каждого шага:
-
-- показывай код
-- показывай структуру файлов
-- кратко объясняй решение
-- явно отмечай допущения
-- прямо помечай stubs
-
-Не перескакивай к финальному “всё готово”, если каркас ещё не выстроен.
-
-Начни с:
-
-1. project structure
-2. typed contracts
-3. runtime loop skeleton
-4. event bus skeleton
-5. state persistence skeleton
-6. config loader
-7. context builder skeleton
-8. FastAPI skeleton
-9. router
-10. execution engine / scheduler
-
-Сначала построй правильный каркас.
-Потом наполняй его логикой.
-
-КОНЕЦ ЗАДАНИЯ.
diff --git a/app/__init__.py b/app/__init__.py
deleted file mode 100644
index 9b29354..0000000
--- a/app/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-"""ducklm application package."""
-
diff --git a/app/api/__init__.py b/app/api/__init__.py
deleted file mode 100644
index 92d0da2..0000000
--- a/app/api/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-"""API layer."""
-
diff --git a/app/api/server.py b/app/api/server.py
deleted file mode 100644
index fbf9fff..0000000
--- a/app/api/server.py
+++ /dev/null
@@ -1,170 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-from contextlib import asynccontextmanager
-from pathlib import Path
-
-from fastapi import FastAPI, WebSocket, WebSocketDisconnect
-from fastapi.responses import FileResponse
-from pydantic import BaseModel
-
-
-class CriticFeedbackRequest(BaseModel):
-    feedback: str
-    task_id: str | None = None
-    session_id: str | None = None
-    feedback_type: str | None = None
-    severity: str | None = None
-    correction: str | None = None
-    remember: bool = True
-    retry: bool = False
-    assistant_answer: str | None = None
-    correctness_override: float | None = None
-    usefulness_override: float | None = None
-    safety_override: float | None = None
-
-from app.core.permission_resolution import PermissionResolutionRequest, SecretResolutionRequest, PasswordResolutionRequest, ReviewResolutionRequest
-from app.core.contracts import UserTask
-from app.runtime.runtime_controller import RuntimeController
-from app.streaming.manager import StreamingManager
-
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Load models on startup."""
-    print("Lifespan: Starting model loading...")
-    try:
-        print("Lifespan: Loading models...")
-        runtime.load_models_at_startup()
-        print("Lifespan: Models loaded")
-
-        # Rebuild vector index if empty but memory store has data.
-        if runtime._memory_interface:
-            store_count = runtime._memory_interface.count()
-            if store_count > 0:
-                idx_count = runtime._memory_interface._vector_index.element_count
-                if idx_count == 0:
-                    print(f"Lifespan: Rebuilding vector index ({store_count} entries)...")
-                    runtime._memory_interface.reindex()
-                    print("Lifespan: Vector index rebuilt")
-    except Exception as e:
-        print(f"Lifespan: Failed to load models: {e}")
-        import traceback
-        traceback.print_exc()
-    
-    yield  # Server runs here
-    
-    print("Lifespan: Shutting down...")
-
-
-app = FastAPI(title="ducklm", lifespan=lifespan)
-runtime = RuntimeController(base_dir=Path(__file__).resolve().parents[2])
-streaming = StreamingManager(runtime.event_bus)
-
-
-@app.get("/")
-def index() -> FileResponse:
-    return FileResponse(Path(__file__).resolve().parent / "static" / "index.html")
-
-
-@app.get("/health")
-def health() -> dict[str, str]:
-    return {"status": "ok"}
-
-
-@app.get("/events")
-def list_events(limit: int = 500) -> dict[str, object]:
-    safe_limit = max(1, min(limit, 2000))
-    return {
-        "events": [
-            event.model_dump(mode="json")
-            for event in runtime.event_bus.list_recent(limit=safe_limit)
-        ]
-    }
-
-
-@app.post("/chat")
-def chat(task: UserTask) -> dict[str, object]:
-    submit = getattr(runtime, "submit_task", None)
-    if callable(submit):
-        return submit(task)
-    return runtime.handle_task(task)
-
-
-@app.post("/permissions/resolve")
-def resolve_permission(request: PermissionResolutionRequest) -> dict[str, object]:
-    submit = getattr(runtime, "submit_permission_resolution", None)
-    if callable(submit):
-        return submit(task_id=request.task_id, decision=request.decision)
-    return runtime.resolve_permission(task_id=request.task_id, decision=request.decision)
-
-
-@app.post("/secrets/resolve")
-def resolve_secret(request: SecretResolutionRequest) -> dict[str, object]:
-    submit = getattr(runtime, "submit_secret_resolution", None)
-    if callable(submit):
-        return submit(task_id=request.task_id, secret=request.secret)
-    return runtime.resolve_secret(task_id=request.task_id, secret=request.secret)
-
-
-@app.post("/password/resolve")
-def resolve_password(request: PasswordResolutionRequest) -> dict[str, object]:
-    submit = getattr(runtime, "submit_password_resolution", None)
-    if callable(submit):
-        return submit(task_id=request.task_id, password=request.password)
-    return runtime.resolve_password(task_id=request.task_id, password=request.password)
-
-
-@app.post("/review/resolve")
-def resolve_review(request: ReviewResolutionRequest) -> dict[str, object]:
-    submit = getattr(runtime, "submit_review_resolution", None)
-    if callable(submit):
-        return submit(task_id=request.task_id, decision=request.decision, correction=request.correction)
-    return runtime.resolve_review(task_id=request.task_id, decision=request.decision, correction=request.correction)
-
-
-@app.post("/critic/feedback")
-def critic_feedback(request: CriticFeedbackRequest) -> dict[str, object]:
-    feedback = runtime.handle_critic_feedback(
-        feedback=request.feedback,
-        task_id=request.task_id,
-        session_id=request.session_id,
-        feedback_type=request.feedback_type,
-        severity=request.severity,
-        correction=request.correction,
-        remember=request.remember,
-        retry=request.retry,
-        assistant_answer=request.assistant_answer,
-        correctness_override=request.correctness_override,
-        usefulness_override=request.usefulness_override,
-        safety_override=request.safety_override,
-    )
-    return feedback
-
-
-@app.websocket("/stream/{task_id}")
-async def stream_task(websocket: WebSocket, task_id: str) -> None:
-    await websocket.accept()
-    replayed_events = streaming.replay_events(task_id)
-    for event in replayed_events:
-        await websocket.send_json(event.model_dump(mode="json"))
-    if replayed_events and replayed_events[-1].type in {"task_completed", "task_failed"}:
-        await websocket.close()
-        return
-
-    queue = streaming.subscribe(task_id)
-    try:
-        while True:
-            try:
-                event = await asyncio.wait_for(queue.get(), timeout=30)
-            except asyncio.TimeoutError:
-                await websocket.send_json({"type": "heartbeat", "task_id": task_id})
-                continue
-            await websocket.send_json(event.model_dump(mode="json"))
-            if event.type in {"task_completed", "task_failed", "task_awaiting_permission", "task_awaiting_input", "task_awaiting_review"}:
-                break
-    except WebSocketDisconnect:
-        pass
-    finally:
-        streaming.unsubscribe(task_id, queue)
-        await websocket.close()
diff --git a/app/api/static/favicon.ico b/app/api/static/favicon.ico
deleted file mode 100644
index 1566b590fcf80dce46fcbaf095889da5164c7f18..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 16958
zcmdU130##`+P|eNm%A_A3n*|!aRbqC-@z2M%)Qn5T1Q(@L0nrDP&2ihn);jyWwB|Q
zZ@#p?_MCFmQd2NRK~ULLb`&u)waowbe_p(fgIpF-r|9>1?t9+zp7Z>l{hasxrBdC(
z{~{t({MVuCJWQpssZ^@YBzCD1NYd}>&tNd9ZZJ7G|GlK=N$W|wNhPE+q>GnGeoLR+
zlQA+@o{?v7G=Dc@{z&q@inNb}7Lv>%bIF`Hg4gRcrw{p?N7_SbDP~vC%iOYttaZKk
zZs|O2$&1))CCL@v*SlZVk~L-Rmh#_{=VQ5kA?bSKYwCQmw(!sr4A=7PV6OS`e-L+F
z`3Nt<)3sn}?laL`r}Q_3-6;#<NqB26o~Dl1aqgABLEKLK&Q^F7UYm-mDdYS(-<sb6
zcBc%4SK+xS*cuxn^Q8TbusdZSJhxN^Vn@Fxy*sfxTj9MWzVLhScPCa?tzc-(#_PK;
zvDDR^zqkJ8=7qt;{#(p(*D80xBADvabA5eHuWj@um<&de4tm;)#$bY%-U_YW!afUp
zy{+)I+hDib;cfR($i~)(*U~R-LnO~R$!4>`WHMc=3<OhsSnKvlJ=w%HOwfCI!`Ebn
zRp$nS*#tLZ04$Dvc=V|y*!bzU$j{t^{H#pmWM^aFfrHqSm5crSeJFcB^0PB>Fe?kW
zS=lb>0LSL!<RF`EW@aX~Z{Lozv@~?+&;e#M*R}b;!1c{mZ_51N`8FB!b#XLSE6aYO
zu*vxKMp%qin7p*`wc6mO_QQbD(RgIuTUhqm=g7`Ii4&z2q+%SeC_#DIQB+hMMP>O>
zoG3qrYEo5MF{#8QmGjxk@={b)l%bO2N{f%;cx5?`6&2v6l`CK{8eueB;OV7-#bT+8
zx9+uIYiQ3Td#bw6G<a<?YGI@gczXDve<ZQTZNSuLzQBq#d+_D9QXDHjMZPOhU2zOQ
zROO?lA{Qqs4&g*u0V+$2P|*lgmL0)y%1x11MKLN$3Q<{7r1Vu+mSW?^FBDraQVx3k
zRr;=vcfnZK=H*)U)RoP`Z`8qL_Eh{mxa$Nw^yCKI_rzD2_FOs=-#myvznhJ3(o0Zu
ztcH9Of5kzZJ)VOfs}52Q1+)obt0Wc|bfTgN)n$dG0)<YN7xK9xg-(<f;AD9bs%S$y
zzTJY3!R=wQO3Z0g27<A^^8-0os83$5`-op>bcfp14tL)B3Z_JF#e~PU<Kg+=<GI%h
zkeG4+tKZ4MmR-d-R&o+2D@$;?G9M?)a&dzAPgGK`6@|6G<+>gcIh`u6pK91&U4BHd
zjlaE}hW3sCn2iP{UMg4^7hGTxtaWW*kBhOc=3u`bMvD>tordENF(2d3$F^eJ)E#&r
zb{n4gOCI7=^YCKoe!RLa1D|cq!@<H6D5Wn{(VtITvgH%hX+tDBZ-~SmDogUP`sEc!
zT=X;o+WJAQRzoK_$zpZIyI{Q%znrbP8ryKc(T=D|Ph-;bFL2kBJ2CF@Z*kA`FEQuk
zJ%~>^j2DvguwrcvUP|4Iw2uy8$G!?2DXBquNh$5%7^$d1I>Gy{sG9f027atMg0DaS
z08zsRWAcQt@bU41-e`hG*Pt(4d7ilHn(#CJ>vaAYdCyXei~a|qrftKxC%544=#LS-
zWGmv==3+@w9ui4QDT`&P`|#>J>G*V0HuCe!P*qZf>Qd%`(xaE@B!6E?68oxLklZ^(
z|EXbo$k_HZA_nxvgmGgqbjT3qLOpGu!CFBuJ8j_F&S|CgF?Ym0kGzVBPkn{aPh?;e
z{eS%9n{a>3msq$u2aA()uryUD8_%a?VR_nqtVqkmi|h8{e>WB5U|t!@N~@S7NTp?r
z>m@i|N}nz+V7z2}aOz)TCG%z_<yBcCIkS|%OF3SsnEL+&1H!wbZ}(7)jv4_QYbzb=
z5vj>s=vFWbeus<uuAaNxuhTQW*t+1}r{2W)8DC%|ZGTMkF51F3xGVZAJQSafMQaaZ
zQBp1vlXDeWEKbSB(lpuyW8;#wd-3{vJMht_9PG*}N8!;^C_Yw2%oV7i9h@pH#OdS3
zjQ7M|l8;lBNALq<W({LP)v-g=J#z=+U(UV^OnvA94D1n#@U9&(ux~F!45bY)7B+xi
za0qsJ&u|%2bAD^7f3<fPjGOi{CePY{yQY7N2{SihV$9dLBYG2}qBmpiYlr9uMWg~O
zT0>t*&8JLq5TAU2Z5EciC34w|7t_-5)`$DBWk(_M^NypcxEhs5N>Fi(^-nQ<gz>V9
zGEnkC#bHz)%|urEMyz>#1tJIbz~EjX7;sxx>OTa1ZtIRwBcl`>XatY*z05V?7yr_j
zgRtt24fyb%1xQ^_`~L6Ecw+trOpN{r<7RBaL(gnM{F)*>Cqy|s{pMjTWGskJJ&1&~
zLwK4pS(H+M#VNU1N#A(ot=;%b+IIZU2Yaz|cLnxlmElP7X{7JY!6%=5&YEvICQllJ
zp0{;ESf~TNdv-*x@J{Fv5{&TBE~F6j?B1RIJ<+>&uf}jmEwsK78M&OpM7yzZpY#6T
zKf({Ef5DGue#K8e{)(FFi}+^yalG)#PCPd61I$^mjs9PNg=_QhY;rypGA}-pl#K*p
zPE0<G#q^QH<U?4Jd=N_&%4Mv`#S8S6SKi9NYj3CH-OqBc`mg`QW3!fF<itl2K6o^O
zLc`IyTNpZrh9RU|PlSY!LVF^tTR6hQ!!cs`?Jjx0gjulfzE=EZ=Klb#7viF$asJ#-
zICuUBTsVIcznngWpMSW3pMC%i6#R%kt=WuaNxNw0Sx8)$M_b6nBH~P7Y+TMb@B(Qm
z<AcazA^qeT+RB18NAYyh5hO5&ER|T1vJcDB_TUBj$tucY<+|;7Y28*VeeHeBPDsMk
z*p+y2T0EvjCm?QaB4T3ZDErDrunG2(M#i|5q12i_dNsN+=I+edi>jY%aOPq)&YUa5
zFO<dkGv`rLa~9R7FW{3cd9?3z#(_*Mrrj@2qYtcMUPvKP4oejLA`|wrW>~bAc0x*|
z-7Fy`rsVM+<(5Pl$}{U|FKIcHMGjV^GM}VmVpU2y-gsv_maI%h+rW<8D>s<`1(#r#
zcdsrnHg?Z!F`<>oh>(%DW8%}%c=eNH9Inp6=?hgj^K&&WoIQ?nKc7Vf?ct@Ae_+w;
z^o4af)GKwGD*DdDLgI}l?#0BnjJjT)nuV3D8-+wJiy1Ex)*eJ6$1X`N#KJcZ;n_F%
zY*Ieud4&B(=tKEf@<t{epO*@qH3Vv%1x8b&xSa2x>&35}XBhR+Qa(o6>oVSZ>&)=h
z){>dO-MNm3(E>L=8$#|Ijo=yM&~etC7`OBh{4M=MRGmA4GZ%j19_R=L3>*qa$Nrf9
zY!Y64=X=Db?IY$w)<=cpzevHim@<h^&SrdMd>}1kY*?7g+`$-`m{v%?IgIC$a%%mD
zW1puTJ->$eBk5a=7#j_Z-T|G~1_OC*On1EReE;6~SmHNaCeF?I^{lZq%&`{P1DoAp
z@8>{=r*4PuoJe$<KMdiqLvYuE|HAvbKEqcTo8ai+53O1cvv)8$_a1|orSIZ}wHeHP
zjQj5q2kWC{>keY!nrvb`K)eNvkwvtZqa^B?F>={Dj!UKOB=5&d@6z_y?Zni1s}T^~
z2YQnq%+!_e(}aDXU>5u??cd0`Y&s*X#I56RHL<$Wri465p1fA`+{<dzz()L@{_be^
zhdyx3j)Y%Ke+0!1K)X2+=r$_~Q85#tb?~f$K55jNp!KxEOWOurdX2<`GndkLH)2)l
zH(0**JMJ0wP^a1SYwjILi(lW5=iki4pZ}VJKc#$+<!@}mD=FXM?njp)C@cc*RtMa4
zq7N-BCbdE)la_mcM*ZLU-9^3nH}O2lm-C7Y1iyz#2Y0g;YLfoa{O)qrM}II=M(!5g
z>(?HE(W7CSJ`|=|gQ1D*1^;<N(S7<z`1TKm+DdsbX4qJt+d044<_$Nk0Xmxl;R8lv
z)THT{@mwlaCT+#bsp(kp<~FQ;I|HkdHe>$cw=rgNEP4+ZOS=n(m%#xKGxvBlEA3v*
z{l6Ag!B2e{8Jioa{{^$eKbQGWeBMeQ7i=O+jo>!(oQd}3$MY7uMh~^d1hdY{JYeB@
zoDr@4^=LO`5Q1j02A@qC%<T_9+Cb--qY*g1KW?#UDM#vsX9rgDXr>&*&UExAIX89F
z*`V?wdHO&_U-n{s+1e1qK3}v_TcPGUI>sA4ZI7=u6utzjmHb)77CE29C8O5+QXHV)
za()*i*w?$b$7keB$xJfA$ob8bt4XJZo#zTR@@jN9z{|@MRx|S;$ztZd+usvi$A_WE
z%z+4dCIbF(Ll8J?I06|L!lK8*J2DI^lP7J~3{UD^c(ssBm*WX#N;~tQtcBE+vlmIr
z^>ws4quEP2b23{zxE_hvEYy!eGMCm0vxf0cW3MI7>l)wXZ;k35Bk|44U{>r%<Z7dT
zS()qHsDsv8J6d`9P&am{6rEY2GI&E}w;<roe&|3SaLl;_fpMeYPh0ShjYJ2^q3aXF
zq3`3sEySqi*{M;m)5pZt6h3JedXt8GH!p?ce9<iNOj`DvOkRv3B;Gg5HSviIg-j%)
z<UNJtx6AV%Ctj(4UG9IB_)h(s$eW$`y=h+>Z`!7J8+4x<i>d#|v#!sN;mL2$AbQu&
zn6~2#9{jol4{puFm@l^@=+&2DjpKR5{M%uf-3M*v^+lIigAg*MFVul%+Iy{DwUmX9
z`gQKr#2*w~#OBQyVIx^dl7Ho_RcuoBo?4RRBt>>?<#}Zekr!oTCN-dYx##@Gtln|L
zhxn<)J*oA5HH>v_yI@S(mx$i`BVxWgkBK{hQQwi+j?cJ=NjrbWJsH1Z;+~7RGvh4A
ze(^bsvEyN%I}`y6`Z724N9a={5HzAU-xcUtJJ7~y*BZ*!NT067E|_Yunk7am#Pcln
zDS4M|E&hMmwiCM&^U1SOe1+5qe%b%Z89=?~ACg1u1`XUKu9G*bmmOh`&BgeA$I+kI
zrtG?i@w<M(s2x9H+-~5m9lv7ou5*~2einD{y+|3H#iY&KVV-qAeB*|q-GY7ym^+a5
z(MW_oG7{RZe#{wsXGEWp{9>eyiJV0Sf=6nLM)+^Zedqay;9phWI#Nrt(d*#Fcr9^A
zr}IYeBhxYQU?uwHp2mHd=McNU2GQT2$CMqwJ+y(z8Rzj}&T&lmZWkigzX!hsQ(>DP
z1;^Y$%!9q)JGU<am>0Uv7>2;HA-L6|fhX6{bN-u*UoZ=Hp?c3hYxlRcdJuiswahzR
zdSLw9A7Jc;%@`8@I@(Wq2&&!_aNBc##~+D(()J^;CMH1rIQOSBM<96mD0Ge+&RU4N
zpVtq*^9M5@4nh0bgV5#4KJW}Q!9yptio8Rl-;-{V{;xd$6dCLxU1d*Vmmai3;VVdE
zL;!8Ty)}7OF(!KWDSmpZ#UBBq@5P*5S!lmB0lKMU(dn@f=rJK2-5wZ#9?_BL!kXAG
zb`Wf{hQWvAm>YpkytZ}=WDUvKcXROzR;TWr?N#qTMCV%OK(T$1ji1hdwpt_WK@)sf
z^R@Fb!(XF?jrE*4tOG_bkAp4d4*32t0zpH%Av~lD+Vu`X@Z_H8I5QHy#BGlqi8isL
zSRY5C{mcji_Tygd5`HE2ah;o>|N7p4T=o7-_^@2c%^KP3@Y;*E;Hm5Z^zdZOV`HDh
zNR6K-^IAW2iiv>Z;l60sD;Ps~CeyWhX9V04N?(Y8_v|6?oy}S(CW<tIW24ZXXLmB^
zP4s_P@4v(jq#n5J1A^bq+TWWbHJ}IWK(IS&;Z}@YttHp+J&T@oy(6+K!lo0~w801(
z7L4v;?GYN@8J#EfWZoMH%Yr_Pe*@5dRwQ!(V?*@qFm(-tmgm&sXE(|J1*6mE>$d;)
z{#)?N-cLwsAn^r}kJLgosl8bjxUuH<WIe8CEz-J;1zjJAMA(cdbfOIe-_5hRyP5B2
z+>SO22EsJIC)&;*fDW-k5jryp0r&QSyAS2b_xR##vYwOVTIfoBT!-I<oTH)l-y)CL
z=B!i8CuKwY?z9h&cHZcC{~&amcL&;X58xNWco!Q1dmQ6_+(0;H4o28a=Km=X@a)KX
ziuTi-^;$Squ+`Oheb-I?{zYm8tDF%?{Y@F@xUad@#{=de4um{Bi1pHNgiRZP&?h1h
z@;LR*KA)i-(5kJPdui7BH@3#PQnpR~{zc@`r0<`^RxOO(-t-MS^MPH%Juvfw#YS5&
zxxvlHo#Y7*e;rx}s?n;A8*a6=LTldBa&Kp5-Pb~WHud@De*YvgXwvs@vTqR#vj6gC
zePlQA8zl{|=?gkN_aL&bVe4g3Lt|m9?A>THCXtoY_}AUX>yt@yzkhT3gnT<#-+W4+
z_`aNF$T^sN_od^$)8Hifg2YVOm&^Hy%p)<rmTJ!x3>+h83=OSu9qtJRC!XfEWY_ig
zuYzB&E9ZdJw->R<d4Wdaz$G#YMulX5D*K$;JR-TonNO|*MprUy>G!W9FZqTz%_Wbb
zTaxHi-nYu0t+rOWgzwUQ*`td+D8#XHW@WhKJ1*zuxE9<Ko`v`3>b$=3GS5}tK}-CT
z+DPKR<N?7W@jxM7i%g_X>KeIUdrnmwGtJC<a;_ua(6j)ft7nDR>xJDZ5AlTt<G~HU
z;R;_v{lcU0>g4-+w-QT#i{AzdPuCjfn=7NiT&u}762~^Auj_G7coCkOi=(CErJgLj
z3HV&eNY-v@P1{neuAZ0ugzTv*Z=xJzEm^a%y^JfKZ$!VGAIQ1F9+E44wX|R6mNn}8
ze)UFRy4E@6Y;6^3AE~97We%CE?s?s{@^-VIbC7rEcUJ59dpD_s<ox!)`3_q8<erQX
z+jhR2xS{+4P^tJ2a89KPly<X9)k<3VKL)V5C~drQTNzLz*9K*nGB81|Z>b&52bAG_
zz}a4q+bY#i&Mtp}&Q_U*6BQ}lDi3G7S*857s@o>i_TN&st?mBL{acqG=k?}Vmaf84
zRNL?2Y@Kp&%Ej3_<>r*5vsKh0ib;@(DEH0MQejTLC=&$!OK(m+Ue+rI@HYPx_5MFC
C9Zc*1

diff --git a/app/api/static/index.html b/app/api/static/index.html
deleted file mode 100644
index eeee4c4..0000000
--- a/app/api/static/index.html
+++ /dev/null
@@ -1,1089 +0,0 @@
-<!doctype html>
-<html lang="ru">
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1" />
-    <title>DuckLM Runtime</title>
-    <style>
-      :root {
-        --bg: #0f0f0f;
-        --panel: #1a1a1a;
-        --panel-light: #242424;
-        --border: #333;
-        --border-light: #444;
-        --text: #e8e8e8;
-        --text-muted: #888;
-        --accent: #4a9eff;
-        --accent-hover: #6aafff;
-        --success: #4caf50;
-        --warning: #ff9800;
-        --danger: #f44336;
-        --user-bubble: #2d4a3e;
-        --system-bubble: #1e3a5f;
-        --event-bg: #1a1a2e;
-        --event-border: #2a2a4e;
-      }
-      * { box-sizing: border-box; margin: 0; padding: 0; }
-      body {
-        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
-        background: var(--bg);
-        color: var(--text);
-        height: 100vh;
-        overflow: hidden;
-      }
-      .app {
-        display: grid;
-        grid-template-columns: 1fr 380px;
-        height: 100vh;
-      }
-
-      /* === Chat Panel === */
-      .chat-panel {
-        display: flex;
-        flex-direction: column;
-        border-right: 1px solid var(--border);
-        min-width: 0;
-        height: 100vh;
-      }
-      .chat-header {
-        padding: 16px 20px;
-        border-bottom: 1px solid var(--border);
-        display: flex;
-        align-items: center;
-        justify-content: space-between;
-        flex-shrink: 0;
-      }
-      .chat-header h1 { font-size: 1.1rem; font-weight: 600; }
-      .chat-header .status {
-        font-size: 0.75rem;
-        color: var(--text-muted);
-        display: flex;
-        align-items: center;
-        gap: 6px;
-      }
-      .status-dot {
-        width: 8px;
-        height: 8px;
-        border-radius: 50%;
-        background: var(--success);
-      }
-      .status-dot.offline { background: var(--danger); }
-
-      .messages {
-        flex: 1;
-        min-height: 0;
-        overflow-y: auto;
-        padding: 20px;
-        display: flex;
-        flex-direction: column;
-        gap: 12px;
-      }
-      .message {
-        max-width: 85%;
-        padding: 12px 16px;
-        border-radius: 12px;
-        line-height: 1.5;
-        font-size: 0.9rem;
-        word-wrap: break-word;
-      }
-      .message.user {
-        align-self: flex-end;
-        background: var(--user-bubble);
-        color: #c8e6c9;
-        border-bottom-right-radius: 4px;
-      }
-      .message.assistant {
-        align-self: flex-start;
-        background: var(--system-bubble);
-        color: #bbdefb;
-        border-bottom-left-radius: 4px;
-      }
-      .message.system {
-        align-self: center;
-        background: var(--panel-light);
-        color: var(--text-muted);
-        font-size: 0.8rem;
-        padding: 8px 14px;
-        border-radius: 8px;
-      }
-      .message.error {
-        align-self: center;
-        background: #3a1a1a;
-        color: #ef9a9a;
-        border: 1px solid #5a2a2a;
-      }
-      .message .meta {
-        font-size: 0.7rem;
-        color: var(--text-muted);
-        margin-top: 6px;
-        opacity: 0.7;
-      }
-      .message .critic-score {
-        margin-top: 8px;
-        padding: 6px 10px;
-        background: rgba(0,0,0,0.3);
-        border-radius: 6px;
-        font-size: 0.75rem;
-      }
-      .message .critic-score .score-bar {
-        display: flex;
-        gap: 8px;
-        margin-top: 4px;
-      }
-      .message .critic-score .score-item {
-        display: flex;
-        align-items: center;
-        gap: 4px;
-      }
-      .message .critic-score .score-value {
-        font-weight: 600;
-      }
-      .message .critic-score .score-value.good { color: #4caf50; }
-      .message .critic-score .score-value.medium { color: #ff9800; }
-      .message .critic-score .score-value.bad { color: #f44336; }
-
-      .typing-indicator {
-        align-self: flex-start;
-        padding: 12px 16px;
-        background: var(--system-bubble);
-        border-radius: 12px;
-        border-bottom-left-radius: 4px;
-        display: none;
-        margin: 0 20px;
-      }
-      .typing-indicator.visible { display: flex; gap: 4px; }
-      .typing-indicator span {
-        width: 8px;
-        height: 8px;
-        border-radius: 50%;
-        background: var(--text-muted);
-        animation: typing 1.4s infinite;
-      }
-      .typing-indicator span:nth-child(2) { animation-delay: 0.2s; }
-      .typing-indicator span:nth-child(3) { animation-delay: 0.4s; }
-      @keyframes typing {
-        0%, 60%, 100% { transform: translateY(0); opacity: 0.4; }
-        30% { transform: translateY(-6px); opacity: 1; }
-      }
-
-      .input-area {
-        padding: 16px 20px;
-        border-top: 1px solid var(--border);
-        background: var(--panel);
-        flex-shrink: 0;
-      }
-      .input-wrapper {
-        display: flex;
-        gap: 10px;
-        align-items: flex-end;
-      }
-      .input-wrapper textarea {
-        flex: 1;
-        background: var(--panel-light);
-        border: 1px solid var(--border);
-        border-radius: 12px;
-        padding: 12px 16px;
-        color: var(--text);
-        font: inherit;
-        font-size: 0.9rem;
-        resize: none;
-        min-height: 48px;
-        max-height: 150px;
-        line-height: 1.4;
-        outline: none;
-        transition: border-color 0.2s;
-      }
-      .input-wrapper textarea:focus { border-color: var(--accent); }
-      .input-wrapper textarea::placeholder { color: var(--text-muted); }
-      .send-btn {
-        background: var(--accent);
-        color: white;
-        border: none;
-        border-radius: 12px;
-        padding: 12px 20px;
-        font: inherit;
-        font-size: 0.9rem;
-        font-weight: 500;
-        cursor: pointer;
-        transition: background 0.2s;
-        white-space: nowrap;
-        height: 48px;
-      }
-      .send-btn:hover { background: var(--accent-hover); }
-      .send-btn:disabled { opacity: 0.5; cursor: not-allowed; }
-      .input-hint {
-        font-size: 0.7rem;
-        color: var(--text-muted);
-        margin-top: 8px;
-      }
-      .input-hint kbd {
-        background: var(--panel-light);
-        border: 1px solid var(--border);
-        border-radius: 4px;
-        padding: 1px 5px;
-        font-size: 0.65rem;
-      }
-
-      /* === Sidebar === */
-      .sidebar {
-        display: flex;
-        flex-direction: column;
-        background: var(--panel);
-        min-width: 0;
-        height: 100vh;
-      }
-      .sidebar-header {
-        padding: 16px 20px;
-        border-bottom: 1px solid var(--border);
-        display: flex;
-        align-items: center;
-        justify-content: space-between;
-        flex-shrink: 0;
-      }
-      .sidebar-header h2 { font-size: 0.95rem; font-weight: 600; }
-      .sidebar-tabs { display: flex; gap: 4px; }
-      .sidebar-tab {
-        background: transparent;
-        border: 1px solid var(--border);
-        color: var(--text-muted);
-        padding: 6px 12px;
-        border-radius: 8px;
-        font-size: 0.75rem;
-        cursor: pointer;
-        transition: all 0.2s;
-      }
-      .sidebar-tab.active {
-        background: var(--accent);
-        border-color: var(--accent);
-        color: white;
-      }
-      .sidebar-tab:hover:not(.active) {
-        border-color: var(--border-light);
-        color: var(--text);
-      }
-
-      .sidebar-content {
-        flex: 1;
-        min-height: 0;
-        overflow-y: auto;
-        padding: 16px;
-      }
-
-      .events-list {
-        display: flex;
-        flex-direction: column;
-        gap: 8px;
-      }
-      .event-item {
-        background: var(--event-bg);
-        border: 1px solid var(--event-border);
-        border-radius: 8px;
-        padding: 10px 12px;
-        font-size: 0.75rem;
-      }
-      .event-item .event-type {
-        font-weight: 600;
-        color: var(--accent);
-        margin-bottom: 4px;
-      }
-      .event-item .event-type.task_received { color: #81c784; }
-      .event-item .event-type.task_completed { color: #4caf50; }
-      .event-item .event-type.task_failed { color: #f44336; }
-      .event-item .event-type.permission_requested { color: #ff9800; }
-      .event-item .event-type.memory_recall_used { color: #ce93d8; }
-      .event-item .event-type.memory_write_decided { color: #80deea; }
-      .event-item .event-payload {
-        color: var(--text-muted);
-        font-family: 'SF Mono', Monaco, monospace;
-        font-size: 0.7rem;
-        white-space: pre-wrap;
-        word-break: break-all;
-        max-height: 80px;
-        overflow: hidden;
-      }
-      .event-item .event-time {
-        color: #555;
-        font-size: 0.65rem;
-        margin-top: 4px;
-      }
-
-      .runtime-log {
-        display: flex;
-        flex-direction: column;
-        gap: 8px;
-      }
-      .runtime-entry {
-        background: var(--panel-light);
-        border: 1px solid var(--border);
-        border-radius: 8px;
-        padding: 10px 12px;
-        font-size: 0.8rem;
-      }
-      .runtime-entry .title {
-        font-weight: 600;
-        margin-bottom: 4px;
-        color: var(--text);
-      }
-      .runtime-entry .body {
-        color: var(--text-muted);
-        font-size: 0.75rem;
-      }
-
-      /* Permission controls */
-      .permission-controls {
-        background: #2a1a00;
-        border: 1px solid #5a3a00;
-        border-radius: 8px;
-        padding: 12px;
-        margin-top: 8px;
-      }
-      .permission-controls .command {
-        font-family: 'SF Mono', Monaco, monospace;
-        font-size: 0.75rem;
-        color: #ffcc80;
-        margin-bottom: 8px;
-        padding: 6px 8px;
-        background: #1a1000;
-        border-radius: 4px;
-      }
-      .permission-controls .buttons {
-        display: flex;
-        gap: 6px;
-        flex-wrap: wrap;
-      }
-      .permission-controls button {
-        padding: 6px 12px;
-        border-radius: 6px;
-        border: 1px solid var(--border);
-        font-size: 0.75rem;
-        cursor: pointer;
-        transition: all 0.2s;
-      }
-      .permission-controls button.allow {
-        background: #1b5e20;
-        border-color: #2e7d32;
-        color: #a5d6a7;
-      }
-      .permission-controls button.deny {
-        background: #b71c1c;
-        border-color: #c62828;
-        color: #ef9a9a;
-      }
-      .permission-controls button:hover { filter: brightness(1.2); }
-
-      /* Feedback dialog */
-      dialog {
-        background: var(--panel);
-        border: 1px solid var(--border);
-        border-radius: 16px;
-        padding: 0;
-        color: var(--text);
-        max-width: 500px;
-        width: 90vw;
-      }
-      dialog::backdrop { background: rgba(0, 0, 0, 0.7); }
-      .modal-body {
-        padding: 24px;
-        display: flex;
-        flex-direction: column;
-        gap: 16px;
-      }
-      .modal-body h3 { font-size: 1rem; margin-bottom: 4px; }
-      .modal-body label {
-        display: flex;
-        flex-direction: column;
-        gap: 6px;
-        font-size: 0.85rem;
-        color: var(--text-muted);
-      }
-      .modal-body select, .modal-body textarea {
-        background: var(--panel-light);
-        border: 1px solid var(--border);
-        border-radius: 8px;
-        padding: 10px 12px;
-        color: var(--text);
-        font: inherit;
-        font-size: 0.85rem;
-        outline: none;
-      }
-      .modal-body select:focus, .modal-body textarea:focus { border-color: var(--accent); }
-      .modal-body textarea { min-height: 80px; resize: vertical; }
-      .modal-body .inline-label {
-        flex-direction: row;
-        align-items: center;
-        gap: 8px;
-      }
-      .modal-body .inline-label input[type="checkbox"] { width: 16px; height: 16px; }
-      .modal-body .buttons {
-        display: flex;
-        gap: 8px;
-        justify-content: flex-end;
-      }
-      .modal-body button {
-        padding: 10px 20px;
-        border-radius: 8px;
-        border: 1px solid var(--border);
-        font: inherit;
-        font-size: 0.85rem;
-        cursor: pointer;
-        transition: all 0.2s;
-      }
-      .modal-body button.primary {
-        background: var(--accent);
-        border-color: var(--accent);
-        color: white;
-      }
-      .modal-body button.secondary {
-        background: transparent;
-        color: var(--text-muted);
-      }
-      .modal-body button:hover { filter: brightness(1.1); }
-
-      /* Scrollbar */
-      ::-webkit-scrollbar { width: 6px; }
-      ::-webkit-scrollbar-track { background: transparent; }
-      ::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
-      ::-webkit-scrollbar-thumb:hover { background: var(--border-light); }
-
-      /* Responsive */
-      @media (max-width: 768px) {
-        .app { grid-template-columns: 1fr; }
-        .sidebar { display: none; }
-      }
-    </style>
-  </head>
-  <body>
-    <div class="app">
-      <div class="chat-panel">
-        <div class="chat-header">
-          <h1>🦆 DuckLM</h1>
-          <div class="status">
-            <span class="status-dot" id="statusDot"></span>
-            <span id="statusText">Connecting...</span>
-          </div>
-        </div>
-        <div class="messages" id="messages"></div>
-        <div class="typing-indicator" id="typing">
-          <span></span><span></span><span></span>
-        </div>
-        <div class="input-area">
-          <div class="input-wrapper">
-            <textarea id="prompt" placeholder="Опишите задачу..." rows="1"></textarea>
-            <button class="send-btn" id="sendBtn">Отправить</button>
-          </div>
-          <div class="input-hint">
-            <kbd>Enter</kbd> — отправить, <kbd>Shift+Enter</kbd> — перенос строки
-          </div>
-        </div>
-      </div>
-
-      <div class="sidebar">
-        <div class="sidebar-header">
-          <h2>Runtime</h2>
-          <div class="sidebar-tabs">
-            <button class="sidebar-tab active" data-tab="events">События</button>
-            <button class="sidebar-tab" data-tab="log">Лог</button>
-          </div>
-        </div>
-        <div class="sidebar-content" id="sidebarContent">
-          <div class="events-list" id="eventsList"></div>
-          <div class="runtime-log" id="runtimeLog" style="display:none"></div>
-        </div>
-      </div>
-    </div>
-
-    <dialog id="feedbackDialog">
-      <form method="dialog" class="modal-body" id="feedbackForm">
-        <h3>Что было неверно?</h3>
-        <label>Тип ошибки
-          <select id="feedbackType">
-            <option value="misunderstood_task">Неправильно понял задачу</option>
-            <option value="wrong_tool">Выбрал не тот инструмент</option>
-            <option value="wrong_command">Выполнил не ту команду</option>
-            <option value="should_have_checked">Ответил без проверки</option>
-            <option value="hallucination">Выдумал факт</option>
-            <option value="incomplete">Неполный ответ</option>
-            <option value="unsafe">Опасное действие</option>
-            <option value="bad_format">Плохой формат ответа</option>
-            <option value="other">Другое</option>
-          </select>
-        </label>
-        <label>Критичность
-          <select id="feedbackSeverity">
-            <option value="minor">Мелкая ошибка</option>
-            <option value="major" selected>Существенная ошибка</option>
-            <option value="critical">Критическая ошибка</option>
-          </select>
-        </label>
-        <label>Комментарий
-          <textarea id="feedbackText" placeholder="Что именно было неверно?"></textarea>
-        </label>
-        <label>Как должно было быть
-          <textarea id="feedbackCorrection" placeholder="Корректировка"></textarea>
-        </label>
-        <label class="inline-label">
-          <input type="checkbox" id="feedbackRemember" checked />
-          Запомнить
-        </label>
-        <label class="inline-label">
-          <input type="checkbox" id="feedbackRetry" />
-          Исправить сейчас
-        </label>
-        <div class="buttons">
-          <button type="submit" class="primary" value="submit">Отправить</button>
-          <button type="button" class="secondary" value="cancel" onclick="feedbackDialog.close()">Отмена</button>
-        </div>
-      </form>
-    </dialog>
-
-    <script>
-      // === DOM ===
-      const messagesEl = document.getElementById("messages");
-      const eventsListEl = document.getElementById("eventsList");
-      const runtimeLogEl = document.getElementById("runtimeLog");
-      const promptEl = document.getElementById("prompt");
-      const sendBtn = document.getElementById("sendBtn");
-      const typingEl = document.getElementById("typing");
-      const statusDot = document.getElementById("statusDot");
-      const statusText = document.getElementById("statusText");
-      const feedbackDialog = document.getElementById("feedbackDialog");
-      const feedbackForm = document.getElementById("feedbackForm");
-
-      // === State ===
-      let currentSessionId = localStorage.getItem("ducklm_session") || "web-" + Date.now();
-      localStorage.setItem("ducklm_session", currentSessionId);
-      let currentTaskId = null;
-      let isProcessing = false;
-      let activeStream = null;
-      let activeStreamMessage = null;
-      let activeStreamText = "";
-      const streamedToolOutputTasks = new Set();
-      const seenEvents = new Set();
-
-      // === Load history from localStorage ===
-      function loadHistory() {
-        const savedMessages = localStorage.getItem("ducklm_messages");
-        const savedEvents = localStorage.getItem("ducklm_events");
-        if (savedMessages) {
-          try {
-            const msgs = JSON.parse(savedMessages);
-            msgs.forEach(m => addMessage(m.type, m.text, m.meta, false));
-          } catch(e) {}
-        }
-        if (savedEvents) {
-          try {
-            const events = JSON.parse(savedEvents);
-            events.forEach(e => addEventToSidebar(e, false));
-          } catch(e) {}
-        }
-        scrollToBottom();
-      }
-
-      function saveMessages() {
-        const msgs = [];
-        messagesEl.querySelectorAll(".message").forEach(el => {
-          msgs.push({
-            type: el.className.replace("message ", ""),
-            text: el.dataset.text || el.textContent,
-            meta: el.dataset.meta || ""
-          });
-        });
-        localStorage.setItem("ducklm_messages", JSON.stringify(msgs));
-      }
-
-      function saveEvents() {
-        const events = [];
-        eventsListEl.querySelectorAll(".event-item").forEach(el => {
-          events.push({
-            type: el.dataset.type || "",
-            payload: el.dataset.payload || "",
-            time: el.dataset.time || ""
-          });
-        });
-        localStorage.setItem("ducklm_events", JSON.stringify(events));
-      }
-
-      // === Auto-resize textarea ===
-      promptEl.addEventListener("input", () => {
-        promptEl.style.height = "auto";
-        promptEl.style.height = Math.min(promptEl.scrollHeight, 150) + "px";
-      });
-
-      // === Enter to send, Shift+Enter for newline ===
-      promptEl.addEventListener("keydown", (e) => {
-        if (e.key === "Enter" && !e.shiftKey) {
-          e.preventDefault();
-          sendTask();
-        }
-      });
-
-      sendBtn.addEventListener("click", sendTask);
-
-      // === Sidebar tabs ===
-      document.querySelectorAll(".sidebar-tab").forEach(tab => {
-        tab.addEventListener("click", () => {
-          document.querySelectorAll(".sidebar-tab").forEach(t => t.classList.remove("active"));
-          tab.classList.add("active");
-          const tabName = tab.dataset.tab;
-          eventsListEl.style.display = tabName === "events" ? "flex" : "none";
-          runtimeLogEl.style.display = tabName === "log" ? "flex" : "none";
-        });
-      });
-
-      // === Health check ===
-      async function checkHealth() {
-        try {
-          const resp = await fetch("/health");
-          if (resp.ok) {
-            statusDot.classList.remove("offline");
-            statusText.textContent = "Connected";
-          } else { throw new Error("not ok"); }
-        } catch {
-          statusDot.classList.add("offline");
-          statusText.textContent = "Disconnected";
-        }
-      }
-      checkHealth();
-      setInterval(checkHealth, 10000);
-
-      // === Scroll to bottom ===
-      function scrollToBottom() {
-        messagesEl.scrollTop = messagesEl.scrollHeight;
-      }
-
-      // === Send task ===
-      async function sendTask() {
-        const text = promptEl.value.trim();
-        if (!text || isProcessing) return;
-
-        isProcessing = true;
-        sendBtn.disabled = true;
-        promptEl.value = "";
-        promptEl.style.height = "auto";
-
-        addMessage("user", text);
-        typingEl.classList.add("visible");
-        scrollToBottom();
-
-        const taskId = "web-" + Date.now();
-        currentTaskId = taskId;
-        seenEvents.clear();  // Clear seen events for new request
-
-        try {
-          openTaskStream(taskId);
-          const response = await fetch("/chat", {
-            method: "POST",
-            headers: { "Content-Type": "application/json" },
-            body: JSON.stringify({ input: text, task_id: taskId, session_id: currentSessionId, context: {} })
-          });
-          const data = await response.json();
-
-          if (!data || !["accepted", "completed"].includes(data.status)) {
-            addMessage("error", "Invalid response from server");
-            typingEl.classList.remove("visible");
-            isProcessing = false;
-            sendBtn.disabled = false;
-            return;
-          }
-          if (data.events) {
-            processEvents(data.events, data);
-            renderResult(data);
-          }
-
-        } catch (err) {
-          typingEl.classList.remove("visible");
-          addMessage("error", "Network error: " + err.message);
-          isProcessing = false;
-          sendBtn.disabled = false;
-          promptEl.focus();
-          scrollToBottom();
-        }
-      }
-
-      function openTaskStream(taskId) {
-        if (activeStream) {
-          activeStream.close();
-        }
-        const protocol = window.location.protocol === "https:" ? "wss" : "ws";
-        activeStream = new WebSocket(`${protocol}://${window.location.host}/stream/${taskId}`);
-        activeStream.onmessage = (message) => {
-          const event = JSON.parse(message.data);
-          processEvents([event]);
-        };
-        activeStream.onerror = () => {
-          typingEl.classList.remove("visible");
-          isProcessing = false;
-          sendBtn.disabled = false;
-        };
-        activeStream.onclose = () => {
-          activeStream = null;
-        };
-      }
-
-      // === Process events ===
-      function processEvents(events, data) {
-        for (const event of events) {
-          if (event.type === "heartbeat") continue;
-          const eventKey = `${event.task_id}:${event.sequence}`;
-          if (seenEvents.has(eventKey)) continue;
-          seenEvents.add(eventKey);
-          addEventToSidebar(event);
-
-          if (event.type === "memory_recall_used") {
-            const recall = event.payload;
-            addRuntimeLog("🧠 Memory Recall", `Query: ${recall.query}\nResults: ${recall.results_count}\nReason: ${recall.reason}`);
-          }
-          if (event.type === "memory_write_decided") {
-            const write = event.payload;
-            addRuntimeLog("💾 Memory Write", `Kind: ${write.kind}\nDecision: ${write.decision}\nPreview: ${write.text_preview}`);
-          }
-          if (event.type === "tool_output_chunk") {
-            appendToolOutput(event.payload.chunk || "");
-          }
-          if (event.type === "task_completed" || event.type === "task_failed" || event.type === "task_awaiting_permission" || event.type === "task_awaiting_input" || event.type === "task_awaiting_review") {
-            typingEl.classList.remove("visible");
-            isProcessing = false;
-            sendBtn.disabled = false;
-            promptEl.focus();
-            const result = event.payload.execution_result || event.payload;
-            renderResult({
-              task_id: event.task_id,
-              status: event.type.replace("task_", ""),
-              result
-            });
-            saveMessages();
-            saveEvents();
-          }
-        }
-      }
-
-      function appendToolOutput(chunk) {
-        if (!chunk) return;
-        if (!activeStreamMessage) {
-          activeStreamText = "";
-          activeStreamMessage = document.createElement("div");
-          activeStreamMessage.className = "message assistant";
-          messagesEl.appendChild(activeStreamMessage);
-        }
-        activeStreamText += chunk;
-        if (currentTaskId) streamedToolOutputTasks.add(currentTaskId);
-        activeStreamMessage.dataset.text = activeStreamText;
-        activeStreamMessage.innerHTML = escapeHtml(activeStreamText);
-        saveMessages();
-        scrollToBottom();
-      }
-
-      // === Add event to sidebar ===
-      function addEventToSidebar(event, save = true) {
-        const el = document.createElement("div");
-        el.className = "event-item";
-        el.dataset.type = event.type;
-        el.dataset.payload = JSON.stringify(event.payload || {});
-        el.dataset.time = event.timestamp || "";
-
-        const typeClass = event.type.replace(/_/g, "_");
-        const time = event.timestamp ? new Date(event.timestamp).toLocaleTimeString() : "";
-
-        el.innerHTML = `
-          <div class="event-type ${typeClass}">${formatEventType(event.type)}</div>
-          <div class="event-payload">${escapeHtml(JSON.stringify(event.payload || {}, null, 2).slice(0, 200))}</div>
-          <div class="event-time">${time}</div>
-        `;
-        eventsListEl.insertBefore(el, eventsListEl.firstChild);
-        if (save) saveEvents();
-      }
-
-      // === Add runtime log ===
-      function addRuntimeLog(title, body) {
-        const el = document.createElement("div");
-        el.className = "runtime-entry";
-        el.innerHTML = `<div class="title">${escapeHtml(title)}</div><div class="body">${escapeHtml(body)}</div>`;
-        runtimeLogEl.insertBefore(el, runtimeLogEl.firstChild);
-      }
-
-      // === Render result ===
-      function renderResult(data) {
-        activeStreamMessage = null;
-        activeStreamText = "";
-        const stepResults = data.result?.step_results || [];
-        for (const step of stepResults) {
-          const toolResult = step.result?.result || step.result;
-          if (toolResult?.output && !streamedToolOutputTasks.has(data.task_id)) {
-            let html = escapeHtml(String(toolResult.output));
-            if (step.result?.critic_score) {
-              const score = step.result.critic_score;
-              html += renderCriticScore(score);
-            }
-            addMessage("assistant", html, step.step_id);
-          } else if (toolResult?.error) {
-            addMessage("error", `Step ${step.step_id}: ${toolResult.error}`);
-          }
-        }
-
-        // Priority: response_directive > stepResults > message
-        if (data.result?.response_directive?.payload?.text) {
-          addMessage("assistant", data.result.response_directive.payload.text);
-        } else if (data.result?.message && !stepResults.length) {
-          addMessage("assistant", data.result.message);
-        }
-
-        if (data.status === "awaiting_permission") {
-          renderPermissionRequest(data);
-        } else if (data.status === "awaiting_input") {
-          renderSecretRequest(data);
-        } else if (data.status === "awaiting_review") {
-          renderReviewRequest(data);
-        } else if (data.status === "failed") {
-          addMessage("error", data.result?.error || "Task failed");
-        }
-
-        scrollToBottom();
-      }
-
-      function renderCriticScore(score) {
-        const c = score.correctness;
-        const u = score.usefulness;
-        const s = score.safety;
-        const cClass = c >= 0.7 ? "good" : c >= 0.4 ? "medium" : "bad";
-        const uClass = u >= 0.7 ? "good" : u >= 0.4 ? "medium" : "bad";
-        const sClass = s >= 0.7 ? "good" : s >= 0.4 ? "medium" : "bad";
-        return `<div class="critic-score">
-          <div>Critic: ${score.explanation || ""}</div>
-          <div class="score-bar">
-            <div class="score-item">✓ <span class="score-value ${cClass}">${c.toFixed(2)}</span></div>
-            <div class="score-item">⚡ <span class="score-value ${uClass}">${u.toFixed(2)}</span></div>
-            <div class="score-item">🛡 <span class="score-value ${sClass}">${s.toFixed(2)}</span></div>
-          </div>
-        </div>`;
-      }
-
-      function renderPermissionRequest(data) {
-        const permReq = data.result?.permission_request;
-        if (!permReq) return;
-        const el = document.createElement("div");
-        el.className = "message system";
-        el.innerHTML = `<div class="permission-controls">
-          <div>⚠️ Требуется разрешение:</div>
-          <div class="command">${escapeHtml(permReq.command || JSON.stringify(permReq))}</div>
-          <div class="buttons">
-            <button class="allow" onclick="resolvePermission('${data.task_id}', 'allow_once')">Разрешить</button>
-            ${permReq.allow_always !== false ? `<button class="allow" onclick="resolvePermission('${data.task_id}', 'allow_always')">Навсегда</button>` : ""}
-            <button class="deny" onclick="resolvePermission('${data.task_id}', 'deny')">Запретить</button>
-          </div>
-        </div>`;
-        messagesEl.appendChild(el);
-      }
-
-      function renderSecretRequest(data) {
-        const secretReq = data.result?.secret_request;
-        if (!secretReq) return;
-        const el = document.createElement("div");
-        el.className = "message system";
-        el.innerHTML = `<div class="permission-controls">
-          <div>🔑 ${escapeHtml(secretReq.prompt || "Требуется ввод")}</div>
-          <input type="password" id="secretInput" placeholder="Введите..." style="width:100%;margin:8px 0;padding:8px;background:#1a1a1a;border:1px solid #333;border-radius:6px;color:#e8e8e8" />
-          <div class="buttons"><button class="allow" onclick="resolveSecret('${data.task_id}')">Отправить</button></div>
-        </div>`;
-        messagesEl.appendChild(el);
-      }
-
-      function renderReviewRequest(data) {
-        const review = data.result?.review;
-        if (!review) return;
-        const assessment = review.critic_assessment || {};
-        const diagnosis = review.diagnosis || {};
-        const el = document.createElement("div");
-        el.className = "message system";
-        el.innerHTML = `<div class="permission-controls">
-          <div>Critic оценил действие как: ${escapeHtml(assessment.classification || "requires_review")}</div>
-          <div class="command">${escapeHtml(review.command || "")}</div>
-          <div>${escapeHtml(assessment.explanation || diagnosis.type || "")}</div>
-          <textarea id="reviewCorrection" placeholder="Комментарий или исправление..." style="width:100%;margin:8px 0;padding:8px;background:#1a1a1a;border:1px solid #333;border-radius:6px;color:#e8e8e8"></textarea>
-          <div class="buttons">
-            <button class="deny" onclick="resolveReview('${data.task_id}', 'wrong_action')">Ошибочное действие</button>
-            <button class="allow" onclick="resolveReview('${data.task_id}', 'correct_action')">Всё верно</button>
-          </div>
-        </div>`;
-        messagesEl.appendChild(el);
-      }
-
-      async function resolvePermission(taskId, decision) {
-        // Disable all permission buttons to prevent double-click
-        document.querySelectorAll(".permission-controls button").forEach(btn => {
-          btn.disabled = true;
-          btn.style.opacity = "0.5";
-        });
-
-        try {
-          openTaskStream(taskId);
-          const resp = await fetch("/permissions/resolve", {
-            method: "POST",
-            headers: { "Content-Type": "application/json" },
-            body: JSON.stringify({ task_id: taskId, decision })
-          });
-          const data = await resp.json();
-
-          // Remove the permission request UI
-          const permEl = document.querySelector(".permission-controls")?.closest(".message.system");
-          if (permEl) {
-            permEl.innerHTML = `<div style="color:#81c784">✓ Permission ${decision}: ${data.status}</div>`;
-          }
-
-          // Synchronous fallback for tests/older runtimes.
-          if (data.status === "completed" || data.status === "failed") {
-            if (data.result?.response_directive?.payload?.text) {
-              addMessage("assistant", data.result.response_directive.payload.text);
-            } else if (data.result?.step_results?.length) {
-              renderResult(data);
-            } else if (data.result?.message) {
-              addMessage("assistant", data.result.message);
-            }
-            if (data.status === "failed") {
-              addMessage("error", data.result?.error || "Task failed");
-            }
-          } else if (data.status === "awaiting_input") {
-            // Need password/secret input
-            renderSecretRequest(data);
-            if (data.events) {
-              seenEvents.clear();
-              processEvents(data.events, data);
-            }
-          } else if (data.status === "awaiting_permission") {
-            // Still needs more permissions — render new request
-            renderPermissionRequest(data);
-          }
-          // Process any new events (clear seen to allow re-processing)
-          if (data.events) {
-            seenEvents.clear();
-            processEvents(data.events, data);
-          }
-          saveMessages();
-          saveEvents();
-          scrollToBottom();
-
-        } catch (err) {
-          addMessage("error", "Failed to resolve: " + err.message);
-          // Re-enable buttons on error
-          document.querySelectorAll(".permission-controls button").forEach(btn => {
-            btn.disabled = false;
-            btn.style.opacity = "1";
-          });
-        }
-      }
-
-      async function resolveSecret(taskId) {
-        const input = document.getElementById("secretInput");
-        if (!input?.value) return;
-        try {
-          openTaskStream(taskId);
-          const resp = await fetch("/secrets/resolve", {
-            method: "POST",
-            headers: { "Content-Type": "application/json" },
-            body: JSON.stringify({ task_id: taskId, secret: input.value })
-          });
-          const data = await resp.json();
-          addMessage("system", `Secret submitted: ${data.status}`);
-          // Synchronous fallback for tests/older runtimes.
-          if (data.status === "completed" || data.status === "failed") {
-            renderResult(data);
-          }
-          if (data.events) {
-            seenEvents.clear();
-            processEvents(data.events, data);
-          }
-          saveMessages();
-          saveEvents();
-          scrollToBottom();
-        } catch (err) { addMessage("error", "Failed to submit: " + err.message); }
-      }
-
-      async function resolveReview(taskId, decision) {
-        const correction = document.getElementById("reviewCorrection")?.value || "";
-        document.querySelectorAll(".permission-controls button").forEach(btn => {
-          btn.disabled = true;
-          btn.style.opacity = "0.5";
-        });
-        try {
-          openTaskStream(taskId);
-          const resp = await fetch("/review/resolve", {
-            method: "POST",
-            headers: { "Content-Type": "application/json" },
-            body: JSON.stringify({ task_id: taskId, decision, correction })
-          });
-          const data = await resp.json();
-          addMessage("system", `Review submitted: ${data.status}`);
-          if (data.events) {
-            seenEvents.clear();
-            processEvents(data.events, data);
-          }
-        } catch (err) {
-          addMessage("error", "Failed to submit review: " + err.message);
-        }
-      }
-
-      // === Add message ===
-      function addMessage(type, text, meta, save = true) {
-        const el = document.createElement("div");
-        el.className = `message ${type}`;
-        el.dataset.text = text;
-        el.dataset.meta = meta || "";
-        el.innerHTML = text + (meta ? `<div class="meta">${escapeHtml(meta)}</div>` : "");
-        messagesEl.appendChild(el);
-        if (save) saveMessages();
-      }
-
-      // === Feedback ===
-      feedbackForm.addEventListener("submit", async (e) => {
-        e.preventDefault();
-        const btn = feedbackForm.querySelector('button[value="submit"]');
-        if (btn.value !== "submit") { feedbackDialog.close(); return; }
-        const data = await submitFeedback({
-          feedback_type: document.getElementById("feedbackType").value,
-          severity: document.getElementById("feedbackSeverity").value,
-          feedback: document.getElementById("feedbackText").value,
-          correction: document.getElementById("feedbackCorrection").value,
-          remember: document.getElementById("feedbackRemember").checked,
-          retry: document.getElementById("feedbackRetry").checked,
-          task_id: currentTaskId
-        });
-        feedbackDialog.close();
-        if (data?.status === "ok") addMessage("system", "Обратная связь сохранена");
-      });
-
-      async function submitFeedback(params) {
-        try {
-          const resp = await fetch("/critic/feedback", {
-            method: "POST",
-            headers: { "Content-Type": "application/json" },
-            body: JSON.stringify(params)
-          });
-          return await resp.json();
-        } catch { return null; }
-      }
-
-      // === Helpers ===
-      function escapeHtml(value) {
-        return String(value).replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;");
-      }
-
-      function formatEventType(type) {
-        const labels = {
-          task_received: "📥 Received", context_built: "🔧 Context",
-          thinker_called: "🤔 Thinker", json_compiler_called: "📋 Compiler",
-          orchestrator_result: "✅ Orchestrator", step_started: "▶️ Step",
-          tool_called: "🔨 Tool", tool_completed: "✔️ Tool Done",
-          permission_requested: "⚠️ Permission", task_completed: "✅ Completed",
-          task_awaiting_review: "🧭 Review", review_resolved: "🗳 Review",
-          task_failed: "❌ Failed", memory_recall_used: "🧠 Recall",
-          memory_write_decided: "💾 Write", checkpoint_saved: "💾 Checkpoint",
-          critic_called: "🔍 Critic", critic_result: "📊 Critic Result"
-        };
-        return labels[type] || type;
-      }
-
-      // === Init ===
-      loadHistory();
-      promptEl.focus();
-    </script>
-  </body>
-</html>
diff --git a/app/cli/__init__.py b/app/cli/__init__.py
deleted file mode 100644
index 43164b3..0000000
--- a/app/cli/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-"""CLI layer."""
-
diff --git a/app/core/__init__.py b/app/core/__init__.py
deleted file mode 100644
index b18a4b9..0000000
--- a/app/core/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-"""Core orchestration components."""
-
diff --git a/app/core/async_router.py b/app/core/async_router.py
deleted file mode 100644
index dc23d48..0000000
--- a/app/core/async_router.py
+++ /dev/null
@@ -1,542 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import json
-import logging
-import re
-from typing import Any
-
-from app.core.contracts import ExecutionDirective
-from app.core.intent_parser import IntentParser
-from app.events.event_bus import EventBus
-from app.events.event_types import (
-    ORCHESTRATOR_CALLED,
-    ORCHESTRATOR_FALLBACK_USED,
-    ORCHESTRATOR_RETRY,
-    ORCHESTRATOR_RESULT,
-    ORCHESTRATOR_UNAVAILABLE,
-    THINKER_CALLED,
-    THINKER_RESULT,
-    JSON_COMPILER_CALLED,
-    JSON_COMPILER_RESULT,
-)
-from app.models.async_adapters import AsyncOrchestratorAdapter
-
-logger = logging.getLogger(__name__)
-
-
-class AsyncRouter:
-    """Async router using Thinker + JSON Compiler pipeline."""
-
-    def __init__(
-        self,
-        thinker: AsyncOrchestratorAdapter | None = None,
-        json_compiler: AsyncOrchestratorAdapter | None = None,
-        intent_parser: IntentParser | None = None,
-        prompts: dict[str, str] | None = None,
-        event_bus: EventBus | None = None,
-        tool_registry=None,
-        retry_limit: int = 2,
-        debug: bool = False,
-        log_length: int = 500,
-        json_fix_retry_limit: int = 2,
-        json_fix_use_sys_util: bool = True,
-        intent_classifier: str = "thinker",
-    ) -> None:
-        self._thinker = thinker
-        self._json_compiler = json_compiler
-        self._intent_classifier = intent_classifier
-        self._sys_util = None
-        self._intent_parser = intent_parser or IntentParser()
-        self._prompts = prompts or {}
-        self._event_bus = event_bus
-        self._tool_registry = tool_registry
-        self._retry_limit = retry_limit
-        self._debug = debug
-        self._log_length = log_length
-        self._json_fix_retry_limit = json_fix_retry_limit
-        self._json_fix_use_sys_util = json_fix_use_sys_util
-        self._orchestrator = None  # Set separately if needed for classification
-
-    def set_event_bus(self, event_bus: EventBus) -> None:
-        self._event_bus = event_bus
-
-    def set_thinker(self, thinker: AsyncOrchestratorAdapter) -> None:
-        self._thinker = thinker
-
-    def set_json_compiler(self, json_compiler: AsyncOrchestratorAdapter) -> None:
-        self._json_compiler = json_compiler
-
-    def set_sys_util(self, sys_util: AsyncOrchestratorAdapter) -> None:
-        self._sys_util = sys_util
-
-    def set_orchestrator(self, orchestrator: AsyncOrchestratorAdapter) -> None:
-        self._orchestrator = orchestrator
-
-    def set_tool_registry(self, tool_registry) -> None:
-        self._tool_registry = tool_registry
-
-    async def decide(
-        self,
-        state: dict[str, Any],
-        context: dict[str, Any],
-        task_id: str | None = None,
-        session_id: str | None = None,
-    ) -> ExecutionDirective:
-        task_context = context.get("task_context", {})
-        requested_tool = task_context.get("requested_tool")
-        task_summary = str(context.get("task_summary", ""))
-
-        if requested_tool:
-            self._emit_event(
-                ORCHESTRATOR_RESULT,
-                {"reason": "explicit_tool_request", "tool": requested_tool},
-                task_id,
-                session_id,
-            )
-            return ExecutionDirective(
-                type="tool",
-                payload={
-                    "tool": requested_tool,
-                    "args": task_context.get("tool_args", {}),
-                },
-                requires_permission=requested_tool in {"shell_exec", "file_write"},
-                confidence=0.9,
-                reason="Task context explicitly requested a tool execution.",
-            )
-
-        parsed_intent = self._intent_parser.parse(task_summary)
-        if parsed_intent:
-            self._emit_event(
-                ORCHESTRATOR_RESULT,
-                {"reason": "deterministic_intent_parser", "directive": parsed_intent.model_dump(mode="json")},
-                task_id,
-                session_id,
-            )
-            return parsed_intent
-
-        if self._thinker is None:
-            fallback = self._fallback_directive(task_summary)
-            self._emit_event(
-                ORCHESTRATOR_FALLBACK_USED,
-                {"reason": "thinker_unavailable", "directive": fallback.model_dump(mode="json")},
-                task_id,
-                session_id,
-            )
-            return fallback
-
-        if self._json_compiler is None:
-            fallback = self._fallback_directive(task_summary)
-            self._emit_event(
-                ORCHESTRATOR_FALLBACK_USED,
-                {"reason": "json_compiler_unavailable", "directive": fallback.model_dump(mode="json")},
-                task_id,
-                session_id,
-            )
-            return fallback
-
-        mode_hint = await self._classify_intent(task_summary)
-        thinker_prompt = self._build_thinker_prompt(task_summary, context, mode_hint)
-
-        for thinker_attempt in range(self._retry_limit + 1):
-            if thinker_attempt > 0:
-                self._emit_event(
-                    ORCHESTRATOR_RETRY,
-                    {"attempt": thinker_attempt, "prompt": thinker_prompt},
-                    task_id,
-                    session_id,
-                )
-                thinker_prompt = self._add_thinker_feedback(thinker_prompt, last_thinker_error, thinker_attempt)
-
-            self._emit_event(
-                THINKER_CALLED,
-                {"attempt": thinker_attempt, "mode": mode_hint},
-                task_id,
-                session_id,
-            )
-
-            try:
-                thinker_result = await self._thinker.generate(thinker_prompt)
-            except Exception as e:
-                logger.warning(f"Thinker generate failed: {e}")
-                last_thinker_error = str(e)
-                continue
-
-            logger.info(f"Thinker result (attempt {thinker_attempt + 1}): {thinker_result}")
-            self._emit_event(
-                THINKER_RESULT,
-                {"result": thinker_result, "attempt": thinker_attempt},
-                task_id,
-                session_id,
-            )
-
-            if mode_hint == "conversation" and self._looks_like_tool_plan(thinker_result):
-                mode_hint = "execution"
-                self._emit_event(
-                    ORCHESTRATOR_FALLBACK_USED,
-                    {"reason": "thinker_proposed_tool_plan_despite_conversation_hint"},
-                    task_id,
-                    session_id,
-                )
-
-            if self._is_simple_response(thinker_result):
-                json_compiler_prompt = self._build_json_compiler_prompt(thinker_result)
-            else:
-                json_compiler_prompt = self._build_json_compiler_prompt(thinker_result)
-
-            for compiler_attempt in range(self._json_fix_retry_limit + 1):
-                self._emit_event(
-                    JSON_COMPILER_CALLED,
-                    {"attempt": compiler_attempt, "plan": thinker_result},
-                    task_id,
-                    session_id,
-                )
-
-                try:
-                    compiler_result = await self._json_compiler.generate(json_compiler_prompt)
-                except Exception as e:
-                    logger.warning(f"JSON Compiler generate failed: {e}")
-                    compiler_result = None
-
-                if compiler_result:
-                    logger.info(f"JSON Compiler result (attempt {compiler_attempt + 1}): {compiler_result}")
-                    self._emit_event(
-                        JSON_COMPILER_RESULT,
-                        {"result": compiler_result, "attempt": compiler_attempt},
-                        task_id,
-                        session_id,
-                    )
-
-                directive = self._validate_directive(compiler_result, mode_hint) if compiler_result else None
-                if directive is not None:
-                    directive = self._guard_rail_check(directive)
-                    self._emit_event(
-                        ORCHESTRATOR_RESULT,
-                        {"directive": directive.model_dump(mode="json"), "thinker_attempt": thinker_attempt, "compiler_attempt": compiler_attempt},
-                        task_id,
-                        session_id,
-                    )
-                    return directive
-
-                if compiler_result:
-                    logger.warning(f"JSON Compiler validation failed, attempting fix (attempt {compiler_attempt + 1})")
-                    fix_result = await self._fix_invalid_json(compiler_result, compiler_attempt, task_id, session_id)
-                    if fix_result:
-                        fixed_directive = self._validate_directive(fix_result, mode_hint)
-                        if fixed_directive is not None:
-                            fixed_directive = self._guard_rail_check(fixed_directive)
-                            self._emit_event(
-                                ORCHESTRATOR_RESULT,
-                                {"directive": fixed_directive.model_dump(mode="json"), "fixed": True},
-                                task_id,
-                                session_id,
-                            )
-                            return fixed_directive
-
-            last_thinker_error = f"JSON Compiler failed after {self._json_fix_retry_limit + 1} attempts"
-
-        self._emit_event(
-            ORCHESTRATOR_UNAVAILABLE,
-            {"reason": "retry_exhausted", "last_error": last_thinker_error},
-            task_id,
-            session_id,
-        )
-        raise RuntimeError(f"Thinker/Compiler pipeline failed after {self._retry_limit + 1} attempts")
-
-    def _fallback_directive(self, task_summary: str) -> ExecutionDirective:
-        parsed = self._intent_parser.parse(task_summary)
-        if parsed:
-            return parsed
-
-        return ExecutionDirective(
-            type="respond",
-            payload={"text": f"Runtime accepted task: {task_summary}"},
-            requires_permission=False,
-            confidence=0.4,
-            reason="Fallback response because local orchestration models are not loaded.",
-        )
-
-    def _is_simple_response(self, thinker_result: str) -> bool:
-        result_lower = thinker_result.lower().strip()
-        return result_lower.startswith("ответ:") or result_lower.startswith("response:") or "не нужно" in result_lower
-
-    def _extract_conversation_response(self, thinker_result: str) -> str:
-        """Extract text response from thinker result for conversation mode."""
-        result_lower = thinker_result.lower()
-        
-        # Skip the ПЛАН lines, just get the ОТВЕТ part
-        lines = thinker_result.split('\n')
-        response_lines = []
-        capture = False
-        
-        for line in lines:
-            if line.strip().lower().startswith('ответ:') or line.strip().lower().startswith('response:'):
-                capture = True
-                response_lines.append(line)
-            elif capture and line.strip():
-                # Check if this is a new ПЛАН or step
-                if line.strip().lower().startswith('план') or line.strip().lower().startswith('step'):
-                    break
-                response_lines.append(line)
-        
-        if response_lines:
-            return '\n'.join(response_lines).replace('ответ:', '').replace('response:', '').strip()
-        
-        # Fallback: return first few sentences
-        sentences = thinker_result.split('.')[:3]
-        return '. '.join(sentences).strip()
-
-    def _looks_like_tool_plan(self, thinker_result: str) -> bool:
-        result = thinker_result.lower()
-        tool_names = set()
-        if self._tool_registry:
-            tool_names = set(self._tool_registry.list_names())
-        tool_markers = {"shell_exec", "file_read", "file_write", "memory", *tool_names}
-        plan_markers = ("план:", "шаг", "step", "tool", "инструмент")
-        return any(marker in result for marker in tool_markers) and any(marker in result for marker in plan_markers)
-
-    def _build_thinker_prompt(
-        self, task_summary: str, context: dict[str, Any], mode_hint: str
-    ) -> str:
-        base_prompt = self._prompts.get("thinker", "")
-        memory_context = context.get("memory_context", [])
-
-        tools_json = "[]"
-        if self._tool_registry:
-            schemas = self._tool_registry.list_schemas()
-            tools_json = json.dumps(schemas, ensure_ascii=False, indent=2)
-
-        prompt_lines = [
-            base_prompt,
-            "",
-            f"Task: {task_summary}",
-            f"Mode hint: {mode_hint}",
-        ]
-
-        if memory_context:
-            memory_text = "\n".join([f"- {m.get('text', '')}" for m in memory_context[:5]])
-            prompt_lines.append(f"\nRelevant memory:\n{memory_text}")
-
-        session_history = context.get("session_history", [])
-        if session_history:
-            history_text = "\n".join([f"- {h.get('text', '')}" for h in session_history[:3]])
-            prompt_lines.append(f"\nPrevious requests in this session:\n{history_text}")
-
-        # Active memory recall results
-        memory_recall = context.get("memory_recall")
-        if memory_recall:
-            prompt_lines.append("\n=== ИЗ ДОЛГОВРЕМЕННОЙ ПАМЯТИ (ACTIVE RECALL) ===")
-            prompt_lines.append(f"Поисковый запрос: {memory_recall.get('query', '')}")
-            prompt_lines.append(memory_recall.get("summary", ""))
-            prompt_lines.append("=== КОНЕЦ ПАМЯТИ ===")
-
-        prompt_lines.extend([
-            "",
-            f"AVAILABLE TOOLS (JSON):",
-            tools_json,
-            "",
-        ])
-
-        return "\n".join(prompt_lines)
-
-    def _build_json_compiler_prompt(self, thinker_result: str) -> str:
-        base_prompt = self._prompts.get("json_compiler", "")
-
-        prompt_lines = [
-            base_prompt,
-            "",
-            "Thinker's plan:",
-            thinker_result,
-            "",
-        ]
-
-        return "\n".join(prompt_lines)
-
-    def _determine_mode_from_context(self, context: dict[str, Any]) -> str:
-        """Legacy method - kept for compatibility"""
-        task_summary = str(context.get("task_summary", "")).lower()
-        keywords = ["запусти", "выполни", "создай", "напиши", "удали", "run", "execute", "create"]
-        for kw in keywords:
-            if kw in task_summary:
-                return "execution"
-        return "conversation"
-
-    async def _classify_intent(self, task_summary: str) -> str:
-        """LLM-based intent classification"""
-        if self._intent_classifier == "orchestrator" and self._orchestrator:
-            classifier_model = self._orchestrator
-        else:
-            classifier_model = self._thinker
-        
-        if not classifier_model:
-            logger.warning("No classifier model available, using default")
-            return "conversation"
-        
-        classification_prompt = f"""Классифицируй запрос пользователя: "{task_summary}"
-
-Классы:
-- execution: чтобы ответить, агенту нужно обратиться к локальной среде, файлам, shell, tools, памяти, сети или выполнить проверку/операцию. Это включает вопросы о текущем состоянии ПК, установленных пакетах, файлах, процессах, времени работы, обновлениях, логах.
-- conversation: можно ответить сразу из диалога и общих знаний, без проверки локальной среды и без tools.
-- clarification_needed: нельзя понять, что именно пользователь хочет.
-
-Верни ровно один токен без рассуждений: execution или conversation или clarification_needed"""
-        
-        try:
-            result = await classifier_model.generate(classification_prompt)
-            classification = self._extract_classification(result)
-            if classification:
-                logger.info(f"Intent classified: {classification} for task: {task_summary}")
-                return classification
-            
-            logger.warning(f"Invalid classification result: {result}, defaulting to conversation")
-            return "conversation"
-        except Exception as e:
-            logger.warning(f"Intent classification failed: {e}, defaulting to conversation")
-            return "conversation"
-
-    def _extract_classification(self, raw_result: str) -> str | None:
-        result = raw_result.strip().lower()
-        allowed = {"execution", "conversation", "clarification_needed"}
-        if result in allowed:
-            return result
-
-        result = re.sub(r"<think>.*?</think>", " ", result, flags=re.DOTALL)
-        if (
-            "shell_exec" in result
-            or "execute command" in result
-            or "command execution" in result
-            or "use the tool" in result
-            or "use a tool" in result
-        ):
-            return "execution"
-        tokens = re.findall(r"\b(execution|conversation|clarification_needed)\b", result)
-        if tokens:
-            return tokens[-1]
-
-        first_word = result.split()[0] if result.split() else ""
-        if first_word in allowed:
-            return first_word
-
-        return None
-
-    def _validate_directive(self, output: str, mode_hint: str) -> ExecutionDirective | None:
-        if not output:
-            return None
-
-        try:
-            json_start = output.find("{")
-            json_end = output.rfind("}") + 1
-            if json_start < 0 or json_end <= 0:
-                return None
-
-            json_str = output[json_start:json_end]
-            data = json.loads(json_str)
-
-            if "type" not in data:
-                return None
-
-            msg_type = data.get("type", "")
-            payload = data.get("payload", {})
-            
-            if msg_type == "step" and "tool" in payload:
-                tool = payload.get("tool", "")
-                args = payload.get("args", {})
-                payload = {"tool": tool, "args": args}
-            
-            if msg_type == "plan":
-                payload = {"steps": payload.get("steps", [])}
-            
-            return ExecutionDirective(
-                type=msg_type,
-                payload=payload,
-                confidence=data.get("confidence", 0.9),
-                reason=data.get("reason", ""),
-            )
-        except (json.JSONDecodeError, ValueError, TypeError) as e:
-            logger.warning(f"Directive JSON validation failed: {e}")
-            return None
-
-    def _guard_rail_check(self, directive: ExecutionDirective) -> ExecutionDirective:
-        tool_name = directive.payload.get("tool", "")
-        if tool_name in {"shell_exec", "file_write", "file_delete"}:
-            return ExecutionDirective(
-                type=directive.type,
-                payload=directive.payload,
-                requires_permission=True,
-                confidence=directive.confidence,
-                reason=directive.reason,
-            )
-        return directive
-
-    def _add_thinker_feedback(self, prompt: str, error: str, attempt: int) -> str:
-        feedback = f"\n[ATTEMPT {attempt + 1} FAILED: {error}]\n"
-        feedback += "Provide a valid semantic plan.\n"
-        return prompt + feedback
-
-    def _emit_event(
-        self,
-        event_type: str,
-        payload: dict[str, Any],
-        task_id: str | None,
-        session_id: str | None,
-    ) -> None:
-        if self._event_bus and task_id:
-            from app.core.contracts import RuntimeEvent
-            event = RuntimeEvent(
-                task_id=task_id,
-                session_id=session_id or "unknown",
-                sequence=self._event_bus.next_sequence(task_id),
-                type=event_type,
-                payload=payload,
-            )
-            self._event_bus.publish(event)
-
-    SYS_UTIL_PROMPT = None
-
-    async def _fix_invalid_json(self, invalid_result: str, attempt: int, task_id: str | None, session_id: str | None) -> str | None:
-        """Try to fix invalid JSON using sys_util model."""
-        if not self._sys_util:
-            return None
-
-        first_brace = invalid_result.find('{')
-        last_brace = invalid_result.rfind('}')
-        if first_brace < 0 or last_brace <= first_brace:
-            return None
-
-        truncated_json = invalid_result[first_brace:last_brace + 1]
-
-        error_msg = ""
-        try:
-            json.loads(truncated_json)
-        except json.JSONDecodeError as e:
-            error_msg = str(e)
-
-        sys_util_prompt = (
-            self._prompts.get("sys_util")
-            if self._prompts
-            else self.SYS_UTIL_PROMPT or (
-                "You are a STRICT JSON repair engine. "
-                "Your job is ONLY to fix invalid JSON syntax. "
-                "You MUST output valid JSON or nothing else."
-            )
-        )
-        fix_prompt = f"""{sys_util_prompt}
-
- {error_msg}
-
- Fixed JSON:"""
-
-        try:
-            logger.info(f"JSON fix using sys_util model (attempt {attempt + 1})")
-            fixed_result = await self._sys_util.generate(fix_prompt)
-
-            fixed_first = fixed_result.find('{')
-            fixed_last = fixed_result.rfind('}')
-            if fixed_first >= 0 and fixed_last > fixed_first:
-                return fixed_result[fixed_first:fixed_last + 1]
-
-            return None
-
-        except Exception as e:
-            logger.warning(f"JSON fix failed: {e}")
-            return None
diff --git a/app/core/command_analyzer.py b/app/core/command_analyzer.py
deleted file mode 100644
index b2a1e4e..0000000
--- a/app/core/command_analyzer.py
+++ /dev/null
@@ -1,60 +0,0 @@
-from __future__ import annotations
-
-import re
-import shlex
-from typing import Any
-
-from app.core.permission_service import PermissionService
-
-
-class CommandAnalyzer:
-    """Deterministic shell action analyzer for structured critic evidence."""
-
-    _SPLIT_RE = re.compile(r"\s*(?:&&|;)\s*")
-
-    def __init__(self, permission_service: PermissionService) -> None:
-        self._permission_service = permission_service
-
-    def analyze(self, command: str, task_id: str, session_id: str) -> dict[str, Any]:
-        segments = [segment.strip() for segment in self._SPLIT_RE.split(command) if segment.strip()]
-        root_required: list[str] = []
-        elevated: list[str] = []
-        unelevated_root: list[str] = []
-
-        for segment in segments:
-            normalized, is_elevated = self._strip_sudo(segment)
-            check = self._permission_service.check_shell_command(
-                task_id=task_id,
-                session_id=session_id,
-                command=normalized,
-            )
-            if check.get("requires_sudo"):
-                root_required.append(normalized)
-                if is_elevated:
-                    elevated.append(normalized)
-                else:
-                    unelevated_root.append(normalized)
-
-        diagnosis_type = "privilege_scope_error" if unelevated_root else "ok"
-        return {
-            "type": diagnosis_type,
-            "command": command,
-            "segments": segments,
-            "root_required_segments": root_required,
-            "elevated_segments": elevated,
-            "unelevated_root_segments": unelevated_root,
-        }
-
-    def _strip_sudo(self, segment: str) -> tuple[str, bool]:
-        try:
-            parts = shlex.split(segment)
-        except ValueError:
-            return segment, segment.strip().startswith("sudo ")
-        if not parts or parts[0] != "sudo":
-            return segment, False
-        index = 1
-        while index < len(parts) and parts[index].startswith("-"):
-            index += 1
-            if index < len(parts) and parts[index - 1] in {"-p", "--prompt"}:
-                index += 1
-        return " ".join(shlex.quote(part) for part in parts[index:]), True
diff --git a/app/core/config.py b/app/core/config.py
deleted file mode 100644
index 2e7090b..0000000
--- a/app/core/config.py
+++ /dev/null
@@ -1,93 +0,0 @@
-from __future__ import annotations
-
-import json
-from pathlib import Path
-from typing import Any
-
-from pydantic import BaseModel, Field
-
-
-class ModelsConfig(BaseModel):
-    orchestrator_path: str = "models/llama.gguf"
-    coder_path: str = "models/xcoder.gguf"
-    critic_path: str = "models/gemma.gguf"
-    embeddings_path: str = "models/all-MiniLM-L6-v2"
-    inference: dict[str, Any] = Field(default_factory=dict)
-    thinker: dict[str, Any] = Field(default_factory=dict)
-    json_compiler: dict[str, Any] = Field(default_factory=dict)
-    orchestrator: dict[str, Any] = Field(default_factory=dict)
-    coder: dict[str, Any] = Field(default_factory=dict)
-    critic: dict[str, Any] = Field(default_factory=dict)
-    sys_util: dict[str, Any] = Field(default_factory=dict)
-    embeddings: dict[str, Any] = Field(default_factory=dict)
-
-
-class PromptsConfig(BaseModel):
-    orchestration_prompt: str = ""
-    planning_prompt: str = ""
-    coder_prompt: str = ""
-    critic_prompt: str = ""
-
-
-class PermissionsConfig(BaseModel):
-    dangerous_commands: dict[str, str] = Field(default_factory=dict)
-    sensitive_paths: list[str] = Field(default_factory=list)
-    default_approval_behavior: str = "ask_always"
-
-
-class RuntimeConfig(BaseModel):
-    step_timeout_ms: int = 30_000
-    task_timeout_ms: int = 300_000
-    shell_command_timeout_ms: int = 3_600_000
-    shell_idle_timeout_ms: int = 600_000
-    planner_retry_limit: int = 2
-    tool_retry_limit: int = 1
-    replan_limit: int = 1
-    max_execution_steps: int = 20
-    retrieval_top_k: int = 5
-    max_context_tokens: int = 8192
-    context_budgets: dict[str, int] = Field(default_factory=lambda: {
-        "system": 512,
-        "task": 512,
-        "memory": 2048,
-        "execution": 2048,
-        "tools": 1024,
-        "safety": 512,
-    })
-    reserve_for_generation_pct: int = 25
-    orchestrator_retry_limit: int = 2
-    intent_classifier: str = "thinker"
-    recall_model: str = "sys_util"
-    memory_thresholds: dict[str, float] = Field(default_factory=dict)
-    critic_fallback_policy: str = "continue_without_critic"
-    checkpoint_policy: dict[str, Any] = Field(default_factory=dict)
-    event_retention_policy: dict[str, Any] = Field(default_factory=dict)
-    streaming_settings: dict[str, Any] = Field(default_factory=dict)
-    debug: bool = False
-    debug_orchestrator_log_length: int = 500
-    json_fix_retry_limit: int = 2
-    json_fix_use_sys_util: bool = True
-    recall_model: str = "json_compiler"
-    critic_retry_limit: int = 2
-
-
-class AppConfig(BaseModel):
-    models: ModelsConfig
-    prompts: PromptsConfig
-    permissions: PermissionsConfig
-    runtime: RuntimeConfig
-
-
-def _load_json(path: Path) -> dict[str, Any]:
-    with path.open("r", encoding="utf-8") as handle:
-        return json.load(handle)
-
-
-def load_app_config(config_dir: str | Path) -> AppConfig:
-    config_path = Path(config_dir)
-    return AppConfig(
-        models=ModelsConfig.model_validate(_load_json(config_path / "models.json")),
-        prompts=PromptsConfig.model_validate(_load_json(config_path / "prompts.json")),
-        permissions=PermissionsConfig.model_validate(_load_json(config_path / "permissions.json")),
-        runtime=RuntimeConfig.model_validate(_load_json(config_path / "runtime.json")),
-    )
diff --git a/app/core/context_builder.py b/app/core/context_builder.py
deleted file mode 100644
index 77f4b32..0000000
--- a/app/core/context_builder.py
+++ /dev/null
@@ -1,172 +0,0 @@
-from __future__ import annotations
-
-import logging
-from typing import Any
-
-from app.core.contracts import TaskCheckpoint, UserTask
-
-logger = logging.getLogger(__name__)
-
-DEFAULT_BUDGETS = {
-    "system": 512,
-    "task": 512,
-    "memory": 2048,
-    "execution": 2048,
-    "tools": 1024,
-    "safety": 512,
-}
-
-
-class ContextBuilder:
-    def __init__(
-        self,
-        memory_interface=None,
-        tool_registry=None,
-        config: dict[str, Any] | None = None,
-    ) -> None:
-        self._memory = memory_interface
-        self._tool_registry = tool_registry
-        self._config = config or {}
-        self._max_tokens = self._config.get("max_context_tokens", 8192)
-        self._budgets = self._config.get("context_budgets", DEFAULT_BUDGETS)
-        self._reserve_pct = self._config.get("reserve_for_generation_pct", 25)
-
-    def build(
-        self,
-        task: UserTask,
-        checkpoint: TaskCheckpoint | None = None,
-        query: str | None = None,
-    ) -> dict[str, Any]:
-        task_summary = task.input
-        search_query = query or task_summary
-        session_id = task.session_id
-
-        memory_context = []
-        if self._memory:
-            memory_context = self._retrieve_memory(search_query, session_id=session_id)
-
-        budgets = self._calculate_budgets()
-        reserved = self._reserve_for_generation()
-
-        system_budget = budgets.get("system", 512)
-        task_budget = budgets.get("task", 512)
-        safety_budget = budgets.get("safety", 512)
-        memory_budget = budgets.get("memory", 2048)
-
-        truncated_memory = self._truncate_memory(
-            memory_context, memory_budget
-        )
-
-        # Get session history for follow-up context
-        session_history = self._get_session_history(session_id)
-
-        context = {
-            "system_prompt": "",
-            "task_summary": task_summary[:task_budget],
-            "task_context": task.context,
-            "memory_context": truncated_memory,
-            "session_history": session_history,
-            "execution_context": checkpoint.model_dump() if checkpoint else {},
-            "tool_context": self._get_tool_context(),
-            "safety_context": {},
-            "constraints": {
-                "budgets": budgets,
-                "reserved_for_generation": reserved,
-                "original_memory_count": len(memory_context),
-                "truncated_memory_count": len(truncated_memory),
-            },
-        }
-
-        return context
-
-    def _get_tool_context(self) -> list[dict[str, Any]]:
-        """Expose available tools to orchestrator."""
-        if not self._tool_registry:
-            return []
-        
-        tools = []
-        for name in self._tool_registry.list_names():
-            tool = self._tool_registry.get(name)
-            tools.append({
-                "name": name,
-                "description": getattr(tool, "description", ""),
-            })
-        return tools
-
-    def _calculate_budgets(self) -> dict[str, int]:
-        return dict(self._budgets)
-
-    def _reserve_for_generation(self) -> int:
-        return int(self._max_tokens * self._reserve_pct / 100)
-
-    def _retrieve_memory(
-        self,
-        query: str,
-        session_id: str | None = None,
-        top_k: int = 5,
-    ) -> list[dict[str, Any]]:
-        if not self._memory:
-            return []
-
-        try:
-            results = self._memory.search(query, top_k=top_k, session_id=session_id)
-            return [
-                {
-                    "id": entry.id,
-                    "text": entry.text,
-                    "kind": entry.kind,
-                    "source": entry.source,
-                    "weight": entry.weight,
-                    "score": score,
-                }
-                for entry, score in results
-            ]
-        except Exception as e:
-            logger.warning(f"Memory retrieval failed: {e}")
-            return []
-
-    def _get_session_history(self, session_id: str | None = None) -> list[dict[str, Any]]:
-        """Get previous task summaries from the same session for context."""
-        if not self._memory or not session_id:
-            return []
-
-        try:
-            # Get recent entries from same session
-            entries = self._memory.get_by_session(session_id, limit=5)
-            # Filter to only task summaries
-            summaries = [
-                {
-                    "id": entry.id,
-                    "text": entry.text,
-                    "kind": entry.kind,
-                    "source": entry.source,
-                    "weight": entry.weight,
-                }
-                for entry in entries
-                if entry.kind in ("summary", "tool_result")
-            ]
-            return summaries
-        except Exception as e:
-            logger.warning(f"Session history retrieval failed: {e}")
-            return []
-
-    def _truncate_memory(
-        self,
-        memory_context: list[dict[str, Any]],
-        budget: int,
-    ) -> list[dict[str, Any]]:
-        if not memory_context:
-            return []
-
-        estimated_per_entry = 50
-        max_entries = max(budget // estimated_per_entry, 1)
-
-        if len(memory_context) > max_entries:
-            return memory_context[:max_entries]
-
-        return memory_context
-
-    def estimate_tokens(self, text: str) -> int:
-        if not text:
-            return 0
-        return len(text.split()) * 4 // 3
\ No newline at end of file
diff --git a/app/core/contracts.py b/app/core/contracts.py
deleted file mode 100644
index 2a1baa8..0000000
--- a/app/core/contracts.py
+++ /dev/null
@@ -1,148 +0,0 @@
-from __future__ import annotations
-
-from datetime import datetime, timezone
-from typing import Any, Literal
-from uuid import uuid4
-
-from pydantic import BaseModel, Field
-
-
-def utc_now() -> datetime:
-    return datetime.now(timezone.utc)
-
-
-class UserTask(BaseModel):
-    task_id: str = Field(default_factory=lambda: str(uuid4()))
-    session_id: str = Field(default_factory=lambda: str(uuid4()))
-    input: str
-    context: dict[str, Any] = Field(default_factory=dict)
-    created_at: datetime = Field(default_factory=utc_now)
-
-
-class PlanStep(BaseModel):
-    id: str
-    kind: Literal["tool", "coder", "memory", "respond"]
-    tool: str | None = None
-    args: dict[str, Any] = Field(default_factory=dict)
-    description: str
-    requires_confirmation: bool = False
-    depends_on: list[str] = Field(default_factory=list)
-
-
-class ToolCall(BaseModel):
-    tool: str
-    args: dict[str, Any] = Field(default_factory=dict)
-    task_id: str
-    step_id: str
-
-
-class ToolResult(BaseModel):
-    tool: str
-    ok: bool
-    output: Any = None
-    error: str | None = None
-    metadata: dict[str, Any] = Field(default_factory=dict)
-
-
-class CoderRequest(BaseModel):
-    mode: Literal["generate", "fix", "refactor"]
-    instruction: str
-    context: dict[str, Any] = Field(default_factory=dict)
-    task_id: str
-
-
-class CriticScore(BaseModel):
-    correctness: float = Field(ge=0.0, le=1.0)
-    usefulness: float = Field(ge=0.0, le=1.0)
-    safety: float = Field(ge=0.0, le=1.0)
-    memory_store: bool
-    weight: float = Field(ge=0.0, le=1.0)
-    explanation: str
-
-
-class MemoryEntry(BaseModel):
-    id: str = Field(default_factory=lambda: str(uuid4()))
-    text: str
-    kind: Literal["tool_result", "plan", "critique", "fact", "summary", "user_preference"]
-    source: Literal["tool", "critic", "user", "system"]
-    weight: float = Field(ge=0.0, le=1.0)
-    task_id: str | None = None
-    session_id: str | None = None
-    metadata: dict[str, Any] = Field(default_factory=dict)
-    created_at: datetime = Field(default_factory=utc_now)
-    embedding_model: str
-    embedding_dim: int
-
-
-class PermissionDecision(BaseModel):
-    action_type: str
-    pattern: str
-    decision: Literal["allow_once", "allow_always", "deny", "ask_always"]
-    created_at: datetime = Field(default_factory=utc_now)
-
-
-class RuntimeEvent(BaseModel):
-    event_id: str = Field(default_factory=lambda: str(uuid4()))
-    task_id: str
-    session_id: str
-    sequence: int
-    type: str
-    timestamp: datetime = Field(default_factory=utc_now)
-    payload: dict[str, Any] = Field(default_factory=dict)
-    causation_id: str | None = None
-    correlation_id: str = Field(default_factory=lambda: str(uuid4()))
-
-
-class TaskCheckpoint(BaseModel):
-    task_id: str
-    status: str
-    active_step_id: str | None = None
-    plan_snapshot: dict[str, Any] = Field(default_factory=dict)
-    context_snapshot: dict[str, Any] = Field(default_factory=dict)
-    updated_at: datetime = Field(default_factory=utc_now)
-
-
-class PermissionRequest(BaseModel):
-    task_id: str
-    session_id: str
-    action_type: str
-    pattern: str
-    command: str | None = None
-    path: str | None = None
-    requires_password: bool = False
-
-
-class SecretRequest(BaseModel):
-    task_id: str
-    session_id: str
-    kind: str
-    prompt: str
-    command: str | None = None
-
-
-class PasswordRequest(BaseModel):
-    task_id: str
-    session_id: str
-    command: str
-    reason: str
-    attempts: int = 0
-    max_attempts: int = 3
-
-
-class ExecutionDirective(BaseModel):
-    type: Literal[
-        "plan",
-        "tool",
-        "coder",
-        "respond",
-        "replan",
-        "store_memory",
-        "request_permission",
-        "complete",
-        "fail",
-        "noop",
-    ]
-    payload: dict[str, Any] = Field(default_factory=dict)
-    requires_permission: bool = False
-    confidence: float = Field(ge=0.0, le=1.0, default=0.0)
-    reason: str = ""
diff --git a/app/core/execution_engine.py b/app/core/execution_engine.py
deleted file mode 100644
index cb0d8c1..0000000
--- a/app/core/execution_engine.py
+++ /dev/null
@@ -1,975 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import json
-import logging
-from typing import Any
-
-from app.core.contracts import (
-    CriticScore,
-    ExecutionDirective,
-    PermissionDecision,
-    PermissionRequest,
-    RuntimeEvent,
-    SecretRequest,
-    ToolCall,
-    ToolResult,
-    UserTask,
-)
-from app.core.command_analyzer import CommandAnalyzer
-from app.core.execution_scheduler import ExecutionScheduler
-from app.events.event_bus import EventBus
-from app.events.event_types import (
-    CRITIC_CALLED,
-    CRITIC_RESULT,
-    PERMISSION_REQUESTED,
-    PERMISSION_RESOLVED,
-    PLAN_FAILED,
-    PLAN_STARTED,
-    SECRET_REQUESTED,
-    STEP_STARTED,
-    STEPPED_COMPLETED,
-    TOOL_CALLED,
-    TOOL_COMPLETED,
-    TOOL_OUTPUT_CHUNK,
-)
-from app.models.async_adapters import AsyncCriticAdapter, AsyncCoderAdapter
-from app.memory.write_policy import MemoryWritePolicy
-from app.memory.interface import MemoryInterface
-
-logger = logging.getLogger(__name__)
-
-
-class ExecutionEngine:
-    def __init__(
-        self,
-        event_bus: EventBus,
-        tool_registry,
-        permission_service,
-        scheduler: ExecutionScheduler | None = None,
-        critic: AsyncCriticAdapter | None = None,
-        memory_policy: MemoryWritePolicy | None = None,
-        memory_interface: MemoryInterface | None = None,
-        prompts: dict[str, str] | None = None,
-        recovery_limit: int = 1,
-        critic_retry_limit: int = 2,
-        command_analyzer: CommandAnalyzer | None = None,
-    ) -> None:
-        self._event_bus = event_bus
-        self._tool_registry = tool_registry
-        self._permission_service = permission_service
-        self._scheduler = scheduler or ExecutionScheduler()
-        self._critic = critic
-        self._coder: AsyncCoderAdapter | None = None
-        self._memory_policy = memory_policy
-        self._memory_interface = memory_interface
-        self._prompts = prompts or {}
-        self._recovery_limit = recovery_limit
-        self._critic_retry_limit = critic_retry_limit
-        self._command_analyzer = command_analyzer
-
-    def set_critic(self, critic: AsyncCriticAdapter) -> None:
-        self._critic = critic
-
-    def set_coder(self, coder: AsyncCoderAdapter) -> None:
-        self._coder = coder
-
-    def set_memory_policy(self, policy: MemoryWritePolicy) -> None:
-        self._memory_policy = policy
-
-    def execute(
-        self,
-        task: UserTask,
-        directive: ExecutionDirective,
-        permission_override: PermissionDecision | None = None,
-        secret_override: str | None = None,
-        password_override: str | None = None,
-    ) -> dict[str, Any]:
-        scheduled = self._scheduler.next_directive(directive)
-        self._publish(task, STEP_STARTED, {"directive_type": scheduled.type})
-
-        if scheduled.type == "plan":
-            return self._execute_plan(
-                task=task,
-                directive=scheduled,
-                permission_override=permission_override,
-                secret_override=secret_override,
-                password_override=password_override,
-            )
-
-        if scheduled.type == "tool":
-            return self._execute_tool(
-                task=task,
-                directive=scheduled,
-                permission_override=permission_override,
-                secret_override=secret_override,
-                password_override=password_override,
-            )
-
-        if scheduled.type == "respond":
-            return {
-                "status": "completed",
-                "result": {
-                    "message": scheduled.payload.get("text", f"Runtime accepted task: {task.input}"),
-                    "mode": scheduled.payload.get("mode", "direct_response"),
-                },
-                "directive": scheduled.model_dump(mode="json"),
-            }
-
-        if scheduled.type == "coder":
-            return self._execute_coder(
-                task=task,
-                directive=scheduled,
-            )
-
-        if scheduled.type == "fail":
-            return {
-                "status": "failed",
-                "result": {"error": scheduled.reason or "Execution failed."},
-            }
-
-        return {
-            "status": "completed",
-            "result": {
-                "message": "Directive accepted.",
-                "directive_type": scheduled.type,
-            },
-        }
-
-    def _execute_plan(
-        self,
-        task: UserTask,
-        directive: ExecutionDirective,
-        permission_override: PermissionDecision | None = None,
-        secret_override: str | None = None,
-        password_override: str | None = None,
-    ) -> dict[str, Any]:
-        # Unified format: {"type": "plan", "payload": {"steps": [...]}}
-        # Need to extract steps from nested payload
-        import json
-        
-        payload = directive.payload
-        steps_data = []
-        
-        # If payload has "steps" directly, use them
-        if "steps" in payload:
-            steps_data = payload.get("steps", [])
-        # If payload is a string (JSON), parse it
-        elif isinstance(payload, str) and payload.strip().startswith("{"):
-            try:
-                parsed = json.loads(payload)
-                steps_data = parsed.get("payload", {}).get("steps", [])
-            except:
-                steps_data = []
-        
-        if steps_data:
-            plan_json = json.dumps({"type": "plan", "payload": {"steps": steps_data}})
-        else:
-            plan_json = json.dumps(payload)
-
-        plan_steps = self._scheduler.parse_plan_steps(plan_json, task.task_id)
-
-        if not plan_steps:
-            return {
-                "status": "failed",
-                "result": {"error": "Failed to parse plan steps from directive"},
-            }
-
-        if not self._scheduler.validate_no_cycles(plan_steps):
-            self._publish(task, PLAN_FAILED, {"error": "Cycle detected in plan"})
-            return {
-                "status": "failed",
-                "result": {"error": "Cycle detected in plan"},
-            }
-
-        graph = self._scheduler.build_task_graph(plan_steps)
-        self._publish(task, PLAN_STARTED, {"steps": len(plan_steps)})
-
-        completed_steps: set[str] = set()
-        step_results: list[dict[str, Any]] = []
-        critic_retries_used = 0  # Track critic→replan cycles
-
-        ready_steps = self._get_ready_steps(graph, completed_steps)
-
-        while ready_steps:
-            step = ready_steps.pop(0)
-
-            # Handle respond kind directly without tool execution
-            if step.kind == "respond":
-                result = {
-                    "status": "completed",
-                    "result": {
-                        "message": step.args.get("text", step.description),
-                    },
-                }
-            else:
-                step_directive = ExecutionDirective(
-                    type=step.kind,
-                    payload={
-                        "tool": step.tool,
-                        "args": step.args,
-                    },
-                    requires_permission=step.requires_confirmation,
-                    reason=step.description,
-                )
-
-                result = self._execute_tool(
-                    task=task,
-                    directive=step_directive,
-                    permission_override=permission_override,
-                    secret_override=secret_override,
-                    password_override=password_override,
-                )
-
-            # If tool needs human input/review - return immediately.
-            if result.get("status") in (
-                "awaiting_permission",
-                "awaiting_input",
-                "awaiting_password",
-                "awaiting_review",
-            ):
-                return {
-                    "status": result.get("status"),
-                    "result": result.get("result", {}),
-                    "step_results": step_results,
-                }
-
-            step_results.append({
-                "step_id": step.id,
-                "result": result,
-            })
-
-            completed_steps.add(step.id)
-            self._publish(task, STEPPED_COMPLETED, {
-                "step_id": step.id,
-                "status": result.get("status"),
-            })
-
-            # === Critic evaluation ===
-            if self._critic and result.get("status") == "completed":
-                critic_score = self._evaluate_with_critic(task, step, result)
-                if critic_score:
-                    result["critic_score"] = {
-                        "correctness": critic_score.correctness,
-                        "usefulness": critic_score.usefulness,
-                        "safety": critic_score.safety,
-                        "memory_store": critic_score.memory_store,
-                        "weight": critic_score.weight,
-                        "explanation": critic_score.explanation,
-                    }
-                    self._save_critique_to_memory(task, step, critic_score)
-
-                    # Check if step result is satisfactory
-                    min_correctness = 0.5
-                    if critic_score.correctness < min_correctness:
-                        # Step failed critic check — try to recover
-                        if critic_retries_used < self._critic_retry_limit and step.kind != "respond":
-                            critic_retries_used += 1
-                            self._publish(task, CRITIC_RESULT, {
-                                "step_id": step.id,
-                                "score": critic_score.model_dump(mode="json"),
-                                "action": "retry",
-                                "retry": critic_retries_used,
-                            })
-                            # Retry the same step — rebuild directive
-                            retry_directive = ExecutionDirective(
-                                type=step.kind,
-                                payload={"tool": step.tool, "args": step.args},
-                                requires_permission=step.requires_confirmation,
-                                reason=step.description,
-                            )
-                            retry_result = self._execute_tool(
-                                task=task,
-                                directive=retry_directive,
-                                permission_override=permission_override,
-                                secret_override=secret_override,
-                                password_override=password_override,
-                            )
-                            if retry_result.get("status") == "completed":
-                                result = retry_result
-                                step_results[-1]["result"] = result
-                                # Re-evaluate after retry
-                                critic_score2 = self._evaluate_with_critic(task, step, result)
-                                if critic_score2 and critic_score2.correctness >= min_correctness:
-                                    # Retry succeeded
-                                    continue
-                            # If retry also failed, continue to next step
-                        else:
-                            self._publish(task, CRITIC_RESULT, {
-                                "step_id": step.id,
-                                "score": critic_score.model_dump(mode="json"),
-                                "action": "give_up",
-                                "reason": f"Critic retry limit ({self._critic_retry_limit}) reached",
-                            })
-
-            # Handle failed step
-            if result.get("status") == "failed":
-                review = self._build_failed_step_review(task, step, result)
-                if review:
-                    return {
-                        "status": "awaiting_review",
-                        "result": {
-                            "error": f"Step {step.id} requires review before replanning",
-                            "failed_step": step.id,
-                            "step_results": step_results,
-                            "review": review,
-                        },
-                    }
-                recovery = self._recover_failed_step(
-                    task=task,
-                    step=step,
-                    result=result,
-                    step_results=step_results,
-                    permission_override=permission_override,
-                    secret_override=secret_override,
-                    password_override=password_override,
-                )
-                if recovery.get("status") == "awaiting_permission":
-                    return recovery
-                if recovery.get("status") == "completed":
-                    recovered_result = recovery.get("result")
-                    if recovered_result:
-                        step_results[-1]["result"] = recovered_result
-                    if recovery.get("finish"):
-                        return {
-                            "status": "completed",
-                            "result": {
-                                "message": recovery.get("message", "Recovered from failed step"),
-                                "step_results": step_results,
-                            },
-                        }
-                else:
-                    return {
-                        "status": "failed",
-                        "result": {
-                            "error": f"Step {step.id} failed",
-                            "failed_step": step.id,
-                            "step_results": step_results,
-                            "recovery": recovery.get("result"),
-                        },
-                    }
-
-            ready_steps = self._get_ready_steps(graph, completed_steps)
-
-        return {
-            "status": "completed",
-            "result": {
-                "message": f"Plan executed: {len(completed_steps)} steps completed",
-                "step_results": step_results,
-            },
-        }
-
-    def _build_failed_step_review(self, task: UserTask, step, result: dict[str, Any]) -> dict[str, Any] | None:
-        if step.tool != "shell_exec" or not self._command_analyzer:
-            return None
-        command = str((step.args or {}).get("command", ""))
-        if not command:
-            return None
-        diagnosis = self._command_analyzer.analyze(
-            command=command,
-            task_id=task.task_id,
-            session_id=task.session_id,
-        )
-        if diagnosis.get("type") == "ok":
-            return None
-        return {
-            "step_id": step.id,
-            "tool": step.tool,
-            "command": command,
-            "diagnosis": diagnosis,
-            "critic_assessment": {
-                "classification": "model_planning_error",
-                "needs_replan": True,
-                "explanation": "Structured command analysis found a model action error before recovery.",
-            },
-        }
-
-    def _recover_failed_step(
-        self,
-        task: UserTask,
-        step,
-        result: dict[str, Any],
-        step_results: list[dict[str, Any]],
-        permission_override: PermissionDecision | None = None,
-        secret_override: str | None = None,
-        password_override: str | None = None,
-    ) -> dict[str, Any]:
-        if self._recovery_limit <= 0 or not self._critic:
-            return {"status": "failed", "result": {"reason": "recovery_unavailable"}}
-
-        decision = self._evaluate_recovery(task, step, result, step_results)
-        action = decision.get("action", "fail")
-
-        if action == "continue":
-            recovered = dict(result)
-            recovered["status"] = "completed"
-            recovered["recovery_decision"] = decision
-            return {"status": "completed", "result": recovered}
-
-        if action == "respond":
-            recovered = dict(result)
-            recovered["status"] = "completed"
-            recovered["recovery_decision"] = decision
-            return {
-                "status": "completed",
-                "result": recovered,
-                "finish": True,
-                "message": decision.get("message") or decision.get("reason") or "Recovered by responding to user",
-            }
-
-        if action == "retry":
-            retry_tool = decision.get("tool") or step.tool
-            retry_args = decision.get("args") or step.args
-            retry_result = self._execute_tool(
-                task=task,
-                directive=ExecutionDirective(
-                    type="tool",
-                    payload={"tool": retry_tool, "args": retry_args},
-                    requires_permission=True,
-                    reason=decision.get("reason", "Recovery retry"),
-                ),
-                permission_override=permission_override,
-                secret_override=secret_override,
-                password_override=password_override,
-            )
-            if retry_result.get("status") == "awaiting_permission":
-                return retry_result
-            retry_result["recovery_decision"] = decision
-            if retry_result.get("status") == "completed":
-                return {"status": "completed", "result": retry_result}
-            return {"status": "failed", "result": {"decision": decision, "retry_result": retry_result}}
-
-        return {"status": "failed", "result": decision}
-
-    def _evaluate_recovery(
-        self,
-        task: UserTask,
-        step,
-        result: dict[str, Any],
-        step_results: list[dict[str, Any]],
-    ) -> dict[str, Any]:
-        prompt = self._build_recovery_prompt(task, step, result, step_results)
-        self._publish(task, CRITIC_CALLED, {"step_id": step.id, "mode": "recovery"})
-
-        try:
-            output = asyncio.run(self._critic.generate(prompt, max_tokens=512))
-            decision = self._parse_recovery_decision(output)
-            self._publish(task, CRITIC_RESULT, {
-                "step_id": step.id,
-                "mode": "recovery",
-                "decision": decision,
-                "raw": output,
-            })
-            return decision
-        except Exception as e:
-            logger.warning(f"Recovery evaluation failed: {e}")
-            self._publish(task, CRITIC_RESULT, {
-                "step_id": step.id,
-                "mode": "recovery",
-                "error": str(e),
-            })
-            return {"action": "fail", "reason": str(e)}
-
-    def _build_recovery_prompt(
-        self,
-        task: UserTask,
-        step,
-        result: dict[str, Any],
-        step_results: list[dict[str, Any]],
-    ) -> str:
-        return f"""You are a recovery controller for an agent runtime.
-
-Decide what to do after a failed tool step. A non-zero exit code is not always fatal.
-Interpret the failure in context.
-
-Allowed actions:
-- continue: failure is acceptable information; continue the plan.
-- retry: try one alternative tool call. Include "tool" and "args".
-- respond: stop and answer the user with available information. Include "message".
-- fail: real failure; stop the task.
-
-Return ONLY JSON:
-{{"action":"continue|retry|respond|fail","reason":"...","tool":"shell_exec","args":{{...}},"message":"..."}}
-
-Task:
-{task.input}
-
-Failed step:
-id={step.id}
-tool={step.tool}
-args={json.dumps(step.args, ensure_ascii=False)}
-description={step.description}
-
-Failed result:
-{json.dumps(result, ensure_ascii=False, indent=2)}
-
-Previous step results:
-{json.dumps(step_results, ensure_ascii=False, indent=2)}
-"""
-
-    def _parse_recovery_decision(self, output: str) -> dict[str, Any]:
-        try:
-            json_start = output.find("{")
-            json_end = output.rfind("}") + 1
-            if json_start < 0 or json_end <= 0:
-                return {"action": "fail", "reason": "Recovery output was not JSON"}
-            data = json.loads(output[json_start:json_end])
-            action = data.get("action", "fail")
-            if action not in {"continue", "retry", "respond", "fail"}:
-                action = "fail"
-            data["action"] = action
-            return data
-        except (json.JSONDecodeError, TypeError, ValueError) as e:
-            return {"action": "fail", "reason": f"Recovery JSON parse failed: {e}"}
-
-    def _get_ready_steps(
-        self,
-        graph: dict[str, Any],
-        completed: set[str],
-    ) -> list:
-        if not graph or not graph.get("nodes"):
-            return []
-
-        step_map: dict = graph.get("step_map", {})
-        ready = []
-
-        for node in graph["nodes"]:
-            node_id = node["id"]
-            if node_id in completed:
-                continue
-
-            deps = node.get("depends_on", [])
-            if all(dep in completed for dep in deps):
-                step = step_map.get(node_id)
-                if step:
-                    ready.append(step)
-
-        return ready
-
-    def _evaluate_with_critic(
-        self,
-        task: UserTask,
-        step,
-        result: dict[str, Any],
-    ) -> CriticScore | None:
-        if not self._critic:
-            return None
-
-        critic_prompt = self._build_critic_prompt(step, result)
-
-        self._publish(task, CRITIC_CALLED, {"step_id": step.id})
-
-        try:
-            critic_output = asyncio.run(self._critic.generate(critic_prompt))
-            score = self._parse_critic_score(critic_output)
-
-            self._publish(task, CRITIC_RESULT, {
-                "step_id": step.id,
-                "score": score.model_dump(mode="json") if score else None,
-            })
-
-            if score:
-                result["critic_score"] = {
-                    "correctness": score.correctness,
-                    "usefulness": score.usefulness,
-                    "safety": score.safety,
-                    "memory_store": score.memory_store,
-                    "weight": score.weight,
-                    "explanation": score.explanation,
-                }
-
-            return score
-
-        except Exception as e:
-            logger.warning(f"Critic evaluation failed: {e}")
-            self._publish(task, CRITIC_RESULT, {
-                "step_id": step.id,
-                "error": str(e),
-            })
-            return None
-
-    def _save_critique_to_memory(
-        self,
-        task: UserTask,
-        step,
-        score: CriticScore,
-    ) -> None:
-        """Save critic evaluation as critique entry in memory, using MemoryWritePolicy."""
-        if not self._memory_interface:
-            return
-
-        try:
-            # Check with policy before saving
-            if self._memory_policy:
-                decision = self._memory_policy.decide(
-                    critic_score=score,
-                    memory_type="critique",
-                    session_id=task.session_id,
-                )
-                if decision == "skip":
-                    logger.info(f"MemoryWritePolicy skipped critique for {step.tool}")
-                    return
-                # For "store_with_weight", we could adjust weight, but critic score already has weight
-
-            tool_name = step.tool
-            tool_args = step.args or {}
-            args_str = ", ".join([f"{k}={v}" for k, v in tool_args.items()])
-
-            critique_text = f"Tool: {tool_name}({args_str}) | Task: {task.input[:100]} | Scores: correctness={score.correctness}, usefulness={score.usefulness}, safety={score.safety} | {score.explanation}"
-
-            metadata = {
-                "task_input": task.input,
-                "tool": tool_name,
-                "args": tool_args,
-                "step_id": step.id,
-                "scores": {
-                    "correctness": score.correctness,
-                    "usefulness": score.usefulness,
-                    "safety": score.safety,
-                },
-            }
-
-            self._memory_interface.insert(
-                text=critique_text,
-                kind="critique",
-                source="critic",
-                task_id=task.task_id,
-                session_id=task.session_id,
-                weight=score.weight,
-                metadata=metadata,
-            )
-            logger.info(f"Saved critique to memory: {tool_name} task_id={task.task_id}")
-
-        except Exception as e:
-            logger.warning(f"Failed to save critique to memory: {e}")
-
-    def _build_critic_prompt(self, step, result: dict[str, Any]) -> str:
-        base_prompt = self._prompts.get("critic", "")
-        tool_result = result.get("result", {})
-
-        # Truncate long outputs to avoid exceeding context window
-        # Keep output under ~2000 chars to leave room for prompt + generation
-        output = tool_result.get("output", "")
-        if isinstance(output, str) and len(output) > 2000:
-            output = output[:2000] + "\n... [truncated]"
-        elif not isinstance(output, str):
-            output_str = json.dumps(output, ensure_ascii=False)
-            if len(output_str) > 2000:
-                output = output_str[:2000] + "\n... [truncated]"
-            else:
-                output = output_str
-
-        # Build a compact result representation
-        compact_result = {
-            "ok": tool_result.get("ok"),
-            "output": output,
-            "error": tool_result.get("error"),
-            "exit_code": tool_result.get("metadata", {}).get("exit_code"),
-        }
-
-        return f"""{base_prompt}
-
-Step: {step.description}
-Tool: {step.tool}
-Args: {step.args}
-
-Result:
-{json.dumps(compact_result, indent=2, ensure_ascii=False)}
-
-Evaluate and respond with JSON:
-{{"correctness": 0.0-1.0, "usefulness": 0.0-1.0, "safety": 0.0-1.0, "memory_store": true|false, "weight": 0.0-1.0, "explanation": "..."}}"""
-
-    def _parse_critic_score(self, output: str) -> CriticScore | None:
-        try:
-            json_start = output.find("{")
-            json_end = output.rfind("}") + 1
-            if json_start < 0:
-                return None
-
-            json_str = output[json_start:json_end]
-            data = json.loads(json_str)
-
-            return CriticScore(
-                correctness=data.get("correctness", 0.5),
-                usefulness=data.get("usefulness", 0.5),
-                safety=data.get("safety", 1.0),
-                memory_store=data.get("memory_store", False),
-                weight=data.get("weight", 0.5),
-                explanation=data.get("explanation", ""),
-            )
-
-        except (json.JSONDecodeError, ValueError, TypeError) as e:
-            logger.warning(f"Critic score parsing failed: {e}")
-            return None
-
-    def _execute_coder(
-        self,
-        task: UserTask,
-        directive: ExecutionDirective,
-    ) -> dict[str, Any]:
-        if not self._coder:
-            return {"status": "failed", "result": {"error": "Coder model not available"}}
-
-        coder_task = directive.payload.get("task", "")
-        if not coder_task:
-            return {"status": "failed", "result": {"error": "Missing task for coder"}}
-
-        try:
-            output = asyncio.run(self._coder.generate(coder_task))
-
-            return {
-                "status": "completed",
-                "result": {"code": output},
-            }
-        except Exception as e:
-            logger.warning(f"Coder execution failed: {e}")
-            return {"status": "failed", "result": {"error": str(e)}}
-
-    def _execute_tool(
-        self,
-        task: UserTask,
-        directive: ExecutionDirective,
-        permission_override: PermissionDecision | None = None,
-        secret_override: str | None = None,
-        password_override: str | None = None,
-    ) -> dict[str, Any]:
-        tool_name = str(directive.payload.get("tool", "")).strip()
-        tool_args = dict(directive.payload.get("args", {}))
-
-        if password_override:
-            tool_args["password"] = password_override
-
-        if not tool_name:
-            return {"status": "failed", "result": {"error": "Missing tool name"}}
-
-        # Tool-first: validate tool exists in registry
-        available_tools = self._tool_registry.list_names()
-        if tool_name not in available_tools:
-            return {"status": "failed", "result": {"error": f"Unknown tool: {tool_name}. Available tools: {available_tools}"}}
-
-        permission_result = None
-
-        # If permission_override is provided, skip permission check
-        if permission_override is not None:
-            permission_result = {
-                "decision": permission_override.decision,
-                "command": tool_args.get("command", ""),
-                "cached": True,
-            }
-        # Check permission for shell_exec and file_write
-        elif tool_name == "shell_exec":
-            permission_result = self._permission_service.check_shell_command(
-                task_id=task.task_id,
-                session_id=task.session_id,
-                command=str(tool_args.get("command", "")),
-            )
-        elif tool_name == "file_write":
-            # Allow writing to runtime data directory without permission check
-            write_path = str(tool_args.get("path", ""))
-            if "allowed_commands.json" in write_path or "/data/runtime" in write_path:
-                # Internal system write - allow without permission
-                permission_result = {"decision": "allowed", "path": write_path}
-            else:
-                permission_result = self._permission_service.check_write_path(
-                    task_id=task.task_id,
-                    session_id=task.session_id,
-                    path=write_path,
-                )
-
-        # Handle permission result
-        if permission_result:
-            decision = permission_result.get("decision", "unknown")
-            
-            # Hard stop - deny execution
-            if decision == "hard_stop":
-                self._publish(task, PERMISSION_REQUESTED, permission_result)
-                return {
-                    "status": "failed",
-                    "result": {
-                        "error": f"Command blocked: {permission_result.get('reason', 'Hard stop command')}",
-                        "command": permission_result.get("command", ""),
-                    },
-                }
-            
-            # Cached - already allowed
-            if decision in ("allowed_always", "allowed") or permission_result.get("cached"):
-                self._publish(task, PERMISSION_RESOLVED, permission_result)
-            
-            # Need user confirmation - return immediately, don't continue execution
-            elif decision == "prompt":
-                self._publish(task, PERMISSION_REQUESTED, permission_result)
-                return {
-                    "status": "awaiting_permission",
-                    "result": {
-                        "error": "Permission required before execution.",
-                        "permission_request": permission_result,
-                    },
-                }
-            
-            # Hard stop - return immediately
-            elif decision == "deny":
-                self._publish(task, PERMISSION_RESOLVED, permission_result)
-                return {
-                    "status": "failed",
-                    "result": {
-                        "error": "Permission denied",
-                        "command": permission_result.get("command", ""),
-                    },
-                }
-            
-            # Deny
-            elif decision == "deny":
-                self._publish(task, PERMISSION_RESOLVED, permission_result)
-                return {
-                    "status": "failed",
-                    "result": {
-                        "error": "Permission denied",
-                        "command": permission_result.get("command", ""),
-                    },
-                }
-
-        if tool_name == "shell_exec":
-            command = str(tool_args.get("command", ""))
-
-            # Determine if sudo password is needed:
-            # 1. Command explicitly starts with "sudo"
-            # 2. Command is a known sudo-requiring command (apt, systemctl, etc.) — flagged by permission service
-            needs_password = command.startswith("sudo ") or (permission_result is not None and permission_result.get("requires_sudo", False))
-
-            if needs_password and secret_override is None:
-                secret_request = SecretRequest(
-                    task_id=task.task_id,
-                    session_id=task.session_id,
-                    kind="sudo_password",
-                    prompt="Sudo password required",
-                    command=command,
-                )
-                self._publish(task, SECRET_REQUESTED, secret_request.model_dump(mode="json"))
-                return {
-                    "status": "awaiting_input",
-                    "result": {
-                        "error": "Secret required",
-                        "secret_request": secret_request.model_dump(mode="json"),
-                    },
-                }
-            if needs_password and secret_override is not None:
-                # Inject sudo -S for explicit sudo commands, or prepend sudo -S for implicit ones
-                if command.startswith("sudo "):
-                    tool_args["command"] = f"sudo -S -p '' {command[len('sudo '):]}"
-                else:
-                    tool_args["command"] = f"sudo -S -p '' {command}"
-                tool_args["stdin_secret"] = f"{secret_override}\n"
-
-        tool_call = ToolCall(
-            tool=tool_name,
-            args=tool_args,
-            task_id=task.task_id,
-            step_id="step-1",
-        )
-        self._publish(task, TOOL_CALLED, tool_call.model_dump(mode="json"))
-        if tool_name == "shell_exec":
-            tool_args["__output_callback"] = lambda stream, chunk: self._publish(
-                task,
-                TOOL_OUTPUT_CHUNK,
-                {
-                    "tool": tool_name,
-                    "step_id": "step-1",
-                    "stream": stream,
-                    "chunk": chunk,
-                },
-            )
-        tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args)
-        self._publish(task, TOOL_COMPLETED, tool_result.model_dump(mode="json"))
-
-        metadata = tool_result.metadata or {}
-        needs_sudo = metadata.get("needs_sudo", False)
-        sudo_auth_failed = metadata.get("sudo_auth_failed", False) or self._looks_like_sudo_auth_failure(tool_result)
-
-        if tool_name == "shell_exec" and not tool_result.ok and sudo_auth_failed:
-            original_command = str(directive.payload.get("args", {}).get("command", tool_args.get("command", "")))
-            secret_request = SecretRequest(
-                task_id=task.task_id,
-                session_id=task.session_id,
-                kind="sudo_password",
-                prompt="Sudo password incorrect. Try again",
-                command=original_command,
-            )
-            self._publish(task, SECRET_REQUESTED, secret_request.model_dump(mode="json"))
-            return {
-                "status": "awaiting_input",
-                "result": {
-                    "error": "Sudo password failed",
-                    "secret_request": secret_request.model_dump(mode="json"),
-                    "attempt_failed": True,
-                    "tool_result": tool_result.model_dump(mode="json"),
-                },
-            }
-        
-        if not tool_result.ok and needs_sudo:
-            return {
-                "status": "awaiting_password",
-                "result": {
-                    "task_id": task.task_id,
-                    "needs_sudo": True,
-                    "command": tool_args.get("command", ""),
-                    "error": tool_result.error or "Permission denied",
-                    "tool_result": tool_result.model_dump(mode="json"),
-                },
-            }
-
-        if tool_name == "shell_exec" and not tool_result.ok and self._command_analyzer:
-            original_command = str(directive.payload.get("args", {}).get("command", tool_args.get("command", "")))
-            diagnosis = self._command_analyzer.analyze(
-                command=original_command,
-                task_id=task.task_id,
-                session_id=task.session_id,
-            )
-            if diagnosis.get("type") != "ok":
-                return {
-                    "status": "awaiting_review",
-                    "result": {
-                        "error": "Tool action requires review before replanning",
-                        "review": {
-                            "step_id": "step-1",
-                            "tool": tool_name,
-                            "command": original_command,
-                            "diagnosis": diagnosis,
-                            "critic_assessment": {
-                                "classification": "model_planning_error",
-                                "needs_replan": True,
-                                "explanation": "Structured command analysis found a model action error before recovery.",
-                            },
-                        },
-                        "tool_result": tool_result.model_dump(mode="json"),
-                    },
-                }
-
-        return {
-            "status": "completed" if tool_result.ok else "failed",
-            "result": tool_result.model_dump(mode="json"),
-        }
-
-    def _looks_like_sudo_auth_failure(self, tool_result: ToolResult) -> bool:
-        output = f"{tool_result.output or ''}\n{tool_result.error or ''}".lower()
-        return any(
-            marker in output
-            for marker in (
-                "incorrect password",
-                "incorrect password attempt",
-                "sudo: no password was provided",
-                "sorry, try again",
-                "authentication failure",
-            )
-        )
-
-    def _publish(self, task: UserTask, event_type: str, payload: dict[str, Any]) -> None:
-        if not self._event_bus:
-            return
-        event = RuntimeEvent(
-            task_id=task.task_id,
-            session_id=task.session_id,
-            sequence=self._event_bus.next_sequence(task.task_id),
-            type=event_type,
-            payload=payload,
-        )
-        self._event_bus.publish(event)
diff --git a/app/core/execution_scheduler.py b/app/core/execution_scheduler.py
deleted file mode 100644
index fe2e19e..0000000
--- a/app/core/execution_scheduler.py
+++ /dev/null
@@ -1,212 +0,0 @@
-from __future__ import annotations
-
-import json
-import logging
-from collections import deque
-from typing import Any
-
-from app.core.contracts import ExecutionDirective, PlanStep
-
-logger = logging.getLogger(__name__)
-
-
-class ExecutionScheduler:
-    def __init__(self, retry_limit: int = 2) -> None:
-        self._retry_limit = retry_limit
-
-    def parse_plan_steps(
-        self,
-        json_str: str,
-        task_id: str | None = None,
-    ) -> list[PlanStep]:
-        try:
-            json_start = json_str.find("{")
-            json_end = json_str.rfind("}") + 1
-            if json_start < 0:
-                return []
-
-            json_str = json_str[json_start:json_end]
-            data = json.loads(json_str)
-
-            # Unified format: {"type": "plan", "payload": {"steps": [...]}}
-            # or direct: {"type": "step", "payload": {"tool": "...", "args": {...}}}
-            if isinstance(data, dict):
-                msg_type = data.get("type", "")
-                
-                # Single step format: {"type": "step", "payload": {"tool": ..., "args": ...}}
-                if msg_type == "step":
-                    payload = data.get("payload", {})
-                    step = {
-                        "id": "step-0",
-                        "kind": "tool",
-                        "tool": payload.get("tool"),
-                        "args": payload.get("args", {}),
-                        "description": payload.get("description", ""),
-                        "depends_on": payload.get("depends_on", []),
-                    }
-                    data = [step]
-                
-                # Plan format: {"type": "plan", "payload": {"steps": [...]}}
-                elif msg_type == "plan":
-                    payload = data.get("payload", {})
-                    steps_data = payload.get("steps", [])
-                    
-                    # Normalize steps: handle {"type": "step", "payload": {"tool": ...}}
-                    normalized = []
-                    for step in steps_data:
-                        if isinstance(step, dict) and step.get("type") == "step":
-                            inner = step.get("payload", {})
-                            normalized.append({
-                                "tool": inner.get("tool"),
-                                "args": inner.get("args", {}),
-                                "description": inner.get("description", ""),
-                                "depends_on": inner.get("depends_on", []),
-                            })
-                        else:
-                            normalized.append(step)
-                    steps_data = normalized
-                    
-                    data = steps_data if steps_data else []
-                
-                # Old format compatibility
-                elif "steps" in data:
-                    data = data["steps"]
-                elif "plan" in data:
-                    data = data["plan"]
-                else:
-                    data = [data]
-            elif isinstance(data, str):
-                data = json.loads(data)
-                if isinstance(data, dict):
-                    data = [data]
-
-            steps = []
-            for i, step_data in enumerate(data):
-                if isinstance(step_data, str):
-                    step_data = {"id": f"step-{i}", "kind": "respond", "text": step_data}
-
-                if not isinstance(step_data, dict):
-                    continue
-
-                step_data.setdefault("id", f"step-{i}")
-
-                # Tool-first: scheduler получает tool напрямую, без трансформаций
-                # kind определяется по наличию tool name
-                # args передаются напрямую
-                if step_data.get("tool"):
-                    step_data["kind"] = "tool"
-
-                step_data.setdefault("kind", step_data.get("kind", "respond"))
-                step_data.setdefault("tool", step_data.get("tool"))
-                step_data.setdefault("args", step_data.get("args", {}))
-                step_data.setdefault("description", step_data.get("description", ""))
-                step_data.setdefault("requires_confirmation", False)
-                step_data.setdefault("depends_on", [])
-
-                if "description" not in step_data:
-                    step_data["description"] = f"Step {i}"
-
-                steps.append(PlanStep(**step_data))
-
-            return steps
-
-        except (json.JSONDecodeError, ValueError, TypeError) as e:
-            logger.warning(f"Plan parsing failed: {e}")
-            return []
-
-    def validate_no_cycles(self, steps: list[PlanStep]) -> bool:
-        if not steps:
-            return True
-
-        graph: dict[str, set[str]] = {}
-        for step in steps:
-            graph[step.id] = set(step.depends_on)
-
-        visited: set[str] = set()
-        rec_stack: set[str] = set()
-
-        def has_cycle(node: str) -> bool:
-            if node in rec_stack:
-                return True
-            if node in visited:
-                return False
-
-            visited.add(node)
-            rec_stack.add(node)
-
-            for dep in graph.get(node, []):
-                if has_cycle(dep):
-                    return True
-
-            rec_stack.remove(node)
-            return False
-
-        for step in steps:
-            if step.id not in visited:
-                if has_cycle(step.id):
-                    logger.warning(f"Cycle detected in plan: {step.id}")
-                    return False
-
-        return True
-
-    def build_task_graph(
-        self,
-        steps: list[PlanStep],
-    ) -> dict[str, Any]:
-        if not steps:
-            return {"nodes": [], "edges": []}
-
-        if not self.validate_no_cycles(steps):
-            return {"nodes": [], "edges": [], "error": "Cycle detected in plan"}
-
-        nodes = []
-        edges = []
-
-        step_map = {s.id: s for s in steps}
-
-        for step in steps:
-            nodes.append({
-                "id": step.id,
-                "kind": step.kind,
-                "tool": step.tool,
-                "args": step.args,
-                "ready": len(step.depends_on) == 0,
-            })
-
-            for dep_id in step.depends_on:
-                edges.append({
-                    "from": dep_id,
-                    "to": step.id,
-                })
-
-        return {"nodes": nodes, "edges": edges, "step_map": step_map}
-
-    def get_ready_steps(
-        self,
-        graph: dict[str, Any],
-        completed: set[str],
-    ) -> list[PlanStep]:
-        if not graph or not graph.get("nodes"):
-            return []
-
-        step_map: dict[str, PlanStep] = graph.get("step_map", {})
-        ready = []
-
-        for node in graph["nodes"]:
-            node_id = node["id"]
-            if node_id in completed:
-                continue
-
-            deps = node.get("depends_on", [])
-            if all(dep in completed for dep in deps):
-                step = step_map.get(node_id)
-                if step:
-                    ready.append(step)
-
-        return ready
-
-    def next_directive(
-        self,
-        directive: ExecutionDirective,
-    ) -> ExecutionDirective:
-        return directive
\ No newline at end of file
diff --git a/app/core/intent_parser.py b/app/core/intent_parser.py
deleted file mode 100644
index 61adda1..0000000
--- a/app/core/intent_parser.py
+++ /dev/null
@@ -1,104 +0,0 @@
-from __future__ import annotations
-
-import re
-from typing import Any
-
-from app.core.contracts import ExecutionDirective
-
-SHELL_PREFIXES = (
-    "run ",
-    "execute ",
-    "launch ",
-    "запусти ",
-    "выполни ",
-    "выполнить ",
-)
-
-MEMORY_STORE_PATTERNS = (
-    r"запомни\s+(.+)",
-    r"сохрани\s+(.+)",
-    r"запиши\s+(.+)",
-    r"remember\s+(.+)",
-    r"save\s+(.+)",
-)
-
-MEMORY_SEARCH_PATTERNS = (
-    r"вспомни\s+(.+)",
-    r"search memory\s+(.+)",
-)
-
-
-class IntentParser:
-    """Extracts explicit tool intents from natural-language task text."""
-
-    def __init__(self) -> None:
-        self._store_patterns = [re.compile(p, re.IGNORECASE) for p in MEMORY_STORE_PATTERNS]
-        self._search_patterns = [re.compile(p, re.IGNORECASE) for p in MEMORY_SEARCH_PATTERNS]
-
-    def parse(self, task_input: str) -> ExecutionDirective | None:
-        normalized = task_input.strip()
-        lowered = normalized.lower()
-
-        if matched := self._match_patterns(self._store_patterns, normalized):
-            return ExecutionDirective(
-                type="tool",
-                payload={
-                    "tool": "memory_insert",
-                    "args": {
-                        "text": matched.group(1).strip(),
-                        "kind": "fact",
-                        "source": "user",
-                    },
-                },
-                requires_permission=False,
-                confidence=0.85,
-                reason="User explicitly requested to store in memory.",
-            )
-
-        if matched := self._match_patterns(self._search_patterns, normalized):
-            return ExecutionDirective(
-                type="tool",
-                payload={
-                    "tool": "memory_search",
-                    "args": {"query": matched.group(1).strip()},
-                },
-                requires_permission=False,
-                confidence=0.85,
-                reason="User explicitly requested to search memory.",
-            )
-
-        for prefix in SHELL_PREFIXES:
-            if lowered.startswith(prefix):
-                command = normalized[len(prefix) :].strip()
-                if command:
-                    return ExecutionDirective(
-                        type="tool",
-                        payload={
-                            "tool": "shell_exec",
-                            "args": {"command": command},
-                        },
-                        requires_permission=True,
-                        confidence=0.92,
-                        reason="Natural-language task explicitly requested shell execution.",
-                    )
-
-        quoted = re.match(r"^`(.+)`$", normalized)
-        if quoted:
-            return ExecutionDirective(
-                type="tool",
-                payload={
-                    "tool": "shell_exec",
-                    "args": {"command": quoted.group(1)},
-                },
-                requires_permission=True,
-                confidence=0.75,
-                reason="Backticked input treated as direct shell command.",
-            )
-
-        return None
-
-    def _match_patterns(self, patterns: list[re.Pattern], text: str):
-        for pattern in patterns:
-            if match := pattern.match(text):
-                return match
-        return None
diff --git a/app/core/permission_resolution.py b/app/core/permission_resolution.py
deleted file mode 100644
index afd9d06..0000000
--- a/app/core/permission_resolution.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from __future__ import annotations
-
-from pydantic import BaseModel
-
-
-class PermissionResolutionRequest(BaseModel):
-    task_id: str
-    decision: str
-
-
-class SecretResolutionRequest(BaseModel):
-    task_id: str
-    secret: str
-
-
-class PasswordResolutionRequest(BaseModel):
-    task_id: str
-    password: str
-
-
-class ReviewResolutionRequest(BaseModel):
-    task_id: str
-    decision: str
-    correction: str | None = None
diff --git a/app/core/permission_service.py b/app/core/permission_service.py
deleted file mode 100644
index dd0f852..0000000
--- a/app/core/permission_service.py
+++ /dev/null
@@ -1,370 +0,0 @@
-from __future__ import annotations
-
-import hashlib
-import json
-import logging
-import os
-import re
-import shlex
-from pathlib import Path
-from typing import Any
-
-logger = logging.getLogger(__name__)
-
-
-class PermissionService:
-    """Permission-first model - user is the authority."""
-
-    def __init__(self, config: dict[str, Any] | None = None, cache_file: Path | None = None):
-        self._config = config or self._load_config()
-        self._settings = self._config.get("settings", {})
-        self._cache_file = cache_file
-        self._categories = self._config.get("command_categories", {})
-        self._path_settings = self._config.get("path_settings", {})
-        self._legacy_dangerous_commands = self._config.get("dangerous_commands", {})
-        self._legacy_sensitive_paths = self._config.get("sensitive_paths", [])
-
-    def _load_config(self) -> dict[str, Any]:
-        try:
-            config_path = Path(__file__).parents[2] / "config" / "permissions.json"
-            with open(config_path) as f:
-                return json.load(f)
-        except Exception as e:
-            logger.warning(f"Failed to load permissions config: {e}")
-            return {"settings": {}, "command_categories": {}}
-
-    def _get_cache_file(self) -> Path:
-        if self._cache_file:
-            return self._cache_file
-        
-        base_dir = Path(__file__).parents[2]
-        cache_relative = self._settings.get("cache_file", "data/runtime/allowed_commands.json")
-        return base_dir / cache_relative
-
-    def _load_cache(self) -> dict[str, Any]:
-        cache_file = self._get_cache_file()
-        try:
-            if cache_file.exists():
-                with open(cache_file) as f:
-                    return json.load(f)
-        except Exception as e:
-            logger.warning(f"Failed to load cache: {e}")
-        
-        return {"allowed_once": {}, "allowed_always": {}}
-
-    def _save_cache(self, cache: dict[str, Any]) -> None:
-        cache_file = self._get_cache_file()
-        cache_file.parent.mkdir(parents=True, exist_ok=True)
-        with open(cache_file, "w") as f:
-            json.dump(cache, f, indent=2)
-
-    def check_shell_command(
-        self,
-        task_id: str,
-        session_id: str,
-        command: str,
-    ) -> dict[str, Any]:
-        """Check if shell command requires permission."""
-        normalized = self._normalize_command(command)
-        command_hash = self._hash_command(normalized)
-        
-        cache = self._load_cache()
-        
-        # Check cache first
-        if command_hash in cache.get("allowed_always", {}):
-            return {
-                "decision": "allowed_always",
-                "command": normalized,
-                "cached": True,
-                "requires_sudo": _requires_sudo(normalized),
-            }
-
-        if command_hash in cache.get("allowed_once", {}):
-            cached = cache["allowed_once"][command_hash]
-            if cached.get("task_id") == task_id:
-                return {
-                    "decision": "allowed_once",
-                    "command": normalized,
-                    "cached": True,
-                    "requires_sudo": _requires_sudo(normalized),
-                }
-        
-        # Check hard stop
-        if self._is_hard_stop(normalized):
-            return {
-                "decision": "hard_stop",
-                "command": normalized,
-                "reason": "Hard stop command - execution denied",
-            }
-
-        if not self._categories and self._legacy_dangerous_commands:
-            if self._matches_legacy_dangerous(normalized):
-                return {
-                    "decision": "prompt",
-                    "command": normalized,
-                    "category": "legacy_dangerous",
-                    "allow_always": False,
-                    "task_id": task_id,
-                    "session_id": session_id,
-                }
-            return {
-                "decision": "allowed",
-                "command": normalized,
-                "category": "legacy_safe",
-                "task_id": task_id,
-                "session_id": session_id,
-            }
-        
-        # Check no_always category
-        category = self._get_category(normalized)
-        can_always = self._categories.get(category, {}).get("allow_always", True)
-
-        # Check if command requires sudo (e.g. apt, systemctl without explicit sudo prefix)
-        requires_sudo = _requires_sudo(normalized)
-
-        # Need user confirmation
-        result = {
-            "decision": "prompt",
-            "command": normalized,
-            "category": category,
-            "allow_always": can_always,
-            "requires_sudo": requires_sudo,
-            "task_id": task_id,
-            "session_id": session_id,
-        }
-        return result
-
-    def check_write_path(
-        self,
-        task_id: str,
-        session_id: str,
-        path: str,
-    ) -> dict[str, Any]:
-        """Check if write path requires permission."""
-        if not self._path_settings and self._legacy_sensitive_paths:
-            if any(path.startswith(sensitive) for sensitive in self._legacy_sensitive_paths):
-                return {
-                    "decision": "prompt",
-                    "path": path,
-                    "task_id": task_id,
-                    "session_id": session_id,
-                }
-            return {"decision": "allowed", "path": path}
-
-        allow_write_paths = self._path_settings.get("allow_write_paths", [])
-        
-        # Check if path is in allowed list
-        for allowed in allow_write_paths:
-            if path.startswith(allowed):
-                return {"decision": "allowed", "path": path}
-        
-        # Otherwise require permission
-        return {
-            "decision": "prompt",
-            "path": path,
-            "task_id": task_id,
-            "session_id": session_id,
-        }
-
-    def resolve_permission(
-        self,
-        task_id: str,
-        session_id: str,
-        command: str,
-        decision: str,
-    ) -> dict[str, Any]:
-        """Resolve permission decision from user."""
-        normalized = self._normalize_command(command)
-        command_hash = self._hash_command(normalized)
-        
-        cache = self._load_cache()
-        
-        if decision == "allow_once":
-            cache.setdefault("allowed_once", {})[command_hash] = {
-                "command": normalized,
-                "task_id": task_id,
-                "session_id": session_id,
-            }
-            self._save_cache(cache)
-            return {"status": "allowed_once", "command": normalized}
-        
-        elif decision == "allow_always":
-            cache.setdefault("allowed_always", {})[command_hash] = {
-                "command": normalized,
-                "task_id": task_id,
-                "session_id": session_id,
-            }
-            self._save_cache(cache)
-            return {"status": "allowed_always", "command": normalized}
-        
-        elif decision == "deny":
-            return {"status": "denied", "command": normalized}
-        
-        return {"status": "unknown", "decision": decision}
-
-    def clear_cache(self) -> dict[str, Any]:
-        """Clear permission cache."""
-        cache = {"allowed_once": {}, "allowed_always": {}}
-        self._save_cache(cache)
-        return {"status": "cache_cleared"}
-
-    def _normalize_command(self, command: str) -> str:
-        """Normalize command for consistent hashing."""
-        if not self._settings.get("normalize_commands", True):
-            return command.strip()
-        
-        normalized = command.strip()
-        
-        # Split chained commands if enabled
-        if self._settings.get("split_chained", True):
-            # Replace ; and || with && for splitting
-            normalized = normalized.replace(";", " && ")
-            normalized = normalized.replace("||", " && ")
-        
-        # Resolve environment variables
-        try:
-            normalized = os.path.expandvars(normalized)
-        except:
-            pass
-        
-        # Resolve home directory
-        normalized = normalized.replace("~", os.path.expanduser("~"))
-        
-        # Remove extra whitespace
-        normalized = " ".join(normalized.split())
-        
-        return normalized
-
-    def _hash_command(self, command: str) -> str:
-        """Generate hash for command."""
-        return hashlib.sha256(command.encode()).hexdigest()[:16]
-
-    def _matches_legacy_dangerous(self, command: str) -> bool:
-        cmd_lower = command.lower()
-        for pattern in self._legacy_dangerous_commands:
-            if pattern.lower() in cmd_lower:
-                return True
-        return False
-
-    def _is_hard_stop(self, command: str) -> bool:
-        """Check if command is hard stop."""
-        hard_stop_commands = self._categories.get("hard_stop", {}).get("commands", [])
-
-        cmd_lower = command.lower().strip()
-        cmd_tokens = cmd_lower.split()
-
-        for hs in hard_stop_commands:
-            hs_lower = hs.lower().strip()
-            # For "rm -rf /" and "rm -rf /*", only match exact command
-            # Don't match "rm -rf /tmp/nonexistent" as hard stop
-            if hs_lower in ("rm -rf /", "rm -rf /*"):
-                if cmd_lower == hs_lower:
-                    return True
-                continue
-            # For other patterns, use substring match
-            if hs_lower in cmd_lower:
-                return True
-
-        return False
-
-    def _get_category(self, command: str) -> str:
-        """Get command category."""
-        cmd_lower = command.lower().strip()
-        cmd_first_word = cmd_lower.split()[0] if cmd_lower.split() else ""
-
-        # Check no_always category — match by first word or known multi-word prefixes
-        no_always = self._categories.get("no_always", {}).get("commands", [])
-        for pattern in no_always:
-            pat_lower = pattern.lower().strip()
-            # Match if first word matches (e.g. "apt" matches "apt list --upgradable")
-            # or if command starts with the pattern (e.g. "systemctl stop" matches "systemctl stop nginx")
-            if cmd_first_word == pat_lower or cmd_lower.startswith(pat_lower + " "):
-                return "no_always"
-
-        # Check hard_stop by first word
-        hard_stop = self._categories.get("hard_stop", {}).get("commands", [])
-        for pattern in hard_stop:
-            pat_lower = pattern.lower().strip()
-            if cmd_first_word == pat_lower or cmd_lower.startswith(pat_lower + " "):
-                return "hard_stop"
-
-        # Default to normal
-        return "normal"
-
-
-SUDO_COMMANDS = {
-    "sudo",
-    "apt", "apt-get", "dpkg", "yum", "dnf", "pacman", "zypper",
-    "systemctl", "service", "mount", "umount",
-    "shutdown", "reboot", "halt", "poweroff",
-    "useradd", "usermod", "userdel", "groupadd", "groupmod",
-    "chmod", "chown", "chgrp",
-    "iptables", "ufw",
-    "kill", "killall", "pkill",
-}
-
-
-def _requires_sudo(command: str) -> bool:
-    """Check if command requires sudo."""
-    if not command:
-        return False
-    cmd_lower = command.lower().strip()
-    first_word = cmd_lower.split()[0] if cmd_lower.split() else ""
-    return first_word in SUDO_COMMANDS
-
-
-class PermissionRequest:
-    """Permission request to user."""
-
-    def __init__(
-        self,
-        task_id: str,
-        session_id: str,
-        command: str,
-        category: str = "normal",
-        allow_always: bool = True,
-    ) -> None:
-        self.task_id = task_id
-        self.session_id = session_id
-        self.command = command
-        self.category = category
-        self.allow_always = allow_always
-        self.requires_password = _requires_sudo(command)
-
-    def to_dict(self) -> dict[str, Any]:
-        return {
-            "task_id": self.task_id,
-            "session_id": self.session_id,
-            "command": self.command,
-            "category": self.category,
-            "allow_always": self.allow_always,
-            "requires_password": self.requires_password,
-            "buttons": self._get_buttons(),
-        }
-
-    def _get_buttons(self) -> list[dict[str, str]]:
-        buttons = [{"action": "deny", "label": "Запретить"}]
-        
-        if self.allow_always:
-            buttons.insert(0, {"action": "allow_always", "label": "Разрешить навсегда"})
-        
-        if self.requires_password:
-            buttons.insert(0, {"action": "allow_with_password", "label": "Разрешить с паролем"})
-        else:
-            buttons.insert(0, {"action": "allow_once", "label": "Разрешить"})
-        
-        return buttons
-
-
-class PermissionDecision:
-    """Permission decision."""
-
-    def __init__(
-        self,
-        decision: str,
-        command: str | None = None,
-        cached: bool = False,
-    ) -> None:
-        self.decision = decision
-        self.command = command
-        self.cached = cached
diff --git a/app/events/__init__.py b/app/events/__init__.py
deleted file mode 100644
index b89dc5d..0000000
--- a/app/events/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-"""Event bus and event store."""
-
diff --git a/app/events/event_bus.py b/app/events/event_bus.py
deleted file mode 100644
index fee048f..0000000
--- a/app/events/event_bus.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from __future__ import annotations
-
-from typing import Callable
-
-from app.core.contracts import RuntimeEvent
-from app.events.event_store import SQLiteEventStore
-
-
-Subscriber = Callable[[RuntimeEvent], None]
-
-
-class EventBus:
-    """Per-task ordered event publishing with durable storage."""
-
-    def __init__(self, event_store: SQLiteEventStore) -> None:
-        self._store = event_store
-        self._subscribers: list[Subscriber] = []
-
-    def next_sequence(self, task_id: str) -> int:
-        return self._store.get_latest_sequence(task_id) + 1
-
-    def publish(self, event: RuntimeEvent) -> RuntimeEvent:
-        self._store.append(event)
-        for subscriber in self._subscribers:
-            subscriber(event)
-        return event
-
-    def subscribe(self, subscriber: Subscriber) -> None:
-        self._subscribers.append(subscriber)
-
-    def list_for_task(self, task_id: str) -> list[RuntimeEvent]:
-        return self._store.list_for_task(task_id)
-
-    def list_recent(self, limit: int = 500) -> list[RuntimeEvent]:
-        return self._store.list_recent(limit=limit)
diff --git a/app/events/event_store.py b/app/events/event_store.py
deleted file mode 100644
index 111d373..0000000
--- a/app/events/event_store.py
+++ /dev/null
@@ -1,122 +0,0 @@
-from __future__ import annotations
-
-import json
-import sqlite3
-from pathlib import Path
-
-from app.core.contracts import RuntimeEvent
-
-
-class SQLiteEventStore:
-    """Append-only event store with per-task ordered history."""
-
-    def __init__(self, db_path: str | Path) -> None:
-        self._db_path = Path(db_path)
-        self._db_path.parent.mkdir(parents=True, exist_ok=True)
-        self._initialize()
-
-    def append(self, event: RuntimeEvent) -> None:
-        with sqlite3.connect(self._db_path) as conn:
-            conn.execute(
-                """
-                INSERT INTO events (
-                    event_id, task_id, session_id, sequence, type, timestamp,
-                    payload_json, causation_id, correlation_id
-                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
-                """,
-                (
-                    event.event_id,
-                    event.task_id,
-                    event.session_id,
-                    event.sequence,
-                    event.type,
-                    event.timestamp.isoformat(),
-                    json.dumps(event.payload),
-                    event.causation_id,
-                    event.correlation_id,
-                ),
-            )
-            conn.commit()
-
-    def list_for_task(self, task_id: str) -> list[RuntimeEvent]:
-        with sqlite3.connect(self._db_path) as conn:
-            rows = conn.execute(
-                """
-                SELECT event_id, task_id, session_id, sequence, type, timestamp,
-                       payload_json, causation_id, correlation_id
-                FROM events
-                WHERE task_id = ?
-                ORDER BY sequence ASC
-                """,
-                (task_id,),
-            ).fetchall()
-        return [
-            RuntimeEvent(
-                event_id=row[0],
-                task_id=row[1],
-                session_id=row[2],
-                sequence=row[3],
-                type=row[4],
-                timestamp=row[5],
-                payload=json.loads(row[6]),
-                causation_id=row[7],
-                correlation_id=row[8],
-            )
-            for row in rows
-        ]
-
-    def list_recent(self, limit: int = 500) -> list[RuntimeEvent]:
-        with sqlite3.connect(self._db_path) as conn:
-            rows = conn.execute(
-                """
-                SELECT event_id, task_id, session_id, sequence, type, timestamp,
-                       payload_json, causation_id, correlation_id
-                FROM events
-                ORDER BY timestamp DESC, task_id DESC, sequence DESC
-                LIMIT ?
-                """,
-                (limit,),
-            ).fetchall()
-        events = [
-            RuntimeEvent(
-                event_id=row[0],
-                task_id=row[1],
-                session_id=row[2],
-                sequence=row[3],
-                type=row[4],
-                timestamp=row[5],
-                payload=json.loads(row[6]),
-                causation_id=row[7],
-                correlation_id=row[8],
-            )
-            for row in rows
-        ]
-        return list(reversed(events))
-
-    def get_latest_sequence(self, task_id: str) -> int:
-        with sqlite3.connect(self._db_path) as conn:
-            row = conn.execute(
-                "SELECT COALESCE(MAX(sequence), 0) FROM events WHERE task_id = ?",
-                (task_id,),
-            ).fetchone()
-        return int(row[0]) if row else 0
-
-    def _initialize(self) -> None:
-        with sqlite3.connect(self._db_path) as conn:
-            conn.execute(
-                """
-                CREATE TABLE IF NOT EXISTS events (
-                    event_id TEXT PRIMARY KEY,
-                    task_id TEXT NOT NULL,
-                    session_id TEXT NOT NULL,
-                    sequence INTEGER NOT NULL,
-                    type TEXT NOT NULL,
-                    timestamp TEXT NOT NULL,
-                    payload_json TEXT NOT NULL,
-                    causation_id TEXT,
-                    correlation_id TEXT NOT NULL,
-                    UNIQUE(task_id, sequence)
-                )
-                """
-            )
-            conn.commit()
diff --git a/app/events/event_types.py b/app/events/event_types.py
deleted file mode 100644
index 7ab7e91..0000000
--- a/app/events/event_types.py
+++ /dev/null
@@ -1,35 +0,0 @@
-TASK_RECEIVED = "task_received"
-CONTEXT_BUILT = "context_built"
-STEP_STARTED = "step_started"
-TOOL_CALLED = "tool_called"
-TOOL_OUTPUT_CHUNK = "tool_output_chunk"
-TOOL_COMPLETED = "tool_completed"
-PERMISSION_REQUESTED = "permission_requested"
-PERMISSION_RESOLVED = "permission_resolved"
-TASK_AWAITING_PERMISSION = "task_awaiting_permission"
-SECRET_REQUESTED = "secret_requested"
-TASK_AWAITING_INPUT = "task_awaiting_input"
-TASK_AWAITING_REVIEW = "task_awaiting_review"
-REVIEW_RESOLVED = "review_resolved"
-CHECKPOINT_SAVED = "checkpoint_saved"
-TASK_COMPLETED = "task_completed"
-TASK_FAILED = "task_failed"
-ORCHESTRATOR_CALLED = "orchestrator_called"
-ORCHESTRATOR_RESULT = "orchestrator_result"
-ORCHESTRATOR_UNAVAILABLE = "orchestrator_unavailable"
-ORCHESTRATOR_FALLBACK_USED = "orchestrator_fallback_used"
-ORCHESTRATOR_RETRY = "orchestrator_retry"
-PLANNER_CALLED = "planner_called"
-PLANNER_RETRY = "planner_retry"
-CRITIC_CALLED = "critic_called"
-CRITIC_RESULT = "critic_result"
-MEMORY_WRITE_DECIDED = "memory_write_decided"
-PLAN_STARTED = "plan_started"
-PLAN_FAILED = "plan_failed"
-PLAN_COMPLETED = "plan_completed"
-STEPPED_COMPLETED = "step_completed"
-THINKER_CALLED = "thinker_called"
-THINKER_RESULT = "thinker_result"
-JSON_COMPILER_CALLED = "json_compiler_called"
-JSON_COMPILER_RESULT = "json_compiler_result"
-MEMORY_RECALL_USED = "memory_recall_used"
diff --git a/app/memory/__init__.py b/app/memory/__init__.py
deleted file mode 100644
index f912364..0000000
--- a/app/memory/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-MEMORY_AVAILABLE = False
-VECTOR_AVAILABLE = False
-
-try:
-    from app.memory.store import MemoryStore
-    from app.memory.vector_index import VectorIndex
-    from app.memory.interface import MemoryInterface
-    from app.memory.write_policy import MemoryWritePolicy
-    MEMORY_AVAILABLE = True
-    VECTOR_AVAILABLE = True
-except ImportError:
-    MemoryStore = None
-    VectorIndex = None
-    MemoryInterface = None
-    MemoryWritePolicy = None
-
-__all__ = [
-    "MemoryStore",
-    "VectorIndex",
-    "MemoryInterface",
-    "MemoryWritePolicy",
-    "MEMORY_AVAILABLE",
-    "VECTOR_AVAILABLE",
-]
\ No newline at end of file
diff --git a/app/memory/interface.py b/app/memory/interface.py
deleted file mode 100644
index 8ab756c..0000000
--- a/app/memory/interface.py
+++ /dev/null
@@ -1,155 +0,0 @@
-from __future__ import annotations
-
-import json
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any, Literal
-
-import numpy as np
-
-from app.core.contracts import MemoryEntry
-from app.memory.store import MemoryStore
-from app.memory.vector_index import VectorIndex
-from app.models.embeddings import EmbeddingsAdapter
-
-
-class MemoryInterface:
-    def __init__(
-        self,
-        store: MemoryStore,
-        vector_index: VectorIndex,
-        embeddings: EmbeddingsAdapter,
-    ) -> None:
-        self._store = store
-        self._vector_index = vector_index
-        self._embeddings = embeddings
-
-    def insert(
-        self,
-        text: str,
-        kind: Literal["tool_result", "plan", "critique", "fact", "summary", "user_preference"],
-        source: Literal["tool", "critic", "user", "system"],
-        task_id: str | None = None,
-        session_id: str | None = None,
-        weight: float = 0.5,
-        metadata: dict[str, Any] | None = None,
-    ) -> MemoryEntry:
-        entry = MemoryEntry(
-            text=text,
-            kind=kind,
-            source=source,
-            weight=weight,
-            task_id=task_id,
-            session_id=session_id,
-            metadata=metadata or {},
-            embedding_model=self._embeddings.__class__.__name__,
-            embedding_dim=self._embeddings.embedding_dim,
-        )
-
-        embedding = self._embeddings.encode(text)
-        embedding_bytes = embedding.astype("float32").tobytes()
-
-        self._store.insert(entry, embedding_bytes)
-        self._vector_index.insert(entry.id, embedding)
-        self._vector_index.save()
-
-        self.cleanup()
-
-        return entry
-
-    def search(
-        self,
-        query: str,
-        top_k: int = 5,
-        kind: str | None = None,
-        session_id: str | None = None,
-    ) -> list[tuple[MemoryEntry, float]]:
-        query_embedding = self._embeddings.encode(query)
-        memory_ids, scores = self._vector_index.search(query_embedding, k=top_k)
-
-        results: list[tuple[MemoryEntry, float]] = []
-        for memory_id, score in zip(memory_ids, scores):
-            entry = self._store.get(memory_id)
-            if entry:
-                if kind and entry.kind != kind:
-                    continue
-                if session_id and entry.session_id != session_id:
-                    continue
-                results.append((entry, score))
-
-        return results[:top_k]
-
-    def get(self, memory_id: str) -> MemoryEntry | None:
-        return self._store.get(memory_id)
-
-    def delete(self, memory_id: str) -> bool:
-        entry = self._store.get(memory_id)
-        if entry:
-            self._vector_index.delete(memory_id)
-            return self._store.delete(memory_id)
-        return False
-
-    def get_by_task(self, task_id: str) -> list[MemoryEntry]:
-        return self._store.get_by_task(task_id)
-
-    def get_by_session(self, session_id: str, limit: int = 100) -> list[MemoryEntry]:
-        return self._store.get_by_session(session_id, limit)
-
-    def get_recent(self, limit: int = 10) -> list[MemoryEntry]:
-        return self._store.get_all(limit)
-
-    def count(self) -> int:
-        return self._store.count()
-
-    def reindex(self) -> int:
-        """Rebuild vector index from all entries in memory store.
-        Returns number of indexed entries."""
-        entries = self._store.get_all(limit=10000)
-        # Delete old index file and re-initialize from scratch
-        import os
-        if self._vector_index._index_path and self._vector_index._index_path.exists():
-            self._vector_index._index_path.unlink()
-        self._vector_index._index = None
-        self._vector_index._init_index()
-        count = 0
-        for entry in entries:
-            text = entry.text
-            embedding = self._embeddings.encode(text)
-            self._vector_index.insert(entry.id, embedding)
-            count += 1
-        self._vector_index.save()
-        return count
-
-    def close(self) -> None:
-        self._store.close()
-
-    def cleanup(self, max_items: int = 750, decay_factor: float = 0.95) -> int:
-        """Remove low-weight entries when exceeding max_items limit.
-
-        Applies weight decay based on freshness before cleanup.
-        Returns number of removed entries.
-        """
-        current_count = self._store.count()
-        if current_count <= max_items:
-            return 0
-
-        removed = 0
-        entries_to_remove = current_count - max_items
-
-        all_entries = self._store.get_all(limit=current_count)
-
-        def effective_weight(entry: MemoryEntry) -> float:
-            entry_weight = entry.weight
-            if entry.created_at:
-                age_days = (datetime.now(timezone.utc) - entry.created_at).total_seconds() / 86400
-                freshness_factor = max(0.1, decay_factor ** age_days)
-                return entry_weight * freshness_factor
-            return entry_weight
-
-        sorted_entries = sorted(all_entries, key=effective_weight)
-
-        for entry in sorted_entries[:entries_to_remove]:
-            self._store.delete(entry.id)
-            removed += 1
-
-        return removed
\ No newline at end of file
diff --git a/app/memory/recall.py b/app/memory/recall.py
deleted file mode 100644
index e0e847a..0000000
--- a/app/memory/recall.py
+++ /dev/null
@@ -1,205 +0,0 @@
-from __future__ import annotations
-
-import json
-import logging
-from typing import Any
-
-from app.core.contracts import MemoryEntry
-from app.memory.interface import MemoryInterface
-from app.models.async_adapters import AsyncOrchestratorAdapter
-
-logger = logging.getLogger(__name__)
-
-RECALL_PROMPT_TEMPLATE = """Определи, нужно ли искать в долговременной памяти для ответа на этот запрос.
-
-Запрос: "{task_input}"
-
-ИСКАТЬ в памяти если запрос:
-- Содержит вопрос о пользователе (имя, предпочтения, история)
-- Содержит отсылки к прошлым разговорам или действиям
-- Содержит местоимения без контекста ("он", "это", "тот файл")
-- Просит вспомнить, повторить, рассказать о прошлом
-- Спрашивает "что ты помнишь", "как меня зовут", "что я говорил"
-
-НЕ ИСКАТЬ если:
-- Приветствие или прощание
-- Простая команда (ls, pwd, echo)
-- Общий вопрос не связанный с прошлым
-
-Ответь ТОЛЬКО JSON:
-{{"should_recall": true, "search_query": "поисковый запрос"}}
-или
-{{"should_recall": false, "reason": "краткая причина"}}"""
-
-
-class MemoryRecallService:
-    """Активное воспоминание: система сама решает, что и когда искать в памяти."""
-
-    def __init__(
-        self,
-        memory_interface: MemoryInterface | None,
-        recall_model: AsyncOrchestratorAdapter | None,
-    ) -> None:
-        self._memory = memory_interface
-        self._model = recall_model
-
-    async def recall(
-        self,
-        task_input: str,
-        top_k: int = 5,
-    ) -> dict[str, Any]:
-        """
-        Определяет необходимость воспоминания и выполняет поиск.
-
-        Возвращает:
-        {
-            "should_recall": bool,
-            "reason": str,
-            "query": str,
-            "results": list[MemoryEntry],
-            "summary": str,  # краткая сводка для оркестратора
-        }
-        """
-        if not self._memory or not self._model:
-            with open("/tmp/recall_debug.log", "a") as f:
-                f.write(f"SKIP: memory={self._memory is not None}, model={self._model is not None}\n")
-            return self._empty_result("memory_or_model_unavailable")
-
-        # 1. LLM решает, нужно ли искать
-        decision = await self._classify(task_input)
-        with open("/tmp/recall_debug.log", "a") as f:
-            f.write(f"DECISION type={type(decision)} value={decision}\n")
-        if not isinstance(decision, dict):
-            return self._empty_result("invalid_decision_type")
-        if not decision.get("should_recall"):
-            return self._empty_result(decision.get("reason", "not_needed"))
-
-        search_query = decision.get("search_query", task_input)
-        logger.info(f"Memory recall: query='{search_query}', reason='{decision.get('reason')}'")
-
-        # 2. Векторный поиск
-        try:
-            raw_results = self._memory.search(query=search_query, top_k=top_k)
-        except Exception as e:
-            logger.warning(f"Memory search failed: {e}")
-            return self._empty_result("search_failed")
-
-        # 3. Фильтрация: убираем пустые и слишком нерелевантные
-        filtered = self._filter(raw_results)
-
-        if not filtered:
-            return self._empty_result("no_relevant_results")
-
-        # 4. Сводка для оркестратора
-        summary = self._summarize(filtered, search_query)
-
-        return {
-            "should_recall": True,
-            "reason": decision.get("reason", ""),
-            "query": search_query,
-            "results": filtered,
-            "summary": summary,
-        }
-
-    async def _classify(self, task_input: str) -> dict[str, Any]:
-        """LLM-классификация: нужно ли искать в памяти."""
-        prompt = RECALL_PROMPT_TEMPLATE.format(task_input=task_input)
-
-        try:
-            raw = await self._model.generate(prompt, max_tokens=512)
-            data = self._parse_json(raw)
-            if "should_recall" in data:
-                return data
-            logger.warning(f"Recall classification missing 'should_recall': {raw[:200]}")
-            return {"should_recall": False, "reason": "parse_error"}
-        except Exception as e:
-            logger.warning(f"Recall classification failed: {e}")
-            return {"should_recall": False, "reason": "classification_error"}
-
-    def _filter(
-        self,
-        results: list[tuple[MemoryEntry, float]],
-        min_score: float = 0.3,
-    ) -> list[MemoryEntry]:
-        """Фильтрует результаты по score и убирает дубликаты."""
-        seen_texts: set[str] = set()
-        filtered: list[MemoryEntry] = []
-
-        for entry, score in results:
-            if score < min_score:
-                continue
-            # Нормализуем текст для дедупликации
-            normalized = entry.text.strip().lower()[:100]
-            if normalized in seen_texts:
-                continue
-            seen_texts.add(normalized)
-            filtered.append(entry)
-
-        return filtered
-
-    def _summarize(
-        self,
-        results: list[MemoryEntry],
-        query: str,
-    ) -> str:
-        """Краткая сводка найденного для оркестратора."""
-        parts = [f"По запросу '{query}' найдено {len(results)} записей:"]
-        for i, entry in enumerate(results[:5], 1):
-            text_preview = entry.text[:120].replace("\n", " ")
-            parts.append(f"  {i}. [{entry.kind}] {text_preview}")
-        return "\n".join(parts)
-
-    def _parse_json(self, raw: str) -> dict[str, Any]:
-        """Извлекает JSON из ответа модели, пропуская рассуждения перед ним."""
-        try:
-            json_start = raw.find("{")
-            json_end = raw.rfind("}") + 1
-
-            if json_start < 0 or json_end <= 0:
-                return {}
-
-            # Пробуем весь текст от первого { до последнего }
-            try:
-                data = json.loads(raw[json_start:json_end])
-                if isinstance(data, dict):
-                    return data
-            except json.JSONDecodeError:
-                pass
-
-            # Ищем все возможные начала JSON
-            candidates = []
-            pos = 0
-            while True:
-                pos = raw.find("{", pos)
-                if pos < 0:
-                    break
-                candidates.append(pos)
-                pos += 1
-
-            # Пробуем каждый candidate с конца
-            for start in reversed(candidates):
-                end = raw.rfind("}") + 1
-                if end <= start:
-                    continue
-                try:
-                    data = json.loads(raw[start:end])
-                    if isinstance(data, dict):
-                        return data
-                except json.JSONDecodeError:
-                    continue
-
-            return {}
-        except Exception as e:
-            with open("/tmp/recall_debug.log", "a") as f:
-                f.write(f"PARSE ERROR: {e}\n")
-            return {}
-
-    @staticmethod
-    def _empty_result(reason: str) -> dict[str, Any]:
-        return {
-            "should_recall": False,
-            "reason": reason,
-            "query": "",
-            "results": [],
-            "summary": "",
-        }
diff --git a/app/memory/store.py b/app/memory/store.py
deleted file mode 100644
index dcf76ee..0000000
--- a/app/memory/store.py
+++ /dev/null
@@ -1,185 +0,0 @@
-from __future__ import annotations
-
-import json
-import sqlite3
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any, Sequence
-from uuid import uuid4
-
-from app.core.contracts import MemoryEntry
-
-
-def utc_now() -> datetime:
-    return datetime.now(timezone.utc)
-
-
-class MemoryStore:
-    def __init__(self, db_path: str | Path) -> None:
-        self._db_path = Path(db_path)
-        self._db_path.parent.mkdir(parents=True, exist_ok=True)
-        self._conn = sqlite3.connect(str(self._db_path), check_same_thread=False)
-        self._conn.row_factory = sqlite3.Row
-        self._init_tables()
-
-    def _init_tables(self) -> None:
-        self._conn.executescript("""
-            CREATE TABLE IF NOT EXISTS memory_items (
-                id TEXT PRIMARY KEY,
-                text TEXT NOT NULL,
-                kind TEXT NOT NULL,
-                source TEXT NOT NULL,
-                weight REAL NOT NULL DEFAULT 0.5,
-                task_id TEXT,
-                session_id TEXT,
-                metadata_json TEXT,
-                created_at TEXT NOT NULL,
-                updated_at TEXT NOT NULL
-            );
-
-            CREATE TABLE IF NOT EXISTS memory_embeddings (
-                memory_id TEXT PRIMARY KEY,
-                embedding BLOB NOT NULL,
-                embedding_model TEXT NOT NULL,
-                embedding_dim INTEGER NOT NULL,
-                created_at TEXT NOT NULL,
-                FOREIGN KEY (memory_id) REFERENCES memory_items(id) ON DELETE CASCADE
-            );
-
-            CREATE INDEX IF NOT EXISTS idx_memory_items_task ON memory_items(task_id);
-            CREATE INDEX IF NOT EXISTS idx_memory_items_session ON memory_items(session_id);
-            CREATE INDEX IF NOT EXISTS idx_memory_items_kind ON memory_items(kind);
-            CREATE INDEX IF NOT EXISTS idx_memory_embeddings_model ON memory_embeddings(embedding_model);
-        """)
-        self._conn.commit()
-
-    def insert(self, entry: MemoryEntry, embedding: bytes) -> None:
-        cursor = self._conn.cursor()
-        cursor.execute(
-            """
-            INSERT INTO memory_items (id, text, kind, source, weight, task_id, session_id, metadata_json, created_at, updated_at)
-            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-            """,
-            (
-                entry.id,
-                entry.text,
-                entry.kind,
-                entry.source,
-                entry.weight,
-                entry.task_id,
-                entry.session_id,
-                json.dumps(entry.metadata) if entry.metadata else None,
-                entry.created_at.isoformat(),
-                utc_now().isoformat(),
-            ),
-        )
-        cursor.execute(
-            """
-            INSERT INTO memory_embeddings (memory_id, embedding, embedding_model, embedding_dim, created_at)
-            VALUES (?, ?, ?, ?, ?)
-            """,
-            (
-                entry.id,
-                embedding,
-                entry.embedding_model,
-                entry.embedding_dim,
-                utc_now().isoformat(),
-            ),
-        )
-        self._conn.commit()
-
-    def get(self, memory_id: str) -> MemoryEntry | None:
-        cursor = self._conn.cursor()
-        row = cursor.execute(
-            "SELECT * FROM memory_items WHERE id = ?", (memory_id,)
-        ).fetchone()
-        if not row:
-            return None
-        return self._row_to_entry(row)
-
-    def get_embedding(self, memory_id: str) -> bytes | None:
-        cursor = self._conn.cursor()
-        row = cursor.execute(
-            "SELECT embedding FROM memory_embeddings WHERE memory_id = ?", (memory_id,)
-        ).fetchone()
-        return bytes(row["embedding"]) if row else None
-
-    def get_all(self, limit: int = 1000) -> list[MemoryEntry]:
-        cursor = self._conn.cursor()
-        rows = cursor.execute(
-            "SELECT * FROM memory_items ORDER BY created_at DESC LIMIT ?", (limit,)
-        ).fetchall()
-        return [self._row_to_entry(row) for row in rows]
-
-    def get_by_task(self, task_id: str) -> list[MemoryEntry]:
-        cursor = self._conn.cursor()
-        rows = cursor.execute(
-            "SELECT * FROM memory_items WHERE task_id = ? ORDER BY created_at DESC", (task_id,)
-        ).fetchall()
-        return [self._row_to_entry(row) for row in rows]
-
-    def get_by_session(self, session_id: str, limit: int = 100) -> list[MemoryEntry]:
-        cursor = self._conn.cursor()
-        rows = cursor.execute(
-            "SELECT * FROM memory_items WHERE session_id = ? ORDER BY created_at DESC LIMIT ?",
-            (session_id, limit),
-        ).fetchall()
-        return [self._row_to_entry(row) for row in rows]
-
-    def get_by_kind(self, kind: str, limit: int = 100) -> list[MemoryEntry]:
-        cursor = self._conn.cursor()
-        rows = cursor.execute(
-            "SELECT * FROM memory_items WHERE kind = ? ORDER BY created_at DESC LIMIT ?", (kind, limit)
-        ).fetchall()
-        return [self._row_to_entry(row) for row in rows]
-
-    def delete(self, memory_id: str) -> bool:
-        cursor = self._conn.cursor()
-        cursor.execute("DELETE FROM memory_embeddings WHERE memory_id = ?", (memory_id,))
-        cursor.execute("DELETE FROM memory_items WHERE id = ?", (memory_id,))
-        self._conn.commit()
-        return cursor.rowcount > 0
-
-    def update_weight(self, memory_id: str, weight: float) -> bool:
-        cursor = self._conn.cursor()
-        cursor.execute(
-            "UPDATE memory_items SET weight = ?, updated_at = ? WHERE id = ?",
-            (weight, utc_now().isoformat(), memory_id),
-        )
-        self._conn.commit()
-        return cursor.rowcount > 0
-
-    def search_text(self, query: str, limit: int = 10) -> list[MemoryEntry]:
-        cursor = self._conn.cursor()
-        rows = cursor.execute(
-            "SELECT * FROM memory_items WHERE text LIKE ? ORDER BY created_at DESC LIMIT ?",
-            (f"%{query}%", limit),
-        ).fetchall()
-        return [self._row_to_entry(row) for row in rows]
-
-    def count(self) -> int:
-        cursor = self._conn.cursor()
-        row = cursor.execute("SELECT COUNT(*) FROM memory_items").fetchone()
-        return row[0] if row else 0
-
-    def close(self) -> None:
-        self._conn.close()
-
-    def _row_to_entry(self, row: sqlite3.Row) -> MemoryEntry:
-        metadata = {}
-        if row["metadata_json"]:
-            import json
-            metadata = json.loads(row["metadata_json"])
-        return MemoryEntry(
-            id=row["id"],
-            text=row["text"],
-            kind=row["kind"],
-            source=row["source"],
-            weight=row["weight"],
-            task_id=row["task_id"],
-            session_id=row["session_id"],
-            metadata=metadata,
-            created_at=datetime.fromisoformat(row["created_at"]),
-            embedding_model="",
-            embedding_dim=0,
-        )
\ No newline at end of file
diff --git a/app/memory/vector_index.py b/app/memory/vector_index.py
deleted file mode 100644
index fb24fcf..0000000
--- a/app/memory/vector_index.py
+++ /dev/null
@@ -1,149 +0,0 @@
-from __future__ import annotations
-
-import logging
-import numpy as np
-import hnswlib
-from pathlib import Path
-from typing import Any
-
-logger = logging.getLogger(__name__)
-
-
-class VectorIndex:
-    def __init__(
-        self,
-        index_path: str | Path | None = None,
-        embedding_dim: int = 384,
-        max_elements: int = 10000,
-    ) -> None:
-        self._embedding_dim = embedding_dim
-        self._index_path = Path(index_path) if index_path else None
-        self._index: hnswlib.Index | None = None
-        self._max_elements = max_elements
-        self._loading = False  # Prevent recursion
-
-        self._init_index()
-
-    def _init_index(self) -> None:
-        if self._loading:
-            return
-        self._loading = True
-        try:
-            if self._index_path and self._index_path.exists():
-                self._load()
-            else:
-                self._index = hnswlib.Index(
-                    space="l2",
-                    dim=self._embedding_dim,
-                )
-                self._index.init_index(
-                    max_elements=self._max_elements,
-                    ef_construction=200,
-                    M=16,
-                )
-        except Exception as e:
-            logger.warning(f"VectorIndex init failed: {e}")
-            self._index = hnswlib.Index(
-                space="l2",
-                dim=self._embedding_dim,
-            )
-            self._index.init_index(
-                max_elements=self._max_elements,
-                ef_construction=100,
-                M=16,
-            )
-        finally:
-            self._loading = False
-
-    def insert(self, memory_id: str, embedding: np.ndarray) -> None:
-        if self._index is None:
-            self._init_index()
-            if self._index is None:
-                return
-
-        try:
-            vector = self._normalize(embedding)
-            internal_id = self._get_internal_id(memory_id)
-            self._index.add_items(vector, ids=np.array([internal_id]))
-        except Exception as e:
-            logger.warning(f"VectorIndex insert failed: {e}")
-
-    def search(
-        self,
-        query_embedding: np.ndarray,
-        k: int = 5,
-    ) -> tuple[list[str], list[float]]:
-        if self._index is None:
-            return [], []
-
-        try:
-            if self._index.get_current_count() == 0:
-                return [], []
-
-            # Set ef to at least k for proper search
-            self._index.set_ef(max(k * 2, 50))
-
-            vector = self._normalize(query_embedding)
-            labels, distances = self._index.knn_query(vector, k=k)
-
-            memory_ids = [self._get_memory_id(int(label)) for label in labels[0]]
-            scores = [1.0 - dist for dist in distances[0]]
-            return memory_ids, scores
-        except Exception as e:
-            logger.warning(f"VectorIndex search failed: {e}")
-            return [], []
-
-    def delete(self, memory_id: str) -> bool:
-        return False
-
-    def get_items(self, memory_ids: list[str]) -> np.ndarray:
-        if self._index is None:
-            raise RuntimeError("Index not initialized")
-        internal_ids = [self._get_internal_id(mid) for mid in memory_ids]
-        return self._index.get_items(np.array(internal_ids))
-
-    def save(self) -> None:
-        if self._index and self._index_path:
-            try:
-                self._index_path.parent.mkdir(parents=True, exist_ok=True)
-                self._index.save_index(str(self._index_path))
-            except Exception as e:
-                logger.warning(f"VectorIndex save failed: {e}")
-
-    def _load(self) -> None:
-        if self._loading:
-            return
-        self._loading = True
-        try:
-            if self._index_path and self._index_path.exists():
-                self._index = hnswlib.Index(space="l2", dim=self._embedding_dim)
-                self._index.load_index(
-                    str(self._index_path),
-                    max_elements=self._max_elements
-                )
-        except Exception as e:
-            logger.warning(f"VectorIndex load failed: {e}")
-            self._init_index()
-        finally:
-            self._loading = False
-
-    def _normalize(self, vector: np.ndarray) -> np.ndarray:
-        vec = vector.flatten()
-        norm = np.linalg.norm(vec)
-        if norm > 0:
-            vec = vec / norm
-        return vec.reshape(1, -1)
-
-    def _get_internal_id(self, memory_id: str) -> int:
-        return hash(memory_id) % (2**31)
-
-    def _get_memory_id(self, internal_id: int) -> str:
-        return str(internal_id)
-
-    @property
-    def embedding_dim(self) -> int:
-        return self._embedding_dim
-
-    @property
-    def element_count(self) -> int:
-        return self._index.get_current_count() if self._index else 0
\ No newline at end of file
diff --git a/app/memory/write_policy.py b/app/memory/write_policy.py
deleted file mode 100644
index 9ac6c85..0000000
--- a/app/memory/write_policy.py
+++ /dev/null
@@ -1,98 +0,0 @@
-from __future__ import annotations
-
-from typing import Any, Literal
-
-from app.core.contracts import CriticScore, MemoryEntry
-
-
-class MemoryWritePolicy:
-    def __init__(
-        self,
-        store_threshold: float = 0.7,
-        min_usefulness: float = 0.3,
-        max_entries_per_session: int = 50,
-    ) -> None:
-        self._store_threshold = store_threshold
-        self._min_usefulness = min_usefulness
-        self._max_entries_per_session = max_entries_per_session
-
-    def decide(
-        self,
-        critic_score: CriticScore,
-        memory_type: MemoryEntry.Kind,
-        session_id: str | None = None,
-        has_duplicate: bool = False,
-        current_session_count: int = 0,
-    ) -> Literal["store", "store_with_weight", "skip", "merge"]:
-        if critic_score.safety < 0.5:
-            return "skip"
-
-        if has_duplicate:
-            return "merge"
-
-        if not critic_score.memory_store:
-            return "skip"
-
-        if critic_score.usefulness < self._min_usefulness:
-            return "skip"
-
-        if session_id and current_session_count >= self._max_entries_per_session:
-            return "skip"
-
-        base_decision = self._evaluate_scores(critic_score, memory_type)
-
-        if base_decision == "store" and critic_score.weight < self._store_threshold:
-            adjusted_weight = self._adjust_weight(critic_score, memory_type)
-            if adjusted_weight >= self._store_threshold:
-                return "store_with_weight"
-            return base_decision
-
-        return base_decision
-
-    def _evaluate_scores(
-        self,
-        critic_score: CriticScore,
-        memory_type: MemoryEntry.Kind,
-    ) -> Literal["store", "store_with_weight", "skip", "merge"]:
-        avg_score = (critic_score.correctness + critic_score.usefulness + critic_score.safety) / 3.0
-
-        if memory_type in ("fact", "plan", "summary"):
-            if avg_score >= 0.8:
-                return "store"
-            elif avg_score >= 0.6:
-                return "store_with_weight"
-
-        if memory_type in ("tool_result", "critique"):
-            if avg_score >= self._store_threshold:
-                return "store"
-            elif avg_score >= 0.5:
-                return "store_with_weight"
-
-        if memory_type == "user_preference":
-            if avg_score >= 0.5:
-                return "store"
-
-        return "skip"
-
-    def _adjust_weight(
-        self,
-        critic_score: CriticScore,
-        memory_type: MemoryEntry.Kind,
-    ) -> float:
-        base_weight = critic_score.weight
-
-        type_boost = {
-            "fact": 0.15,
-            "plan": 0.1,
-            "summary": 0.1,
-            "user_preference": 0.2,
-            "tool_result": 0.05,
-            "critique": 0.05,
-        }.get(memory_type, 0.0)
-
-        safety_boost = 0.0
-        if critic_score.safety >= 0.9:
-            safety_boost = 0.1
-
-        adjusted = base_weight + type_boost + safety_boost
-        return min(adjusted, 1.0)
\ No newline at end of file
diff --git a/app/models/__init__.py b/app/models/__init__.py
deleted file mode 100644
index 3c4e242..0000000
--- a/app/models/__init__.py
+++ /dev/null
@@ -1,32 +0,0 @@
-LLM_AVAILABLE = False
-EMBEDDINGS_AVAILABLE = False
-
-try:
-    from app.models.adapters import create_adapter, create_llama_adapter
-    from app.models.orchestrator import OrchestratorAdapter
-    from app.models.coder import CoderAdapter
-    from app.models.critic import CriticAdapter
-    LLM_AVAILABLE = True
-except ImportError:
-    create_adapter = None
-    create_llama_adapter = None
-    OrchestratorAdapter = None
-    CoderAdapter = None
-    CriticAdapter = None
-
-try:
-    from app.models.embeddings import EmbeddingsAdapter
-    EMBEDDINGS_AVAILABLE = True
-except ImportError:
-    EmbeddingsAdapter = None
-
-__all__ = [
-    "create_adapter",
-    "create_llama_adapter",
-    "OrchestratorAdapter",
-    "CoderAdapter",
-    "CriticAdapter",
-    "EmbeddingsAdapter",
-    "LLM_AVAILABLE",
-    "EMBEDDINGS_AVAILABLE",
-]
\ No newline at end of file
diff --git a/app/models/adapters.py b/app/models/adapters.py
deleted file mode 100644
index ef78f0e..0000000
--- a/app/models/adapters.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from __future__ import annotations
-
-from pathlib import Path
-from typing import Any, Protocol, Iterator
-import os
-
-try:
-    from llama_cpp import Llama
-    LLAMA_AVAILABLE = True
-except ImportError:
-    Llama = None
-    LLAMA_AVAILABLE = False
-
-
-class BaseModelAdapter(Protocol):
-    async def generate(self, prompt: str, **kwargs: Any) -> str: ...
-    def stream(self, prompt: str, **kwargs: Any) -> Iterator[str]: ...
-
-
-def create_llama_adapter(
-    model_path: str,
-    backend: str = "cpu",
-    n_gpu_layers: int = 0,
-    max_tokens: int = 2048,
-    temperature: float = 0.2,
-    base_dir: Path | None = None,
-) -> "Llama":
-    if not LLAMA_AVAILABLE:
-        raise RuntimeError("llama-cpp-python not installed")
-
-    if base_dir:
-        model_path = str(base_dir / model_path)
-    else:
-        model_path = str(Path.cwd() / model_path)
-
-    return Llama(
-        model_path=model_path,
-        n_gpu_layers=n_gpu_layers,
-        n_ctx=4096,
-        n_threads=int(os.environ.get("DUCKLM_N_THREADS", max(4, min((os.cpu_count() or 4) // 2, 20)))),
-        n_threads_batch=-1,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        verbose=False,
-    )
-
-
-def create_adapter(
-    model_type: str,
-    config: dict[str, Any],
-    base_dir: Path | None = None,
-) -> "Llama":
-    if not LLAMA_AVAILABLE:
-        raise RuntimeError("llama-cpp-python not installed")
-
-    model_path = config.get("path", "")
-    backend = config.get("backend", "cpu")
-    n_gpu_layers = config.get("n_gpu_layers", 0)
-    max_tokens = config.get("max_tokens", 2048)
-    temperature = config.get("temperature", 0.2)
-
-    if backend == "vulkan" and n_gpu_layers != 0:
-        n_gpu_layers = -1
-
-    return create_llama_adapter(
-        model_path=model_path,
-        backend=backend,
-        n_gpu_layers=n_gpu_layers,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        base_dir=base_dir,
-    )
diff --git a/app/models/async_adapters.py b/app/models/async_adapters.py
deleted file mode 100644
index ae23d55..0000000
--- a/app/models/async_adapters.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-from typing import Any, AsyncIterator
-
-from app.models.orchestrator import OrchestratorAdapter as SyncOrchestrator
-
-
-class AsyncOrchestratorAdapter:
-    """Async wrapper for orchestrator - runs in executor to avoid blocking event loop."""
-
-    def __init__(self, sync_adapter: SyncOrchestrator) -> None:
-        self._sync = sync_adapter
-
-    async def generate(self, prompt: str, max_tokens: int | None = None) -> str:
-        loop = asyncio.get_event_loop()
-        return await loop.run_in_executor(
-            None,
-            lambda: self._sync.generate(prompt, max_tokens)
-        )
-
-    async def stream(self, prompt: str, max_tokens: int | None = None) -> AsyncIterator[str]:
-        loop = asyncio.get_event_loop()
-        
-        async def gen():
-            return list(self._sync.stream(prompt, max_tokens))
-        
-        result = await loop.run_in_executor(None, gen)
-        for chunk in result:
-            yield chunk
-
-
-class AsyncCoderAdapter:
-    """Async wrapper for coder."""
-
-    def __init__(self, sync_adapter) -> None:
-        self._sync = sync_adapter
-
-    async def generate(self, prompt: str, max_tokens: int | None = None) -> str:
-        loop = asyncio.get_event_loop()
-        return await loop.run_in_executor(
-            None,
-            lambda: self._sync.generate(prompt, max_tokens)
-        )
-
-
-class AsyncCriticAdapter:
-    """Async wrapper for critic."""
-
-    def __init__(self, sync_adapter) -> None:
-        self._sync = sync_adapter
-
-    async def generate(self, prompt: str, max_tokens: int | None = None) -> str:
-        loop = asyncio.get_event_loop()
-        return await loop.run_in_executor(
-            None,
-            lambda: self._sync.generate(prompt, max_tokens)
-        )
\ No newline at end of file
diff --git a/app/models/coder.py b/app/models/coder.py
deleted file mode 100644
index 17af40c..0000000
--- a/app/models/coder.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from __future__ import annotations
-
-from threading import RLock
-from typing import Any, Iterator
-from llama_cpp import Llama
-
-
-class CoderAdapter:
-    def __init__(self, llm: Llama, system_prompt: str | None = None, lock: RLock | None = None) -> None:
-        self._llm = llm
-        self._lock = lock or RLock()
-        self._system_prompt = system_prompt or (
-            "You are an expert code generation model."
-        )
-        self._temperature = 0.2
-
-    def generate(self, prompt: str, max_tokens: int | None = None) -> str:
-        messages = [
-            {"role": "system", "content": self._system_prompt},
-            {"role": "user", "content": prompt},
-        ]
-        with self._lock:
-            output = self._llm.create_chat_completion(
-                messages=messages,
-                max_tokens=max_tokens or 1024,
-                temperature=self._temperature,
-            )
-        return output["choices"][0]["message"]["content"]
-
-    def stream(self, prompt: str, max_tokens: int | None = None) -> Iterator[str]:
-        messages = [
-            {"role": "system", "content": self._system_prompt},
-            {"role": "user", "content": prompt},
-        ]
-        with self._lock:
-            for chunk in self._llm.create_chat_completion(
-                messages=messages,
-                max_tokens=max_tokens or 1024,
-                temperature=self._temperature,
-                stream=True,
-            ):
-                content = chunk["choices"][0].get("delta", {}).get("content")
-                if content:
-                    yield content
diff --git a/app/models/critic.py b/app/models/critic.py
deleted file mode 100644
index 94ff83c..0000000
--- a/app/models/critic.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from __future__ import annotations
-
-from threading import RLock
-from typing import Any, Iterator
-from llama_cpp import Llama
-
-
-class CriticAdapter:
-    def __init__(self, llm: Llama, system_prompt: str | None = None, lock: RLock | None = None) -> None:
-        self._llm = llm
-        self._lock = lock or RLock()
-        self._system_prompt = system_prompt or (
-            "You are a critic model. Evaluate tool results and respond with JSON."
-        )
-        self._temperature = 0.1
-
-    def generate(self, prompt: str, max_tokens: int | None = None) -> str:
-        messages = [
-            {"role": "system", "content": self._system_prompt},
-            {"role": "user", "content": prompt},
-        ]
-        with self._lock:
-            output = self._llm.create_chat_completion(
-                messages=messages,
-                max_tokens=max_tokens or 512,
-                temperature=self._temperature,
-            )
-        return output["choices"][0]["message"]["content"]
-
-    def stream(self, prompt: str, max_tokens: int | None = None) -> Iterator[str]:
-        messages = [
-            {"role": "system", "content": self._system_prompt},
-            {"role": "user", "content": prompt},
-        ]
-        with self._lock:
-            for chunk in self._llm.create_chat_completion(
-                messages=messages,
-                max_tokens=max_tokens or 512,
-                temperature=self._temperature,
-                stream=True,
-            ):
-                content = chunk["choices"][0].get("delta", {}).get("content")
-                if content:
-                    yield content
diff --git a/app/models/embeddings.py b/app/models/embeddings.py
deleted file mode 100644
index ea3958d..0000000
--- a/app/models/embeddings.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from __future__ import annotations
-
-from pathlib import Path
-from typing import Any
-
-import numpy as np
-from sentence_transformers import SentenceTransformer
-
-
-class EmbeddingsAdapter:
-    def __init__(
-        self,
-        model_path: str | Path | None = None,
-        model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
-        embedding_dim: int = 384,
-    ) -> None:
-        self._embedding_dim = embedding_dim
-        if model_path and Path(model_path).exists():
-            self._model = SentenceTransformer(str(model_path))
-        else:
-            self._model = SentenceTransformer(model_name)
-
-    def encode(self, texts: str | list[str]) -> np.ndarray:
-        is_single = isinstance(texts, str)
-        if is_single:
-            texts = [texts]
-        embeddings = self._model.encode(texts, convert_to_numpy=True)
-        if is_single:
-            return embeddings[0]
-        return embeddings
-
-    def encode_batch(self, texts: list[str], batch_size: int = 32) -> np.ndarray:
-        return self._model.encode(texts, batch_size=batch_size, convert_to_numpy=True)
-
-    @property
-    def embedding_dim(self) -> int:
-        return self._embedding_dim
\ No newline at end of file
diff --git a/app/models/orchestrator.py b/app/models/orchestrator.py
deleted file mode 100644
index 0a7482d..0000000
--- a/app/models/orchestrator.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from __future__ import annotations
-
-from threading import RLock
-from typing import Any, Iterator
-from llama_cpp import Llama
-
-
-class OrchestratorAdapter:
-    def __init__(self, llm: Llama, system_prompt: str | None = None, lock: RLock | None = None) -> None:
-        self._llm = llm
-        self._lock = lock or RLock()
-        self._system_prompt = system_prompt or (
-            "You are an expert orchestrator for a local AI agent system. "
-            "Your role is to analyze the user's task, decide whether planning is needed."
-        )
-        self._temperature = 0.2
-
-    def generate(self, prompt: str, max_tokens: int | None = None) -> str:
-        messages = [
-            {"role": "system", "content": self._system_prompt},
-            {"role": "user", "content": prompt},
-        ]
-        with self._lock:
-            output = self._llm.create_chat_completion(
-                messages=messages,
-                max_tokens=max_tokens or 512,
-                temperature=self._temperature,
-            )
-        return output["choices"][0]["message"]["content"]
-
-    def stream(self, prompt: str, max_tokens: int | None = None) -> Iterator[str]:
-        messages = [
-            {"role": "system", "content": self._system_prompt},
-            {"role": "user", "content": prompt},
-        ]
-        with self._lock:
-            for chunk in self._llm.create_chat_completion(
-                messages=messages,
-                max_tokens=max_tokens or 512,
-                temperature=self._temperature,
-                stream=True,
-            ):
-                content = chunk["choices"][0].get("delta", {}).get("content")
-                if content:
-                    yield content
diff --git a/app/permissions/__init__.py b/app/permissions/__init__.py
deleted file mode 100644
index ad3e429..0000000
--- a/app/permissions/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-"""Permission and approval handling."""
-
diff --git a/app/permissions/approval_store.py b/app/permissions/approval_store.py
deleted file mode 100644
index 5b9ea42..0000000
--- a/app/permissions/approval_store.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from __future__ import annotations
-
-import sqlite3
-from pathlib import Path
-
-from app.core.contracts import PermissionDecision
-
-
-class SQLiteApprovalStore:
-    """Stores persistent user approval decisions."""
-
-    def __init__(self, db_path: str | Path) -> None:
-        self._db_path = Path(db_path)
-        self._db_path.parent.mkdir(parents=True, exist_ok=True)
-        self._initialize()
-
-    def save(self, decision: PermissionDecision) -> PermissionDecision:
-        with sqlite3.connect(self._db_path) as conn:
-            conn.execute(
-                """
-                INSERT OR REPLACE INTO approvals (action_type, pattern, decision, created_at)
-                VALUES (?, ?, ?, ?)
-                """,
-                (
-                    decision.action_type,
-                    decision.pattern,
-                    decision.decision,
-                    decision.created_at.isoformat(),
-                ),
-            )
-            conn.commit()
-        return decision
-
-    def load(self, action_type: str, pattern: str) -> PermissionDecision | None:
-        with sqlite3.connect(self._db_path) as conn:
-            row = conn.execute(
-                """
-                SELECT action_type, pattern, decision, created_at
-                FROM approvals
-                WHERE action_type = ? AND pattern = ?
-                """,
-                (action_type, pattern),
-            ).fetchone()
-        if not row:
-            return None
-        return PermissionDecision(
-            action_type=row[0],
-            pattern=row[1],
-            decision=row[2],
-            created_at=row[3],
-        )
-
-    def _initialize(self) -> None:
-        with sqlite3.connect(self._db_path) as conn:
-            conn.execute(
-                """
-                CREATE TABLE IF NOT EXISTS approvals (
-                    action_type TEXT NOT NULL,
-                    pattern TEXT NOT NULL,
-                    decision TEXT NOT NULL,
-                    created_at TEXT NOT NULL,
-                    PRIMARY KEY (action_type, pattern)
-                )
-                """
-            )
-            conn.commit()
-
diff --git a/app/runtime/__init__.py b/app/runtime/__init__.py
deleted file mode 100644
index b2327dd..0000000
--- a/app/runtime/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-"""Runtime loop and execution coordination."""
-
diff --git a/app/runtime/async_runtime_loop.py b/app/runtime/async_runtime_loop.py
deleted file mode 100644
index 77196e8..0000000
--- a/app/runtime/async_runtime_loop.py
+++ /dev/null
@@ -1,148 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-from app.core.context_builder import ContextBuilder
-from app.core.contracts import ExecutionDirective, PermissionDecision, PermissionRequest, RuntimeEvent, TaskCheckpoint, UserTask
-from app.core.execution_engine import ExecutionEngine
-from app.core.async_router import AsyncRouter
-from app.events.event_bus import EventBus
-from app.events.event_types import CHECKPOINT_SAVED, CONTEXT_BUILT, TASK_AWAITING_PERMISSION, TASK_COMPLETED, TASK_FAILED, TASK_RECEIVED
-from app.core.permission_service import PermissionService
-from app.state.checkpoint_store import SQLiteCheckpointStore
-from app.state.task_state_store import SQLiteTaskStateStore
-
-
-class AsyncRuntimeLoop:
-    """Async runtime loop using LLM orchestrator."""
-
-    def __init__(
-        self,
-        event_bus: EventBus,
-        task_state_store: SQLiteTaskStateStore,
-        checkpoint_store: SQLiteCheckpointStore,
-        context_builder: ContextBuilder,
-        router: AsyncRouter,
-        execution_engine: ExecutionEngine,
-        permission_service: PermissionService,
-        memory_interface=None,
-    ) -> None:
-        self._event_bus = event_bus
-        self._task_state_store = task_state_store
-        self._checkpoint_store = checkpoint_store
-        self._context_builder = context_builder
-        self._router = router
-        self._execution_engine = execution_engine
-        self._permission_service = permission_service
-        self._memory_interface = memory_interface
-
-    async def run_task(self, task: UserTask) -> dict[str, object]:
-        state = self._task_state_store.create_task(
-            task.task_id,
-            {
-                "status": "received",
-                "session_id": task.session_id,
-                "plan": None,
-                "task_input": task.input,
-                "task_context": task.context,
-            },
-        )
-        self._publish(task, TASK_RECEIVED, {"status": "received"})
-
-        checkpoint = TaskCheckpoint(task_id=task.task_id, status="received")
-        self._checkpoint_store.save(checkpoint)
-        self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json"))
-
-        context = self._context_builder.build(task=task, checkpoint=checkpoint)
-        self._publish(task, CONTEXT_BUILT, {"keys": sorted(context.keys())})
-
-        directive = await self._router.decide(state=state, context=context, task_id=task.task_id, session_id=task.session_id)
-        
-        execution_result = await asyncio.to_thread(
-            self._execution_engine.execute,
-            task=task,
-            directive=directive,
-        )
-        
-        state_patch = {"status": execution_result["status"], "last_directive": directive.model_dump(mode="json")}
-        
-        if execution_result["status"] == "awaiting_permission":
-            state_patch["pending_permission_request"] = execution_result["result"].get("permission_request")
-        
-        self._task_state_store.update_task(task.task_id, state_patch)
-        
-        status = execution_result["status"]
-        
-        if status == "completed":
-            self._publish(task, TASK_COMPLETED, {"directive": directive.model_dump(mode="json"), "execution_result": execution_result["result"]})
-        elif status == "failed":
-            self._publish(task, TASK_FAILED, {"error": execution_result.get("result", {}).get("error")})
-        
-        checkpoint.status = status
-        self._checkpoint_store.save(checkpoint)
-        self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json"))
-
-        # Save task and result to memory for session context
-        self._save_to_memory(task, execution_result, status)
-
-        return {
-            "task_id": task.task_id,
-            "status": status,
-            "directive": directive.model_dump(mode="json"),
-            "result": execution_result.get("result"),
-            "events": list(self._event_bus.get_task_events(task.task_id)),
-        }
-
-    def _publish(self, task: UserTask, event_type: str, payload: dict) -> None:
-        if not self._event_bus:
-            return
-        event = RuntimeEvent(
-            task_id=task.task_id,
-            session_id=task.session_id,
-            sequence=self._event_bus.next_sequence(task.task_id),
-            type=event_type,
-            payload=payload,
-        )
-        self._event_bus.publish(event)
-
-    def _save_to_memory(self, task: UserTask, execution_result: dict, status: str) -> None:
-        """Save task input and result to memory for session context."""
-        if not self._memory_interface:
-            return
-
-        try:
-            # Save task input as summary
-            self._memory_interface.insert(
-                text=f"User request: {task.input}",
-                kind="summary",
-                source="user",
-                task_id=task.task_id,
-                session_id=task.session_id,
-                weight=0.8,
-                metadata={"status": status},
-            )
-
-            # Save execution result
-            result_text = ""
-            if status == "completed":
-                step_results = execution_result.get("result", {}).get("step_results", [])
-                if step_results:
-                    for step in step_results:
-                        tool_result = step.get("result", {}).get("result", {})
-                        if tool_result.get("output"):
-                            result_text += f" | {step.get('step_id')}: {tool_result.get('output')[:200]}"
-            elif status == "failed":
-                result_text = f" | Error: {execution_result.get('result', {}).get('error', 'Unknown')}"
-
-            if result_text:
-                self._memory_interface.insert(
-                    text=f"Result: {status}{result_text}",
-                    kind="tool_result",
-                    source="system",
-                    task_id=task.task_id,
-                    session_id=task.session_id,
-                    weight=0.7,
-                    metadata={"status": status},
-                )
-        except Exception as e:
-            import logging
-            logging.getLogger(__name__).warning(f"Failed to save to memory: {e}")
\ No newline at end of file
diff --git a/app/runtime/runtime_controller.py b/app/runtime/runtime_controller.py
deleted file mode 100644
index 47eb065..0000000
--- a/app/runtime/runtime_controller.py
+++ /dev/null
@@ -1,643 +0,0 @@
-from __future__ import annotations
-
-import json
-from concurrent.futures import Future, ThreadPoolExecutor
-from threading import RLock
-from pathlib import Path
-
-from app.core.config import AppConfig, load_app_config
-from app.core.context_builder import ContextBuilder
-from app.core.command_analyzer import CommandAnalyzer
-from app.core.contracts import UserTask
-from app.core.execution_engine import ExecutionEngine
-from app.core.execution_scheduler import ExecutionScheduler
-from app.core.async_router import AsyncRouter
-from app.events.event_bus import EventBus
-from app.events.event_store import SQLiteEventStore
-from app.memory import MemoryInterface, MemoryStore, VectorIndex
-from app.memory.recall import MemoryRecallService
-from app.memory.write_policy import MemoryWritePolicy
-from app.models import (
-    CoderAdapter,
-    CriticAdapter,
-    EmbeddingsAdapter,
-    OrchestratorAdapter,
-    create_adapter,
-)
-from app.models.async_adapters import AsyncOrchestratorAdapter, AsyncCriticAdapter, AsyncCoderAdapter
-from app.permissions.approval_store import SQLiteApprovalStore
-from app.core.permission_service import PermissionService
-from app.runtime.runtime_loop import RuntimeLoop
-from app.state.checkpoint_store import SQLiteCheckpointStore
-from app.state.task_state_store import SQLiteTaskStateStore
-from app.tools.file_read import FileReadTool
-from app.tools.file_write import FileWriteTool
-from app.tools.registry import ToolRegistry
-from app.tools.sandbox import ToolSandbox
-from app.tools.shell_exec import ShellExecTool
-from app.tools.memory_tools import MemoryInsertTool, MemorySearchTool, MemoryListTool
-
-
-class RuntimeController:
-    """Composition root for the ducklm runtime."""
-
-    def __init__(self, base_dir: str | Path | None = None) -> None:
-        self.base_dir = Path(base_dir or Path(__file__).resolve().parents[2])
-        self.config: AppConfig = load_app_config(self.base_dir / "config")
-
-        self.event_bus = EventBus(
-            SQLiteEventStore(self.base_dir / "data" / "events" / "events.sqlite3")
-        )
-        self.task_state_store = SQLiteTaskStateStore(
-            self.base_dir / "data" / "state" / "task_state.sqlite3"
-        )
-        self.checkpoint_store = SQLiteCheckpointStore(
-            self.base_dir / "data" / "state" / "checkpoints.sqlite3"
-        )
-        self.approval_store = SQLiteApprovalStore(
-            self.base_dir / "data" / "permissions" / "approvals.sqlite3"
-        )
-
-        self._thinker: OrchestratorAdapter | None = None
-        self._json_compiler: OrchestratorAdapter | None = None
-        self._orchestrator: OrchestratorAdapter | None = None
-        self._coder: CoderAdapter | None = None
-        self._critic: CriticAdapter | None = None
-        self._sys_util: OrchestratorAdapter | None = None
-        self._model_cache: dict[tuple[object, ...], tuple[object, RLock]] = {}
-        self._memory_interface: MemoryInterface | None = None
-        self._memory_policy: MemoryWritePolicy | None = None
-        self._background_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ducklm-task")
-        self._background_tasks: dict[str, Future[dict[str, object]]] = {}
-        self.tool_registry = None
-        self.tool_sandbox = None
-
-        self._init_models()
-        self._init_memory()
-
-        runtime_config = self.config.runtime
-
-        self.tool_sandbox = ToolSandbox(
-            allowed_root=self.base_dir,
-            timeout_ms=runtime_config.step_timeout_ms,
-            command_timeout_ms=runtime_config.shell_command_timeout_ms,
-            idle_timeout_ms=runtime_config.shell_idle_timeout_ms,
-        )
-
-        self.tool_registry = self._create_tool_registry()
-
-        context_config = {
-            "max_context_tokens": runtime_config.max_context_tokens,
-            "context_budgets": runtime_config.context_budgets,
-            "reserve_for_generation_pct": runtime_config.reserve_for_generation_pct,
-        }
-
-        self.context_builder = ContextBuilder(
-            memory_interface=self._memory_interface,
-            tool_registry=self.tool_registry,
-            config=context_config,
-        )
-
-        self._prompts = self._load_prompts()
-        # ensure sys_util prompt is present in prompts dict for router
-        # ensure sys_util prompt is available to router (prompts.json may have "sys_util" key)
-        if "sys_util" not in self._prompts and "prompts" in self.config:
-            self._prompts["sys_util"] = self.config.get("sys_util")
-
-        self.context_builder = ContextBuilder(
-            memory_interface=self._memory_interface,
-            tool_registry=self.tool_registry,
-            config=context_config,
-        )
-
-        self.router = AsyncRouter(
-            thinker=None,
-            json_compiler=None,
-            intent_parser=None,
-            prompts=self._prompts,
-            event_bus=self.event_bus,
-            tool_registry=self.tool_registry,
-            retry_limit=runtime_config.orchestrator_retry_limit,
-            debug=runtime_config.debug if hasattr(runtime_config, 'debug') else False,
-            log_length=runtime_config.debug_orchestrator_log_length if hasattr(runtime_config, 'debug_orchestrator_log_length') else 500,
-            json_fix_retry_limit=runtime_config.json_fix_retry_limit if hasattr(runtime_config, 'json_fix_retry_limit') else 2,
-            json_fix_use_sys_util=runtime_config.json_fix_use_sys_util if hasattr(runtime_config, "json_fix_use_sys_util") else True,
-            intent_classifier=runtime_config.intent_classifier if hasattr(runtime_config, "intent_classifier") else "thinker",
-        )
-
-        self.permission_service = PermissionService(
-            config=self._load_permissions_config(),
-        )
-        self.command_analyzer = CommandAnalyzer(self.permission_service)
-
-        self.execution_engine = ExecutionEngine(
-            event_bus=self.event_bus,
-            tool_registry=self.tool_registry,
-            permission_service=self.permission_service,
-            scheduler=ExecutionScheduler(
-                retry_limit=runtime_config.planner_retry_limit
-            ),
-            critic=self._critic,
-            memory_policy=self._memory_policy,
-            memory_interface=self._memory_interface,
-            prompts=self._prompts,
-            recovery_limit=runtime_config.tool_retry_limit,
-            critic_retry_limit=runtime_config.critic_retry_limit,
-            command_analyzer=self.command_analyzer,
-        )
-
-        self.runtime_loop = RuntimeLoop(
-            event_bus=self.event_bus,
-            task_state_store=self.task_state_store,
-            checkpoint_store=self.checkpoint_store,
-            context_builder=self.context_builder,
-            router=self.router,
-            execution_engine=self.execution_engine,
-            permission_service=self.permission_service,
-            memory_interface=self._memory_interface,
-        )
-
-    def _load_prompts(self) -> dict[str, str]:
-        prompts_dir = self.base_dir / "config" / "prompts"
-        prompts = {}
-
-        if prompts_dir.is_dir():
-            for md_file in prompts_dir.glob("*.md"):
-                role = md_file.stem
-                prompts[role] = md_file.read_text(encoding="utf-8")
-
-        if prompts:
-            return prompts
-
-        prompts_file = self.base_dir / "config" / "prompts.json"
-        if prompts_file.exists():
-            with open(prompts_file) as f:
-                return json.load(f)
-        return {}
-
-    def _load_permissions_config(self) -> dict:
-        permissions_file = self.base_dir / "config" / "permissions.json"
-        if not permissions_file.exists():
-            return {}
-        with permissions_file.open("r", encoding="utf-8") as handle:
-            return json.load(handle)
-
-    def _init_models(self) -> None:
-        try:
-            memory_config = self.config.runtime.memory_thresholds or {}
-            if memory_config:
-                self._memory_policy = MemoryWritePolicy(
-                    store_threshold=memory_config.get("default_store_weight", 0.8),
-                )
-            print("Models policy ready")
-        except Exception as e:
-            print(f"Models init failed: {e}")
-
-    def load_models_at_startup(self) -> None:
-        """Load all LLM models synchronously. Called from startup hook in executor."""
-        import os
-        os.chdir(str(self.base_dir / "models"))
-        
-        try:
-            print("Loading thinker model...")
-            thinker_config = self.config.models.thinker or {}
-            if thinker_config.get("path"):
-                llm, lock = self._get_or_create_llm("thinker", thinker_config)
-                self._thinker = OrchestratorAdapter(llm, system_prompt=self._prompts.get("thinker"), lock=lock)
-                print(f"Thinker loaded: {self._thinker} (model: {thinker_config.get('path')})")
-
-            print("Loading json_compiler model...")
-            compiler_config = self.config.models.json_compiler or {}
-            if compiler_config.get("path"):
-                llm, lock = self._get_or_create_llm("json_compiler", compiler_config)
-                self._json_compiler = OrchestratorAdapter(llm, system_prompt=self._prompts.get("json_compiler"), lock=lock)
-                print(f"JSON Compiler loaded: {self._json_compiler} (model: {compiler_config.get('path')})")
-
-            print("Loading coder model...")
-            coder_config = self.config.models.coder or {}
-            if coder_config.get("path"):
-                llm, lock = self._get_or_create_llm("coder", coder_config)
-                self._coder = CoderAdapter(llm, system_prompt=self._prompts.get("coder"), lock=lock)
-                print(f"Coder loaded: {self._coder} (model: {coder_config.get('path')})")
-
-            print("Loading critic model...")
-            critic_config = self.config.models.critic or {}
-            if critic_config.get("path"):
-                llm, lock = self._get_or_create_llm("critic", critic_config)
-                self._critic = CriticAdapter(llm, system_prompt=self._prompts.get("critic"), lock=lock)
-                print(f"Critic loaded: {self._critic} (model: {critic_config.get('path')})")
-
-            print("Loading sys_util model...")
-            sys_util_config = self.config.models.sys_util or {}
-            if sys_util_config.get("path"):
-                llm, lock = self._get_or_create_llm("sys_util", sys_util_config)
-                self._sys_util = OrchestratorAdapter(llm, system_prompt=self._prompts.get("sys_util"), lock=lock)
-                print(f"Sys_util loaded: {self._sys_util} (model: {sys_util_config.get('path')})")
-
-            print("All models loaded successfully")
-
-            async_thinker = AsyncOrchestratorAdapter(self._thinker) if self._thinker else None
-            async_compiler = AsyncOrchestratorAdapter(self._json_compiler) if self._json_compiler else None
-            async_coder = AsyncCoderAdapter(self._coder) if self._coder else None
-            async_critic = AsyncCriticAdapter(self._critic) if self._critic else None
-            async_sys_util = AsyncOrchestratorAdapter(self._sys_util) if self._sys_util else None
-
-            self.router.set_thinker(async_thinker)
-            self.router.set_json_compiler(async_compiler)
-            self.router.set_sys_util(async_sys_util)
-            self.router.set_tool_registry(self.tool_registry)
-            if async_critic:
-                self.execution_engine.set_critic(async_critic)
-            if async_coder:
-                self.execution_engine.set_coder(async_coder)
-
-            # Create MemoryRecallService using the configured model (default: sys_util)
-            # Reuses already-loaded async adapter — no duplicate model loading
-            recall_model_name = self.config.runtime.recall_model
-            recall_async_model = {
-                "sys_util": async_sys_util,
-                "thinker": async_thinker,
-                "json_compiler": async_compiler,
-                "critic": async_critic,
-                "coder": async_coder,
-            }.get(recall_model_name, async_sys_util)
-
-            self._recall_service = MemoryRecallService(
-                memory_interface=self._memory_interface,
-                recall_model=recall_async_model,
-            )
-            self.runtime_loop.set_recall_service(self._recall_service)
-            print(f"MemoryRecallService initialized with model: {recall_model_name}")
-
-            # Set memory policy in runtime loop
-            self.runtime_loop.set_memory_policy(self._memory_policy)
-            print(f"MemoryWritePolicy set: {self._memory_policy is not None}")
-
-        except Exception as e:
-            print(f"Failed to load models at startup: {e}")
-            raise RuntimeError(f"Model loading failed: {e}") from e
-
-    def _model_cache_key(self, model_config: dict) -> tuple[object, ...]:
-        path = str((self.base_dir / "models" / model_config.get("path", "")).resolve())
-        return (
-            path,
-            model_config.get("backend", "cpu"),
-            model_config.get("n_gpu_layers", 0),
-            model_config.get("n_ctx", 4096),
-        )
-
-    def _get_or_create_llm(self, model_type: str, model_config: dict):
-        key = self._model_cache_key(model_config)
-        cached = self._model_cache.get(key)
-        if cached:
-            print(f"Reusing model instance: {model_config.get('path')} for {model_type}")
-            return cached
-
-        llm = create_adapter(model_type, model_config, self.base_dir / "models")
-        lock = RLock()
-        cached = (llm, lock)
-        self._model_cache[key] = cached
-        return cached
-
-    def _init_memory(self) -> None:
-        try:
-            emb_config = self.config.models.embeddings or {}
-            model_path = self.base_dir / emb_config.get("path", "models/all-MiniLM-L6-v2")
-            if not model_path.exists() and not Path(emb_config.get("path", "")).is_absolute():
-                model_path = self.base_dir / "models" / emb_config.get("path", "all-MiniLM-L6-v2")
-            if not model_path.exists():
-                print(f"Memory init skipped: embeddings model not found at {model_path}")
-                self._memory_interface = None
-                return
-            embeddings = EmbeddingsAdapter(
-                model_path=model_path,
-                embedding_dim=emb_config.get("embedding_dim", 384),
-            )
-
-            store = MemoryStore(
-                self.base_dir / "data" / "memory" / "memory.sqlite3"
-            )
-            vector_index = VectorIndex(
-                index_path=self.base_dir / "data" / "memory" / "index.bin",
-                embedding_dim=embeddings.embedding_dim,
-            )
-
-            self._memory_interface = MemoryInterface(store, vector_index, embeddings)
-
-        except Exception as e:
-            print(f"Memory init failed: {e}")
-            self._memory_interface = None
-
-    def _create_tool_registry(self) -> ToolRegistry:
-        from app.tools.registry import ToolRegistry
-        from app.tools.plugins.shell_exec import Tool as ShellExecTool
-        from app.tools.plugins.file_read import Tool as FileReadTool
-        from app.tools.plugins.file_write import Tool as FileWriteTool
-        from app.tools.plugins.memory_tools import Tool as MemoryTool
-        from app.tools.discover import ToolDiscovery
-
-        registry = ToolRegistry()
-
-        tool_init_map = {
-            "shell_exec": lambda m: ShellExecTool(self.tool_sandbox),
-            "file_read": lambda m: FileReadTool(self.tool_sandbox),
-            "file_write": lambda m: FileWriteTool(self.tool_sandbox),
-            "memory": lambda m: MemoryTool(self._memory_interface),
-        }
-
-        discovery = ToolDiscovery()
-        discovered = discovery.discover()
-
-        for name, data in discovered.items():
-            init_fn = tool_init_map.get(name)
-            if init_fn:
-                tool = init_fn(data.get("manifest", {}))
-                registry.register(tool)
-                registry._schemas[name] = {
-                    "description": data.get("manifest", {}).get("description", ""),
-                    "args_schema": data.get("manifest", {}).get("args_schema", {}),
-                    "requires_permission": data.get("manifest", {}).get("requires_permission", False),
-                }
-                print(f"Registered tool: {name}")
-            else:
-                print(f"No init mapping for tool: {name} - skipping")
-
-        return registry
-
-    @property
-    def orchestrator(self) -> OrchestratorAdapter | None:
-        return self._orchestrator
-
-    @property
-    def coder(self) -> CoderAdapter | None:
-        return self._coder
-
-    @property
-    def critic(self) -> CriticAdapter | None:
-        return self._critic
-
-    @property
-    def memory_interface(self) -> MemoryInterface | None:
-        return self._memory_interface
-
-    def _ensure_orchestrator(self) -> OrchestratorAdapter | None:
-        if self._orchestrator is not None:
-            return self._orchestrator
-        try:
-            orch_config = self.config.models.orchestrator or {}
-            if orch_config.get("path"):
-                llm, lock = self._get_or_create_llm("orchestrator", orch_config)
-                self._orchestrator = OrchestratorAdapter(llm, lock=lock)
-        except Exception as e:
-            print(f"Orchestrator load failed: {e}")
-        return self._orchestrator
-
-    def _ensure_critic(self) -> CriticAdapter | None:
-        if self._critic is not None:
-            return self._critic
-        try:
-            critic_config = self.config.models.critic or {}
-            if critic_config.get("path"):
-                llm, lock = self._get_or_create_llm("critic", critic_config)
-                self._critic = CriticAdapter(llm, lock=lock)
-        except Exception as e:
-            print(f"Critic load failed: {e}")
-        return self._critic
-
-    def handle_task(self, task: UserTask) -> dict[str, object]:
-        return self.runtime_loop.run_task(task)
-
-    def submit_task(self, task: UserTask) -> dict[str, object]:
-        self._background_tasks[task.task_id] = self._background_executor.submit(
-            self.handle_task,
-            task,
-        )
-        return {"task_id": task.task_id, "status": "accepted"}
-
-    def resolve_permission(self, task_id: str, decision: str) -> dict[str, object]:
-        return self.runtime_loop.resolve_permission(
-            task_id=task_id, decision=decision
-        )
-
-    def submit_permission_resolution(self, task_id: str, decision: str) -> dict[str, object]:
-        if not self.task_state_store.get_task(task_id):
-            return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}}
-        self._background_tasks[task_id] = self._background_executor.submit(
-            self.resolve_permission,
-            task_id,
-            decision,
-        )
-        return {"task_id": task_id, "status": "accepted"}
-
-    def resolve_secret(self, task_id: str, secret: str) -> dict[str, object]:
-        return self.runtime_loop.resolve_secret(
-            task_id=task_id, secret=secret
-        )
-
-    def submit_secret_resolution(self, task_id: str, secret: str) -> dict[str, object]:
-        if not self.task_state_store.get_task(task_id):
-            return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}}
-        self._background_tasks[task_id] = self._background_executor.submit(
-            self.resolve_secret,
-            task_id,
-            secret,
-        )
-        return {"task_id": task_id, "status": "accepted"}
-
-    def resolve_password(self, task_id: str, password: str) -> dict[str, object]:
-        return self.runtime_loop.resolve_password(
-            task_id=task_id, password=password
-        )
-
-    def resolve_review(self, task_id: str, decision: str, correction: str | None = None) -> dict[str, object]:
-        return self.runtime_loop.resolve_review(
-            task_id=task_id,
-            decision=decision,
-            correction=correction,
-        )
-
-    def submit_review_resolution(self, task_id: str, decision: str, correction: str | None = None) -> dict[str, object]:
-        if not self.task_state_store.get_task(task_id):
-            return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}}
-        self._background_tasks[task_id] = self._background_executor.submit(
-            self.resolve_review,
-            task_id,
-            decision,
-            correction,
-        )
-        return {"task_id": task_id, "status": "accepted"}
-
-    def submit_password_resolution(self, task_id: str, password: str) -> dict[str, object]:
-        if not self.task_state_store.get_task(task_id):
-            return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}}
-        self._background_tasks[task_id] = self._background_executor.submit(
-            self.resolve_password,
-            task_id,
-            password,
-        )
-        return {"task_id": task_id, "status": "accepted"}
-
-    def handle_critic_feedback(
-        self,
-        feedback: str,
-        task_id: str | None = None,
-        session_id: str | None = None,
-        feedback_type: str | None = None,
-        severity: str | None = None,
-        correction: str | None = None,
-        remember: bool = True,
-        retry: bool = False,
-        assistant_answer: str | None = None,
-        correctness_override: float | None = None,
-        usefulness_override: float | None = None,
-        safety_override: float | None = None,
-    ) -> dict[str, object]:
-        target_task_id = task_id
-        target_session_id = session_id
-
-        if not target_session_id and not target_task_id:
-            return {
-                "status": "error",
-                "message": "Either task_id or session_id must be provided",
-            }
-
-        state = self.task_state_store.get_task(target_task_id) if target_task_id else None
-        if not target_session_id and state:
-            target_session_id = state.get("session_id")
-
-        if not target_task_id and target_session_id:
-            recent_tasks = self.task_state_store.get_session_tasks(target_session_id, limit=1)
-            if recent_tasks:
-                target_task_id = recent_tasks[0]["task_id"]
-
-        min_weight = 0.3
-        max_weight = 0.95
-        user_weight = 0.9
-
-        final_weight = max(min_weight, min(max_weight, user_weight))
-
-        task_input = state.get("task_input") if state else None
-        last_directive = state.get("last_directive") if state else None
-        feedback_type = feedback_type or "other"
-        severity = severity or "major"
-
-        lesson = self._build_feedback_lesson(
-            feedback_type=feedback_type,
-            severity=severity,
-            feedback=feedback,
-            correction=correction,
-            task_input=task_input,
-        )
-
-        metadata = {
-            "feedback_text": feedback,
-            "feedback_type": feedback_type,
-            "severity": severity,
-            "correction": correction,
-            "assistant_answer": assistant_answer,
-            "task_input": task_input,
-            "last_directive": last_directive,
-            "overrides": {
-                "correctness": correctness_override,
-                "usefulness": usefulness_override,
-                "safety": safety_override,
-            },
-            "source": "user",
-        }
-
-        feedback_text = lesson
-        if correctness_override is not None:
-            feedback_text += f" | Correctness corrected to: {correctness_override}"
-        if usefulness_override is not None:
-            feedback_text += f" | Usefulness corrected to: {usefulness_override}"
-        if safety_override is not None:
-            feedback_text += f" | Safety corrected to: {safety_override}"
-
-        retry_result = None
-        stored = False
-        store_error = None
-        try:
-            if remember and self._memory_interface:
-                self._memory_interface.insert(
-                    text=feedback_text,
-                    kind="critique",
-                    source="user",
-                    task_id=target_task_id,
-                    session_id=target_session_id,
-                    weight=final_weight,
-                    metadata=metadata,
-                )
-                stored = True
-            elif remember and not self._memory_interface:
-                store_error = "Memory not available"
-        except Exception as e:
-            store_error = str(e)
-
-        if retry and task_input:
-            retry_input = self._build_retry_input(
-                task_input=task_input,
-                feedback=feedback,
-                feedback_type=feedback_type,
-                correction=correction,
-            )
-            retry_task = UserTask(
-                session_id=target_session_id or "feedback-retry",
-                input=retry_input,
-                context={
-                    "feedback_retry": True,
-                    "original_task_id": target_task_id,
-                    "feedback_type": feedback_type,
-                    "severity": severity,
-                    "correction": correction,
-                },
-            )
-            retry_result = self.handle_task(retry_task)
-
-        status = "ok" if stored or not remember else "error"
-        return {
-            "status": status,
-            "message": "Feedback saved" if stored else (store_error or "Feedback accepted"),
-            "stored": stored,
-            "task_id": target_task_id,
-            "session_id": target_session_id,
-            "lesson": lesson,
-            "retry_result": retry_result,
-        }
-
-    def _build_feedback_lesson(
-        self,
-        feedback_type: str,
-        severity: str,
-        feedback: str,
-        correction: str | None,
-        task_input: str | None,
-    ) -> str:
-        parts = [
-            "User critique lesson.",
-            f"Error type: {feedback_type}.",
-            f"Severity: {severity}.",
-        ]
-        if task_input:
-            parts.append(f"Original task: {task_input}")
-        if feedback:
-            parts.append(f"What was wrong: {feedback}")
-        if correction:
-            parts.append(f"Preferred correction: {correction}")
-        return " | ".join(parts)
-
-    def _build_retry_input(
-        self,
-        task_input: str,
-        feedback: str,
-        feedback_type: str,
-        correction: str | None,
-    ) -> str:
-        retry_input = (
-            f"Повтори задачу с учетом обратной связи.\n"
-            f"Исходная задача: {task_input}\n"
-            f"Тип ошибки: {feedback_type}\n"
-            f"Что было неверно: {feedback}\n"
-        )
-        if correction:
-            retry_input += f"Как должно быть: {correction}\n"
-        return retry_input
diff --git a/app/runtime/runtime_loop.py b/app/runtime/runtime_loop.py
deleted file mode 100644
index 29d00c7..0000000
--- a/app/runtime/runtime_loop.py
+++ /dev/null
@@ -1,688 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-
-from app.core.context_builder import ContextBuilder
-from app.core.contracts import CriticScore, ExecutionDirective, PermissionDecision, PermissionRequest, RuntimeEvent, SecretRequest, TaskCheckpoint, UserTask
-from app.core.execution_engine import ExecutionEngine
-from app.core.async_router import AsyncRouter
-from app.events.event_bus import EventBus
-from app.events.event_types import CHECKPOINT_SAVED, CONTEXT_BUILT, MEMORY_RECALL_USED, MEMORY_WRITE_DECIDED, REVIEW_RESOLVED, TASK_AWAITING_INPUT, TASK_AWAITING_PERMISSION, TASK_AWAITING_REVIEW, TASK_COMPLETED, TASK_FAILED, TASK_RECEIVED
-from app.core.permission_service import PermissionService
-from app.memory.recall import MemoryRecallService
-from app.memory.write_policy import MemoryWritePolicy
-from app.state.checkpoint_store import SQLiteCheckpointStore
-from app.state.task_state_store import SQLiteTaskStateStore
-
-
-def _build_response_directive(execution_result: dict) -> dict | None:
-    """Build a response_directive from step_results or direct output for the client."""
-    result = execution_result.get("result", {})
-
-    # Case 1: step_results from plan execution
-    step_results = result.get("step_results")
-    if step_results:
-        response_parts = []
-        for step in step_results:
-            result_data = step.get("result", {})
-            tool_result = result_data.get("result", result_data)
-            if tool_result.get("ok") and tool_result.get("output"):
-                response_parts.append(str(tool_result["output"]))
-        if response_parts:
-            response_text = "\n\n".join(response_parts)
-            return ExecutionDirective(
-                type="respond", payload={"text": response_text}
-            ).model_dump(mode="json")
-
-    # Case 2: direct tool output (e.g. from resolve_secret -> execute_tool)
-    if result.get("ok") and result.get("output"):
-        return ExecutionDirective(
-            type="respond", payload={"text": str(result["output"])}
-        ).model_dump(mode="json")
-
-    return None
-
-
-class RuntimeLoop:
-    """Central control loop skeleton coordinating task state and events."""
-
-    def __init__(
-        self,
-        event_bus: EventBus,
-        task_state_store: SQLiteTaskStateStore,
-        checkpoint_store: SQLiteCheckpointStore,
-        context_builder: ContextBuilder,
-        router: AsyncRouter,
-        execution_engine: ExecutionEngine,
-        permission_service: PermissionService,
-        memory_interface=None,
-        recall_service: MemoryRecallService | None = None,
-        memory_policy: MemoryWritePolicy | None = None,
-    ) -> None:
-        self._event_bus = event_bus
-        self._task_state_store = task_state_store
-        self._checkpoint_store = checkpoint_store
-        self._context_builder = context_builder
-        self._router = router
-        self._execution_engine = execution_engine
-        self._permission_service = permission_service
-        self._memory_interface = memory_interface
-        self._recall_service = recall_service
-        self._memory_policy = memory_policy
-
-    def set_recall_service(self, recall_service: MemoryRecallService) -> None:
-        self._recall_service = recall_service
-
-    def set_memory_policy(self, policy: MemoryWritePolicy | None) -> None:
-        self._memory_policy = policy
-
-    def run_task(self, task: UserTask) -> dict[str, object]:
-        # Check input for hard-stop commands BEFORE processing
-        hard_stop_check = self._permission_service.check_shell_command(
-            task_id=task.task_id,
-            session_id=task.session_id,
-            command=task.input,
-        )
-        if hard_stop_check.get("decision") == "hard_stop":
-            # Immediately reject hard-stop commands
-            self._publish(task, TASK_RECEIVED, {"status": "received"})
-            checkpoint = TaskCheckpoint(task_id=task.task_id, status="received")
-            self._checkpoint_store.save(checkpoint)
-            self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json"))
-            
-            error_msg = f"⚠️ BLOCKED: {hard_stop_check.get('reason', 'Hard stop command')}"
-            self._publish(task, TASK_FAILED, {
-                "directive": {},
-                "execution_result": {"error": error_msg},
-            })
-            return {
-                "task_id": task.task_id,
-                "status": "failed",
-                "directive": {},
-                "result": {"error": error_msg},
-                "events": [e.model_dump(mode="json") for e in self._event_bus.list_for_task(task.task_id)],
-            }
-        
-        state = self._task_state_store.create_task(
-            task.task_id,
-            {
-                "status": "received",
-                "session_id": task.session_id,
-                "plan": None,
-                "task_input": task.input,
-                "task_context": task.context,
-            },
-        )
-        self._publish(task, TASK_RECEIVED, {"status": "received"})
-
-        checkpoint = TaskCheckpoint(task_id=task.task_id, status="received")
-        self._checkpoint_store.save(checkpoint)
-        self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json"))
-
-        context = self._context_builder.build(task=task, checkpoint=checkpoint)
-        self._publish(task, CONTEXT_BUILT, {"keys": sorted(context.keys())})
-
-        # Active memory recall: system decides if it needs to search memory
-        recall_result = asyncio.run(self._run_recall(task))
-        if recall_result["should_recall"]:
-            context["memory_recall"] = {
-                "query": recall_result["query"],
-                "summary": recall_result["summary"],
-                "entries": [
-                    {"text": e.text, "kind": e.kind, "weight": e.weight}
-                    for e in recall_result["results"]
-                ],
-            }
-            self._publish(task, MEMORY_RECALL_USED, {
-                "query": recall_result["query"],
-                "results_count": len(recall_result["results"]),
-                "reason": recall_result["reason"],
-            })
-
-        directive = asyncio.run(
-            self._router.decide(state=state, context=context, task_id=task.task_id, session_id=task.session_id)
-        )
-        execution_result = self._execution_engine.execute(task=task, directive=directive)
-        state_patch = {"status": execution_result["status"], "last_directive": directive.model_dump(mode="json")}
-        if execution_result["status"] == "awaiting_permission":
-            state_patch["pending_permission_request"] = execution_result["result"]["permission_request"]
-            state_patch["pending_secret_request"] = None
-            state_patch["resolved_permission_decision"] = None
-        elif execution_result["status"] == "awaiting_input":
-            state_patch["pending_permission_request"] = None
-            state_patch["pending_secret_request"] = execution_result["result"]["secret_request"]
-            state_patch["resolved_permission_decision"] = None
-        elif execution_result["status"] == "awaiting_password":
-            state_patch["pending_permission_request"] = None
-            state_patch["pending_secret_request"] = None
-            state_patch["resolved_permission_decision"] = None
-            state_patch["pending_password_request"] = {
-                "command": execution_result["result"].get("command", ""),
-                "reason": "Permission denied - требуется sudo пароль",
-                "attempts": 0,
-            }
-        elif execution_result["status"] == "awaiting_review":
-            state_patch["pending_permission_request"] = None
-            state_patch["pending_secret_request"] = None
-            state_patch["resolved_permission_decision"] = None
-            state_patch["pending_review"] = execution_result["result"]["review"]
-        else:
-            state_patch["pending_permission_request"] = None
-            state_patch["pending_secret_request"] = None
-            state_patch["resolved_permission_decision"] = None
-            state_patch["pending_review"] = None
-        self._task_state_store.update_task(task.task_id, state_patch)
-        final_status = str(execution_result["status"])
-        
-        # For awaiting states - do NOT mark task as completed, keep it in pending state
-        if final_status in ("awaiting_permission", "awaiting_input", "awaiting_password", "awaiting_review"):
-            # Task stays in pending state, don't update to completed
-            pass
-        else:
-            self._task_state_store.update_task(task.task_id, {"status": final_status})
-
-        final_checkpoint = TaskCheckpoint(
-            task_id=task.task_id,
-            status=final_status,
-            context_snapshot=context,
-        )
-        self._checkpoint_store.save(final_checkpoint)
-        
-        # Generate response for user
-        # Case 1: step_results from plan execution
-        if final_status == "completed" and execution_result.get("result", {}).get("step_results"):
-            step_results = execution_result["result"]["step_results"]
-            response_parts = []
-            for step in step_results:
-                result_data = step.get("result", {})
-                tool_result = result_data.get("result", result_data)
-                if tool_result.get("ok") and tool_result.get("output"):
-                    response_parts.append(tool_result["output"])
-            if response_parts:
-                response_text = "\n\n".join(response_parts)
-                execution_result["response_directive"] = ExecutionDirective(
-                    type="respond", payload={"text": response_text}
-                ).model_dump(mode="json")
-
-        # Case 2: respond directive from orchestrator (direct response, no steps)
-        if final_status == "completed" and not execution_result.get("response_directive"):
-            # Use the original directive from router.decide()
-            if hasattr(directive, "type") and directive.type == "respond":
-                if directive.payload.get("text"):
-                    execution_result["response_directive"] = directive.model_dump(mode="json")
-            elif isinstance(directive, dict) and directive.get("type") == "respond":
-                if directive.get("payload", {}).get("text"):
-                    execution_result["response_directive"] = directive
-        
-        # Map status to terminal event type
-        if final_status == "completed":
-            terminal_event_type = TASK_COMPLETED
-        elif final_status == "failed":
-            terminal_event_type = TASK_FAILED
-        elif final_status == "awaiting_permission":
-            terminal_event_type = TASK_AWAITING_PERMISSION
-        elif final_status == "awaiting_input":
-            terminal_event_type = TASK_AWAITING_INPUT
-        elif final_status == "awaiting_review":
-            terminal_event_type = TASK_AWAITING_REVIEW
-        elif final_status == "awaiting_password":
-            terminal_event_type = TASK_AWAITING_PERMISSION
-        else:
-            terminal_event_type = TASK_FAILED
-        self._publish(
-            task,
-            terminal_event_type,
-            {
-                "directive": directive.model_dump(mode="json"),
-                "execution_result": execution_result["result"],
-            },
-        )
-
-        # Save task and result to memory for session context
-        self._save_to_memory(task, execution_result, final_status)
-
-        return {
-            "task_id": task.task_id,
-            "status": final_status,
-            "directive": directive.model_dump(mode="json"),
-            "result": {
-                **execution_result["result"],
-                "response_directive": execution_result.get("response_directive"),
-            },
-            "events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
-        }
-
-    def resolve_permission(self, task_id: str, decision: str) -> dict[str, object]:
-        state = self._task_state_store.get_task(task_id)
-        if not state:
-            return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}}
-
-        pending_request_payload = state.get("pending_permission_request")
-        last_directive_payload = state.get("last_directive")
-        if not pending_request_payload or not last_directive_payload:
-            return {"task_id": task_id, "status": "failed", "result": {"error": "No pending permission request"}}
-
-        task = UserTask(
-            task_id=task_id,
-            session_id=state["session_id"],
-            input=state["task_input"],
-            context=state.get("task_context", {}),
-        )
-        # Get command from pending request
-        command = pending_request_payload.get("command", "")
-        
-        # Resolve permission using new service
-        resolved = self._permission_service.resolve_permission(
-            task_id=task_id,
-            session_id=state["session_id"],
-            command=command,
-            decision=decision,
-        )
-
-        if decision == "deny":
-            execution_result = {
-                "status": "failed",
-                "result": {
-                    "error": "Permission denied by user.",
-                    "permission_decision": resolved,
-                },
-            }
-        elif decision == "allow_with_password":
-            directive = ExecutionDirective.model_validate(last_directive_payload)
-            self._task_state_store.update_task(
-                task.task_id,
-                {
-                    "status": "awaiting_password",
-                    "pending_password_request": {
-                        "command": command,
-                        "reason": pending_request_payload.get("reason", "Требуется пароль для выполнения команды"),
-                        "attempts": 0,
-                    },
-                    "pending_permission_request": None,
-                },
-            )
-            self._publish(task, TASK_AWAITING_PERMISSION, {
-                "password_required": True,
-                "command": command,
-            })
-            return {
-                "task_id": task_id,
-                "status": "awaiting_password",
-                "result": {"message": "Требуется ввод пароля"},
-            }
-        else:
-            directive = ExecutionDirective.model_validate(last_directive_payload)
-            execution_result = self._execution_engine.execute(
-                task=task,
-                directive=directive,
-            )
-
-        final_status = str(execution_result["status"])
-        if decision != "allow_with_password":
-            self._task_state_store.update_task(
-                task.task_id,
-                {
-                    "status": final_status,
-                    "pending_permission_request": None,
-                    "pending_secret_request": execution_result["result"].get("secret_request")
-                    if final_status == "awaiting_input"
-                    else None,
-                    "pending_review": execution_result["result"].get("review")
-                    if final_status == "awaiting_review"
-                    else None,
-                    "resolved_permission_decision": resolved,
-                },
-            )
-        checkpoint = TaskCheckpoint(task_id=task.task_id, status=final_status)
-        self._checkpoint_store.save(checkpoint)
-        self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json"))
-        if final_status == "completed":
-            terminal_event_type = TASK_COMPLETED
-        elif final_status == "awaiting_input":
-            terminal_event_type = TASK_AWAITING_INPUT
-        elif final_status == "awaiting_permission":
-            terminal_event_type = TASK_AWAITING_PERMISSION
-        elif final_status == "awaiting_review":
-            terminal_event_type = TASK_AWAITING_REVIEW
-        else:
-            terminal_event_type = TASK_FAILED
-        self._publish(
-            task,
-            terminal_event_type,
-            {
-                "permission_resolution": resolved.model_dump(mode="json") if hasattr(resolved, 'model_dump') else resolved,
-                "execution_result": execution_result["result"],
-            },
-        )
-
-        # Save to memory after permission resolution
-        self._save_to_memory(task, execution_result, final_status)
-
-        return {
-            "task_id": task.task_id,
-            "status": final_status,
-            "result": {
-                **execution_result["result"],
-                "response_directive": _build_response_directive(execution_result),
-            },
-            "events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
-        }
-
-    def resolve_secret(self, task_id: str, secret: str) -> dict[str, object]:
-        state = self._task_state_store.get_task(task_id)
-        if not state:
-            return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}}
-        pending_secret_payload = state.get("pending_secret_request")
-        last_directive_payload = state.get("last_directive")
-        resolved_permission_payload = state.get("resolved_permission_decision")
-        if not pending_secret_payload or not last_directive_payload:
-            return {"task_id": task_id, "status": "failed", "result": {"error": "No pending secret request"}}
-        if not resolved_permission_payload:
-            return {"task_id": task_id, "status": "failed", "result": {"error": "No resolved permission available"}}
-
-        task = UserTask(
-            task_id=task_id,
-            session_id=state["session_id"],
-            input=state["task_input"],
-            context=state.get("task_context", {}),
-        )
-        _secret_request = SecretRequest.model_validate(pending_secret_payload)
-        directive = ExecutionDirective.model_validate(last_directive_payload)
-        execution_result = self._execution_engine.execute(
-            task=task,
-            directive=directive,
-            permission_override=None,
-            secret_override=secret,
-        )
-        final_status = str(execution_result["status"])
-        pending_review = execution_result["result"].get("review") if final_status == "awaiting_review" else None
-        pending_secret = execution_result["result"].get("secret_request") if final_status == "awaiting_input" else None
-        self._task_state_store.update_task(
-            task.task_id,
-            {
-                "status": final_status,
-                "pending_secret_request": pending_secret,
-                "resolved_permission_decision": resolved_permission_payload if final_status == "awaiting_input" else None,
-                "pending_review": pending_review,
-            },
-        )
-        checkpoint = TaskCheckpoint(task_id=task.task_id, status=final_status)
-        self._checkpoint_store.save(checkpoint)
-        self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json"))
-        if final_status == "completed":
-            terminal_event_type = TASK_COMPLETED
-        elif final_status == "awaiting_input":
-            terminal_event_type = TASK_AWAITING_INPUT
-        elif final_status == "awaiting_permission":
-            terminal_event_type = TASK_AWAITING_PERMISSION
-        elif final_status == "awaiting_review":
-            terminal_event_type = TASK_AWAITING_REVIEW
-        else:
-            terminal_event_type = TASK_FAILED
-        self._publish(
-            task,
-            terminal_event_type,
-            {
-                "secret_resolution": {"task_id": task_id},
-                "execution_result": execution_result["result"],
-            },
-        )
-        return {
-            "task_id": task.task_id,
-            "status": final_status,
-            "result": {
-                **execution_result["result"],
-                "response_directive": _build_response_directive(execution_result),
-            },
-            "events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
-        }
-
-    def resolve_review(self, task_id: str, decision: str, correction: str | None = None) -> dict[str, object]:
-        state = self._task_state_store.get_task(task_id)
-        if not state:
-            return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}}
-        pending_review = state.get("pending_review")
-        if not pending_review:
-            return {"task_id": task_id, "status": "failed", "result": {"error": "No pending review"}}
-
-        task = UserTask(
-            task_id=task_id,
-            session_id=state["session_id"],
-            input=state["task_input"],
-            context={
-                **state.get("task_context", {}),
-                "previous_action_review": {
-                    "decision": decision,
-                    "correction": correction,
-                    "review": pending_review,
-                },
-            },
-        )
-        self._publish(task, REVIEW_RESOLVED, {
-            "decision": decision,
-            "correction": correction,
-            "review": pending_review,
-        })
-        if self._memory_interface:
-            try:
-                self._memory_interface.insert(
-                    text=f"User reviewed model action as {decision}. Correction: {correction or ''}. Review: {pending_review}",
-                    kind="critique",
-                    source="user",
-                    task_id=task_id,
-                    session_id=state["session_id"],
-                    weight=0.9 if decision == "wrong_action" else 0.5,
-                    metadata={"decision": decision, "review": pending_review},
-                )
-            except Exception:
-                pass
-        self._task_state_store.update_task(task_id, {"pending_review": None, "status": "replanning"})
-        return self.run_task(task)
-
-    def resolve_password(self, task_id: str, password: str) -> dict[str, object]:
-        state = self._task_state_store.get_task(task_id)
-        if not state:
-            return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}}
-
-        pending_password_payload = state.get("pending_password_request")
-        last_directive_payload = state.get("last_directive")
-        if not pending_password_payload or not last_directive_payload:
-            return {"task_id": task_id, "status": "failed", "result": {"error": "No pending password request"}}
-
-        current_attempt = pending_password_payload.get("attempts", 0) + 1
-
-        task = UserTask(
-            task_id=task_id,
-            session_id=state["session_id"],
-            input=state["task_input"],
-            context=state.get("task_context", {}),
-        )
-        directive = ExecutionDirective.model_validate(last_directive_payload)
-
-        execution_result = self._execution_engine.execute(
-            task=task,
-            directive=directive,
-            password_override=password,
-        )
-
-        final_status = str(execution_result["status"])
-
-        if final_status == "failed":
-            error_msg = execution_result.get("result", {}).get("error", "")
-            is_password_error = "permission denied" in error_msg.lower() or "incorrect password" in error_msg.lower()
-
-            if is_password_error and current_attempt < 3:
-                self._task_state_store.update_task(
-                    task.task_id,
-                    {
-                        "status": "awaiting_password",
-                        "pending_password_request": {
-                            "command": pending_password_payload.get("command"),
-                            "reason": pending_password_payload.get("reason"),
-                            "attempts": current_attempt,
-                        },
-                    },
-                )
-                self._publish(task, TASK_AWAITING_PERMISSION, {
-                    "password_attempt_failed": True,
-                    "attempts": current_attempt,
-                    "max_attempts": 3,
-                    "message": "Неверный пароль. Попробуйте снова.",
-                })
-                return {
-                    "task_id": task_id,
-                    "status": "awaiting_password",
-                    "result": {"error": "Неверный пароль", "attempts": current_attempt, "max_attempts": 3},
-                }
-            else:
-                self._task_state_store.update_task(
-                    task.task_id,
-                    {
-                        "status": "failed",
-                        "pending_password_request": None,
-                        "password_attempts": current_attempt,
-                    },
-                )
-                self._publish(task, TASK_FAILED, {
-                    "password_failed": True,
-                    "attempts": current_attempt,
-                    "message": "Неверный пароль (3 попытки). Передаю решение модели.",
-                    "execution_result": execution_result["result"],
-                })
-                return {
-                    "task_id": task_id,
-                    "status": "failed",
-                    "result": {
-                        "error": "Password failed after 3 attempts",
-                        "attempts": current_attempt,
-                        "message": "Пользователь 3 раза ввёл неверный пароль. Решение за вами.",
-                    },
-                }
-
-        self._task_state_store.update_task(
-            task.task_id,
-            {
-                "status": final_status,
-                "pending_password_request": None,
-            },
-        )
-        checkpoint = TaskCheckpoint(task_id=task.task_id, status=final_status)
-        self._checkpoint_store.save(checkpoint)
-        self._publish(task, TASK_COMPLETED, {"execution_result": execution_result["result"]})
-
-        # Save to memory after password resolution
-        self._save_to_memory(task, execution_result, final_status)
-
-        return {
-            "task_id": task.task_id,
-            "status": final_status,
-            "result": {
-                **execution_result["result"],
-                "response_directive": _build_response_directive(execution_result),
-            },
-            "events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
-        }
-
-    def _publish(self, task: UserTask, event_type: str, payload: dict[str, object]) -> None:
-        event = RuntimeEvent(
-            task_id=task.task_id,
-            session_id=task.session_id,
-            sequence=self._event_bus.next_sequence(task.task_id),
-            type=event_type,
-            payload=payload,
-        )
-        self._event_bus.publish(event)
-
-    async def _run_recall(self, task: UserTask) -> dict:
-        """Run active memory recall before orchestration."""
-        if not self._recall_service:
-            return {"should_recall": False, "reason": "no_recall_service", "query": "", "results": [], "summary": ""}
-        try:
-            return await self._recall_service.recall(task_input=task.input)
-        except Exception as e:
-            return {"should_recall": False, "reason": f"recall_error: {e}", "query": "", "results": [], "summary": ""}
-
-    def _save_to_memory(self, task: UserTask, execution_result: dict, status: str) -> None:
-        """Save task input and result to memory for session context, using MemoryWritePolicy."""
-        if not self._memory_interface:
-            return
-
-        try:
-            # Build a synthetic critic_score for policy based on task status
-            # For summary/tool_result without real critic, we derive from execution outcome
-            if status == "completed":
-                synthetic_score = CriticScore(
-                    correctness=0.9, usefulness=0.8, safety=0.95,
-                    memory_store=True, weight=0.85, explanation="Task completed successfully"
-                )
-            elif status == "failed":
-                synthetic_score = CriticScore(
-                    correctness=0.2, usefulness=0.3, safety=0.7,
-                    memory_store=True, weight=0.5, explanation="Task failed — store for learning"
-                )
-            else:
-                synthetic_score = CriticScore(
-                    correctness=0.5, usefulness=0.5, safety=0.8,
-                    memory_store=False, weight=0.3, explanation=f"Status: {status}"
-                )
-
-            # Save task input as summary
-            decision = "store"
-            if self._memory_policy:
-                decision = self._memory_policy.decide(
-                    critic_score=synthetic_score,
-                    memory_type="summary",
-                    session_id=task.session_id,
-                )
-            if decision in ("store", "store_with_weight"):
-                weight = synthetic_score.weight if decision == "store_with_weight" else 0.8
-                self._memory_interface.insert(
-                    text=f"User request: {task.input}",
-                    kind="summary",
-                    source="user",
-                    task_id=task.task_id,
-                    session_id=task.session_id,
-                    weight=weight,
-                    metadata={"status": status, "policy_decision": decision},
-                )
-                self._publish(task, MEMORY_WRITE_DECIDED, {
-                    "kind": "summary", "decision": decision, "text_preview": task.input[:80]
-                })
-
-            # Save execution result
-            result_text = ""
-            if status == "completed":
-                step_results = execution_result.get("result", {}).get("step_results", [])
-                if step_results:
-                    for step in step_results:
-                        tool_result = step.get("result", {}).get("result", {})
-                        if tool_result.get("output"):
-                            result_text += f" | {step.get('step_id')}: {tool_result.get('output')[:200]}"
-            elif status == "failed":
-                result_text = f" | Error: {execution_result.get('result', {}).get('error', 'Unknown')}"
-
-            if result_text:
-                decision = "store"
-                if self._memory_policy:
-                    decision = self._memory_policy.decide(
-                        critic_score=synthetic_score,
-                        memory_type="tool_result",
-                        session_id=task.session_id,
-                    )
-                if decision in ("store", "store_with_weight"):
-                    weight = synthetic_score.weight if decision == "store_with_weight" else 0.7
-                    self._memory_interface.insert(
-                        text=f"Result: {status}{result_text}",
-                        kind="tool_result",
-                        source="system",
-                        task_id=task.task_id,
-                        session_id=task.session_id,
-                        weight=weight,
-                        metadata={"status": status, "policy_decision": decision},
-                    )
-                    self._publish(task, MEMORY_WRITE_DECIDED, {
-                        "kind": "tool_result", "decision": decision, "text_preview": result_text[:80]
-                    })
-        except Exception as e:
-            import logging
-            logging.getLogger(__name__).warning(f"Failed to save to memory: {e}")
diff --git a/app/services/__init__.py b/app/services/__init__.py
deleted file mode 100644
index 6f66849..0000000
--- a/app/services/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-"""Shared services."""
-
diff --git a/app/state/__init__.py b/app/state/__init__.py
deleted file mode 100644
index 5cc321c..0000000
--- a/app/state/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-"""Task state and checkpoints."""
-
diff --git a/app/state/checkpoint_store.py b/app/state/checkpoint_store.py
deleted file mode 100644
index 277d8b8..0000000
--- a/app/state/checkpoint_store.py
+++ /dev/null
@@ -1,75 +0,0 @@
-from __future__ import annotations
-
-import json
-import sqlite3
-from pathlib import Path
-
-from app.core.contracts import TaskCheckpoint
-
-
-class SQLiteCheckpointStore:
-    """Durable checkpoint store for resumable runtime state."""
-
-    def __init__(self, db_path: str | Path) -> None:
-        self._db_path = Path(db_path)
-        self._db_path.parent.mkdir(parents=True, exist_ok=True)
-        self._initialize()
-
-    def save(self, checkpoint: TaskCheckpoint) -> TaskCheckpoint:
-        with sqlite3.connect(self._db_path) as conn:
-            conn.execute(
-                """
-                INSERT OR REPLACE INTO checkpoints (
-                    task_id, status, active_step_id, plan_snapshot_json,
-                    context_snapshot_json, updated_at
-                ) VALUES (?, ?, ?, ?, ?, ?)
-                """,
-                (
-                    checkpoint.task_id,
-                    checkpoint.status,
-                    checkpoint.active_step_id,
-                    json.dumps(checkpoint.plan_snapshot, default=str),
-                    json.dumps(checkpoint.context_snapshot, default=str),
-                    checkpoint.updated_at.isoformat(),
-                ),
-            )
-            conn.commit()
-        return checkpoint
-
-    def load(self, task_id: str) -> TaskCheckpoint | None:
-        with sqlite3.connect(self._db_path) as conn:
-            row = conn.execute(
-                """
-                SELECT task_id, status, active_step_id, plan_snapshot_json,
-                       context_snapshot_json, updated_at
-                FROM checkpoints
-                WHERE task_id = ?
-                """,
-                (task_id,),
-            ).fetchone()
-        if not row:
-            return None
-        return TaskCheckpoint(
-            task_id=row[0],
-            status=row[1],
-            active_step_id=row[2],
-            plan_snapshot=json.loads(row[3]),
-            context_snapshot=json.loads(row[4]),
-            updated_at=row[5],
-        )
-
-    def _initialize(self) -> None:
-        with sqlite3.connect(self._db_path) as conn:
-            conn.execute(
-                """
-                CREATE TABLE IF NOT EXISTS checkpoints (
-                    task_id TEXT PRIMARY KEY,
-                    status TEXT NOT NULL,
-                    active_step_id TEXT,
-                    plan_snapshot_json TEXT NOT NULL,
-                    context_snapshot_json TEXT NOT NULL,
-                    updated_at TEXT NOT NULL
-                )
-                """
-            )
-            conn.commit()
diff --git a/app/state/task_state_store.py b/app/state/task_state_store.py
deleted file mode 100644
index b6b7470..0000000
--- a/app/state/task_state_store.py
+++ /dev/null
@@ -1,77 +0,0 @@
-from __future__ import annotations
-
-import json
-import sqlite3
-from pathlib import Path
-from typing import Any
-
-
-class SQLiteTaskStateStore:
-    """Durable task state store for runtime lifecycle state."""
-
-    def __init__(self, db_path: str | Path) -> None:
-        self._db_path = Path(db_path)
-        self._db_path.parent.mkdir(parents=True, exist_ok=True)
-        self._initialize()
-
-    def create_task(self, task_id: str, initial_state: dict[str, Any]) -> dict[str, Any]:
-        state = dict(initial_state)
-        session_id = state.get("session_id")
-        with sqlite3.connect(self._db_path) as conn:
-            conn.execute(
-                """
-                INSERT OR REPLACE INTO task_states (task_id, state_json, session_id)
-                VALUES (?, ?, ?)
-                """,
-                (task_id, json.dumps(state), session_id),
-            )
-            conn.commit()
-        return state
-
-    def get_task(self, task_id: str) -> dict[str, Any] | None:
-        with sqlite3.connect(self._db_path) as conn:
-            row = conn.execute(
-                "SELECT state_json FROM task_states WHERE task_id = ?",
-                (task_id,),
-            ).fetchone()
-        return json.loads(row[0]) if row else None
-
-    def update_task(self, task_id: str, patch: dict[str, Any]) -> dict[str, Any]:
-        state = self.get_task(task_id) or {}
-        state.update(patch)
-        with sqlite3.connect(self._db_path) as conn:
-            conn.execute(
-                """
-                INSERT OR REPLACE INTO task_states (task_id, state_json)
-                VALUES (?, ?)
-                """,
-                (task_id, json.dumps(state)),
-            )
-            conn.commit()
-        return state
-
-    def _initialize(self) -> None:
-        with sqlite3.connect(self._db_path) as conn:
-            conn.execute(
-                """
-                CREATE TABLE IF NOT EXISTS task_states (
-                    task_id TEXT PRIMARY KEY,
-                    state_json TEXT NOT NULL
-                )
-                """
-            )
-            conn.commit()
-            try:
-                conn.execute("ALTER TABLE task_states ADD COLUMN session_id TEXT")
-                conn.commit()
-            except sqlite3.OperationalError:
-                pass
-
-    def get_session_tasks(self, session_id: str, limit: int = 10) -> list[dict[str, Any]]:
-        with sqlite3.connect(self._db_path) as conn:
-            conn.row_factory = sqlite3.Row
-            rows = conn.execute(
-                "SELECT state_json FROM task_states WHERE session_id = ? ORDER BY rowid DESC LIMIT ?",
-                (session_id, limit),
-            ).fetchall()
-        return [json.loads(row[0]) for row in rows]
diff --git a/app/streaming/__init__.py b/app/streaming/__init__.py
deleted file mode 100644
index 24d18ec..0000000
--- a/app/streaming/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-"""Streaming projections."""
-
diff --git a/app/streaming/manager.py b/app/streaming/manager.py
deleted file mode 100644
index 64d0c49..0000000
--- a/app/streaming/manager.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-from collections import defaultdict
-from dataclasses import dataclass
-
-from app.core.contracts import RuntimeEvent
-from app.events.event_bus import EventBus
-
-
-class StreamingManager:
-    """Simple in-process projection from event bus to websocket consumers."""
-
-    def __init__(self, event_bus: EventBus) -> None:
-        self._event_bus = event_bus
-        self._subscribers: dict[str, list[StreamSubscriber]] = defaultdict(list)
-        self._event_bus.subscribe(self._on_event)
-
-    def replay_events(self, task_id: str) -> list[RuntimeEvent]:
-        return self._event_bus.list_for_task(task_id)
-
-    def subscribe(self, task_id: str) -> asyncio.Queue[RuntimeEvent]:
-        queue: asyncio.Queue[RuntimeEvent] = asyncio.Queue()
-        self._subscribers[task_id].append(
-            StreamSubscriber(loop=asyncio.get_running_loop(), queue=queue)
-        )
-        return queue
-
-    def unsubscribe(self, task_id: str, queue: asyncio.Queue[RuntimeEvent]) -> None:
-        listeners = self._subscribers.get(task_id, [])
-        for listener in list(listeners):
-            if listener.queue is queue:
-                listeners.remove(listener)
-                break
-        if not listeners and task_id in self._subscribers:
-            del self._subscribers[task_id]
-
-    def _on_event(self, event: RuntimeEvent) -> None:
-        for listener in list(self._subscribers.get(event.task_id, [])):
-            listener.loop.call_soon_threadsafe(listener.queue.put_nowait, event)
-
-
-@dataclass
-class StreamSubscriber:
-    loop: asyncio.AbstractEventLoop
-    queue: asyncio.Queue[RuntimeEvent]
diff --git a/app/tools/__init__.py b/app/tools/__init__.py
deleted file mode 100644
index b8046a9..0000000
--- a/app/tools/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-"""Tool registry and tool adapters."""
-
diff --git a/app/tools/base.py b/app/tools/base.py
deleted file mode 100644
index 6601eba..0000000
--- a/app/tools/base.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-from typing import Any
-
-from app.core.contracts import ToolResult, UserTask
-
-
-class BaseTool(ABC):
-    name: str = ""
-    description: str = ""
-    
-    @property
-    def name(self) -> str:
-        return getattr(self, '_name', self.__class__.__name__.replace('Tool', '').lower())
-    
-    @property
-    def description(self) -> str:
-        return getattr(self, '_description', "")
-
-    @abstractmethod
-    def execute(self, task: UserTask, args: dict[str, Any]) -> ToolResult:
-        raise NotImplementedError
-
diff --git a/app/tools/discover.py b/app/tools/discover.py
deleted file mode 100644
index 421acce..0000000
--- a/app/tools/discover.py
+++ /dev/null
@@ -1,83 +0,0 @@
-from __future__ import annotations
-
-import importlib
-import json
-import logging
-from pathlib import Path
-from typing import Any
-
-logger = logging.getLogger(__name__)
-
-PLUGINS_DIR = Path(__file__).parent / "plugins"
-
-
-class ToolDiscovery:
-    """Decentralized tool discovery system."""
-
-    def __init__(self, plugins_dir: Path | None = None) -> None:
-        self._plugins_dir = plugins_dir or PLUGINS_DIR
-
-    def discover(self) -> dict[str, Any]:
-        """Discover all tools from plugins directory."""
-        tools = {}
-        
-        if not self._plugins_dir.exists():
-            logger.warning(f"Plugins directory not found: {self._plugins_dir}")
-            return tools
-
-        for folder in self._plugins_dir.iterdir():
-            if not folder.is_dir():
-                continue
-            
-            manifest_file = folder / "manifest.json"
-            if not manifest_file.exists():
-                logger.warning(f"Missing manifest.json in {folder.name}")
-                continue
-
-            try:
-                manifest = self._load_manifest(manifest_file)
-                
-                tool_name = manifest.get("name", folder.name)
-                tools[tool_name] = {
-                    "manifest": manifest,
-                    "tool_class": folder.name,
-                }
-                logger.info(f"Discovered tool: {tool_name}")
-                
-            except Exception as e:
-                logger.error(f"Failed to load tool {folder.name}: {e}")
-                continue
-
-        return tools
-
-    def _load_manifest(self, manifest_file: Path) -> dict[str, Any]:
-        with open(manifest_file) as f:
-            return json.load(f)
-
-    def _load_tool_class(self, tool_name: str, manifest: dict[str, Any]) -> Any:
-        entrypoint = manifest.get("entrypoint", "Tool")
-        module = importlib.import_module(f"app.tools.plugins.{tool_name}")
-        tool_class = getattr(module, entrypoint)
-        return tool_class
-
-    def get_tool_schemas(self) -> list[dict[str, Any]]:
-        """Get schemas for all discovered tools."""
-        tools = self.discover()
-        schemas = []
-        
-        for name, data in tools.items():
-            manifest = data.get("manifest", {})
-            schemas.append({
-                "name": name,
-                "description": manifest.get("description", ""),
-                "args_schema": manifest.get("args_schema", {}),
-                "requires_permission": manifest.get("requires_permission", False),
-            })
-        
-        return schemas
-
-
-def discover_tools() -> dict[str, Any]:
-    """Convenience function for quick tool discovery."""
-    discovery = ToolDiscovery()
-    return discovery.discover()
\ No newline at end of file
diff --git a/app/tools/file_read.py b/app/tools/file_read.py
deleted file mode 100644
index 6bba378..0000000
--- a/app/tools/file_read.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from __future__ import annotations
-
-from app.core.contracts import ToolResult, UserTask
-from app.tools.base import BaseTool
-from app.tools.sandbox import ToolSandbox
-
-
-class FileReadTool(BaseTool):
-    name = "file_read"
-
-    def __init__(self, sandbox: ToolSandbox) -> None:
-        self._sandbox = sandbox
-
-    def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
-        path = args.get("path")
-        if not path:
-            return ToolResult(tool=self.name, ok=False, error="Missing path")
-        resolved = self._sandbox.ensure_path_allowed(str(path))
-        content = resolved.read_text(encoding="utf-8")
-        return ToolResult(
-            tool=self.name,
-            ok=True,
-            output=content,
-            metadata={"path": str(resolved), "size": len(content)},
-        )
-
diff --git a/app/tools/file_write.py b/app/tools/file_write.py
deleted file mode 100644
index 0bf7708..0000000
--- a/app/tools/file_write.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from __future__ import annotations
-
-from app.core.contracts import ToolResult, UserTask
-from app.tools.base import BaseTool
-from app.tools.sandbox import ToolSandbox
-
-
-class FileWriteTool(BaseTool):
-    name = "file_write"
-
-    def __init__(self, sandbox: ToolSandbox) -> None:
-        self._sandbox = sandbox
-
-    def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
-        path = args.get("path")
-        content = str(args.get("content", ""))
-        if not path:
-            return ToolResult(tool=self.name, ok=False, error="Missing path")
-        resolved = self._sandbox.ensure_path_allowed(str(path))
-        resolved.parent.mkdir(parents=True, exist_ok=True)
-        resolved.write_text(content, encoding="utf-8")
-        return ToolResult(
-            tool=self.name,
-            ok=True,
-            output=f"Wrote {len(content)} bytes",
-            metadata={"path": str(resolved), "size": len(content)},
-        )
diff --git a/app/tools/memory_tools.py b/app/tools/memory_tools.py
deleted file mode 100644
index 77bf03b..0000000
--- a/app/tools/memory_tools.py
+++ /dev/null
@@ -1,123 +0,0 @@
-from __future__ import annotations
-
-import logging
-from typing import Any
-
-from app.tools.base import BaseTool
-from app.core.contracts import ToolResult, UserTask
-from app.tools.sandbox import ToolSandbox
-
-logger = logging.getLogger(__name__)
-
-
-class MemoryInsertTool(BaseTool):
-    _name = "memory_insert"
-    _description = "Store information in memory"
-
-    def __init__(self, sandbox: ToolSandbox, memory_interface=None) -> None:
-        super().__init__()
-        self._sandbox = sandbox
-        self._memory = memory_interface
-
-    def execute(self, task: UserTask, args: dict[str, Any]) -> ToolResult:
-        text = args.get("text", "")
-        kind = args.get("kind", "fact")
-        source = args.get("source", "user")
-        weight = args.get("weight", 0.5)
-
-        if not text:
-            return ToolResult(tool="memory_insert", ok=False, output="", error="text is required")
-        if not self._memory:
-            return ToolResult(tool="memory_insert", ok=False, output="", error="Memory not available")
-
-        try:
-            entry = self._memory.insert(
-                text=text,
-                kind=kind,
-                source=source,
-                task_id=task.task_id,
-                session_id=task.session_id,
-                weight=weight,
-            )
-            return ToolResult(
-                tool="memory_insert",
-                ok=True,
-                output=f"Stored: {entry.id}",
-                metadata={"entry_id": entry.id},
-            )
-        except Exception as e:
-            logger.warning(f"Memory insert failed: {e}")
-            return ToolResult(tool="memory_insert", ok=False, output="", error=str(e))
-
-
-class MemorySearchTool(BaseTool):
-    _name = "memory_search"
-    _description = "Search memory for information"
-
-    def __init__(self, sandbox: ToolSandbox, memory_interface=None) -> None:
-        super().__init__()
-        self._sandbox = sandbox
-        self._memory = memory_interface
-
-    def execute(self, task: UserTask, args: dict[str, Any]) -> ToolResult:
-        query = args.get("query", "")
-        top_k = args.get("top_k", 5)
-
-        if not query:
-            return ToolResult(tool="memory_search", ok=False, output="", error="query is required")
-        if not self._memory:
-            return ToolResult(tool="memory_search", ok=False, output="", error="Memory not available")
-
-        try:
-            results = self._memory.search(query, top_k=top_k)
-            if not results:
-                return ToolResult(tool="memory_search", ok=True, output="No results found", metadata={"count": 0})
-
-            output_lines = []
-            for entry, score in results:
-                output_lines.append(f"[{score:.2f}] {entry.text[:100]}")
-
-            return ToolResult(
-                tool="memory_search",
-                ok=True,
-                output="\n".join(output_lines),
-                metadata={"count": len(results)},
-            )
-        except Exception as e:
-            logger.warning(f"Memory search failed: {e}")
-            return ToolResult(tool="memory_search", ok=False, output="", error=str(e))
-
-
-class MemoryListTool(BaseTool):
-    _name = "memory_list"
-    _description = "List recent memories"
-
-    def __init__(self, sandbox: ToolSandbox, memory_interface=None) -> None:
-        super().__init__()
-        self._sandbox = sandbox
-        self._memory = memory_interface
-
-    def execute(self, task: UserTask, args: dict[str, Any]) -> ToolResult:
-        limit = args.get("limit", 10)
-
-        if not self._memory:
-            return ToolResult(tool="memory_list", ok=False, output="", error="Memory not available")
-
-        try:
-            entries = self._memory.get_recent(limit=limit)
-            if not entries:
-                return ToolResult(tool="memory_list", ok=True, output="No memories", metadata={"count": 0})
-
-            output_lines = []
-            for entry in entries:
-                output_lines.append(f"{entry.kind}: {entry.text[:80]}")
-
-            return ToolResult(
-                tool="memory_list",
-                ok=True,
-                output="\n".join(output_lines),
-                metadata={"count": len(entries)},
-            )
-        except Exception as e:
-            logger.warning(f"Memory list failed: {e}")
-            return ToolResult(tool="memory_list", ok=False, output="", error=str(e))
\ No newline at end of file
diff --git a/app/tools/plugins/file_read/__init__.py b/app/tools/plugins/file_read/__init__.py
deleted file mode 100644
index a06af7b..0000000
--- a/app/tools/plugins/file_read/__init__.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from __future__ import annotations
-
-from app.core.contracts import ToolResult, UserTask
-from app.tools.base import BaseTool
-from app.tools.sandbox import ToolSandbox
-
-
-class Tool(BaseTool):
-    name = "file_read"
-    description = "Read file contents"
-
-    def __init__(self, sandbox: ToolSandbox) -> None:
-        self._sandbox = sandbox
-
-    def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
-        path = args.get("path")
-        if not path:
-            return ToolResult(tool=self.name, ok=False, error="Missing path")
-        try:
-            resolved = self._sandbox.ensure_path_allowed(str(path))
-            if not resolved.exists():
-                return ToolResult(tool=self.name, ok=False, error=f"File not found: {path}")
-            content = resolved.read_text(encoding="utf-8")
-            return ToolResult(
-                tool=self.name,
-                ok=True,
-                output=content,
-                metadata={"path": str(resolved), "size": len(content)},
-            )
-        except PermissionError as e:
-            return ToolResult(tool=self.name, ok=False, error=f"Access denied: {e}")
-        except FileNotFoundError as e:
-            return ToolResult(tool=self.name, ok=False, error=f"File not found: {path}")
-        except Exception as e:
-            return ToolResult(tool=self.name, ok=False, error=f"Error: {e}")
\ No newline at end of file
diff --git a/app/tools/plugins/file_read/manifest.json b/app/tools/plugins/file_read/manifest.json
deleted file mode 100644
index ec51f07..0000000
--- a/app/tools/plugins/file_read/manifest.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
-  "name": "file_read",
-  "version": "1.0",
-  "entrypoint": "Tool",
-  "description": "Read file contents from allowed paths",
-  "args_schema": {
-    "path": {"type": "string", "required": true, "description": "File path to read"}
-  },
-  "requires_permission": false
-}
\ No newline at end of file
diff --git a/app/tools/plugins/file_write/__init__.py b/app/tools/plugins/file_write/__init__.py
deleted file mode 100644
index 7cd8572..0000000
--- a/app/tools/plugins/file_write/__init__.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from __future__ import annotations
-
-from app.core.contracts import ToolResult, UserTask
-from app.tools.base import BaseTool
-from app.tools.sandbox import ToolSandbox
-
-
-class Tool(BaseTool):
-    name = "file_write"
-    description = "Write content to file"
-
-    def __init__(self, sandbox: ToolSandbox) -> None:
-        self._sandbox = sandbox
-
-    def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
-        path = args.get("path")
-        content = str(args.get("content", ""))
-        if not path:
-            return ToolResult(tool=self.name, ok=False, error="Missing path")
-        try:
-            resolved = self._sandbox.ensure_path_allowed(str(path))
-            resolved.parent.mkdir(parents=True, exist_ok=True)
-            resolved.write_text(content, encoding="utf-8")
-            return ToolResult(
-                tool=self.name,
-                ok=True,
-                output=f"Wrote {len(content)} bytes",
-                metadata={"path": str(resolved), "size": len(content)},
-            )
-        except PermissionError as e:
-            return ToolResult(tool=self.name, ok=False, error=f"Access denied: {e}")
-        except Exception as e:
-            return ToolResult(tool=self.name, ok=False, error=f"Error: {e}")
\ No newline at end of file
diff --git a/app/tools/plugins/file_write/manifest.json b/app/tools/plugins/file_write/manifest.json
deleted file mode 100644
index 742451a..0000000
--- a/app/tools/plugins/file_write/manifest.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-  "name": "file_write",
-  "version": "1.0",
-  "entrypoint": "Tool",
-  "description": "Write content to file",
-  "args_schema": {
-    "path": {"type": "string", "required": true, "description": "File path to write"},
-    "content": {"type": "string", "required": true, "description": "Content to write"}
-  },
-  "requires_permission": true
-}
\ No newline at end of file
diff --git a/app/tools/plugins/memory_tools/__init__.py b/app/tools/plugins/memory_tools/__init__.py
deleted file mode 100644
index ba60907..0000000
--- a/app/tools/plugins/memory_tools/__init__.py
+++ /dev/null
@@ -1,112 +0,0 @@
-from __future__ import annotations
-
-import logging
-from typing import Any
-
-from app.core.contracts import ToolResult, UserTask
-from app.tools.base import BaseTool
-
-logger = logging.getLogger(__name__)
-
-
-class Tool(BaseTool):
-    name = "memory"
-    description = "Memory operations: insert, search, list"
-
-    def __init__(self, memory_interface=None) -> None:
-        self._memory = memory_interface
-
-    def execute(self, task: UserTask, args: dict[str, Any]) -> ToolResult:
-        action = args.get("action", "search")
-        
-        if action == "insert":
-            return self._insert(task, args)
-        elif action == "search":
-            return self._search(task, args)
-        elif action == "list":
-            return self._list(task, args)
-        else:
-            return ToolResult(tool=self.name, ok=False, error=f"Unknown action: {action}")
-
-    def _insert(self, task: UserTask, args: dict[str, Any]) -> ToolResult:
-        text = args.get("text", "")
-        kind = args.get("kind", "fact")
-        source = args.get("source", "user")
-        weight = args.get("weight", 0.5)
-
-        if not text:
-            return ToolResult(tool=self.name, ok=False, output="", error="text is required")
-        if not self._memory:
-            return ToolResult(tool=self.name, ok=False, output="", error="Memory not available")
-
-        try:
-            entry = self._memory.insert(
-                text=text,
-                kind=kind,
-                source=source,
-                task_id=task.task_id,
-                session_id=task.session_id,
-                weight=weight,
-            )
-            return ToolResult(
-                tool=self.name,
-                ok=True,
-                output=f"Stored: {entry.id}",
-                metadata={"entry_id": entry.id},
-            )
-        except Exception as e:
-            logger.warning(f"Memory insert failed: {e}")
-            return ToolResult(tool=self.name, ok=False, output="", error=str(e))
-
-    def _search(self, task: UserTask, args: dict[str, Any]) -> ToolResult:
-        query = args.get("query", "")
-        top_k = args.get("top_k", 5)
-
-        if not query:
-            return ToolResult(tool=self.name, ok=False, output="", error="query is required")
-        if not self._memory:
-            return ToolResult(tool=self.name, ok=False, output="", error="Memory not available")
-
-        try:
-            results = self._memory.search(query, top_k=top_k)
-            if not results:
-                return ToolResult(tool=self.name, ok=True, output="No results found", metadata={"count": 0})
-
-            output_lines = []
-            for entry, score in results:
-                output_lines.append(f"[{score:.2f}] {entry.text[:100]}")
-
-            return ToolResult(
-                tool=self.name,
-                ok=True,
-                output="\n".join(output_lines),
-                metadata={"count": len(results)},
-            )
-        except Exception as e:
-            logger.warning(f"Memory search failed: {e}")
-            return ToolResult(tool=self.name, ok=False, output="", error=str(e))
-
-    def _list(self, task: UserTask, args: dict[str, Any]) -> ToolResult:
-        limit = args.get("limit", 10)
-
-        if not self._memory:
-            return ToolResult(tool=self.name, ok=False, output="", error="Memory not available")
-
-        try:
-            entries = self._memory.get_recent(limit=limit)
-            if not entries:
-                return ToolResult(tool=self.name, ok=True, output="No memories", metadata={"count": 0})
-
-            output_lines = []
-            for entry in entries:
-                output_lines.append(f"{entry.kind}: {entry.text[:80]}")
-
-            return ToolResult(
-                tool=self.name,
-                ok=True,
-                output="\n".join(output_lines),
-                metadata={"count": len(entries)},
-            )
-        except Exception as e:
-            logger.warning(f"Memory list failed: {e}")
-            return ToolResult(tool=self.name, ok=False, output="", error=str(e))
\ No newline at end of file
diff --git a/app/tools/plugins/memory_tools/manifest.json b/app/tools/plugins/memory_tools/manifest.json
deleted file mode 100644
index ac23ef9..0000000
--- a/app/tools/plugins/memory_tools/manifest.json
+++ /dev/null
@@ -1,22 +0,0 @@
-{
-  "name": "memory",
-  "version": "1.0",
-  "entrypoint": "Tool",
-  "description": "Memory operations: insert, search, list",
-  "args_schema": {
-    "action": {
-      "type": "string",
-      "required": true,
-      "description": "Action: insert, search, or list",
-      "enum": ["insert", "search", "list"]
-    },
-    "text": {"type": "string", "required": false, "description": "Text to store (insert)"},
-    "query": {"type": "string", "required": false, "description": "Query string (search)"},
-    "kind": {"type": "string", "required": false, "description": "Memory kind: fact, command, etc"},
-    "source": {"type": "string", "required": false, "description": "Source: user, system, etc"},
-    "weight": {"type": "number", "required": false, "description": "Memory weight 0-1"},
-    "top_k": {"type": "number", "required": false, "description": "Max results (search)"},
-    "limit": {"type": "number", "required": false, "description": "Max entries (list)"}
-  },
-  "requires_permission": false
-}
\ No newline at end of file
diff --git a/app/tools/plugins/shell_exec/__init__.py b/app/tools/plugins/shell_exec/__init__.py
deleted file mode 100644
index 83aecc6..0000000
--- a/app/tools/plugins/shell_exec/__init__.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from __future__ import annotations
-
-from app.core.contracts import ToolResult, UserTask
-from app.tools.base import BaseTool
-from app.tools.sandbox import ToolSandbox
-
-
-def _detect_sudo_auth_failure(output: str) -> bool:
-    normalized = output.lower()
-    return any(
-        marker in normalized
-        for marker in (
-            "incorrect password",
-            "incorrect password attempt",
-            "sudo: no password was provided",
-            "sorry, try again",
-            "authentication failure",
-        )
-    )
-
-
-class Tool(BaseTool):
-    name = "shell_exec"
-    description = "Execute shell commands"
-
-    def __init__(self, sandbox: ToolSandbox) -> None:
-        self._sandbox = sandbox
-
-    def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
-        command = str(args.get("command", "")).strip()
-        if not command:
-            return ToolResult(tool=self.name, ok=False, error="Missing command", metadata={"exit_code": -1})
-        cwd = args.get("cwd")
-        stdin_secret = args.get("stdin_secret")
-        output_callback = args.get("__output_callback")
-        completed = self._sandbox.run_shell(
-            command=command,
-            cwd=str(cwd) if cwd else None,
-            stdin_data=str(stdin_secret) if stdin_secret is not None else None,
-            output_callback=output_callback if callable(output_callback) else None,
-        )
-        output = completed.stdout if completed.returncode == 0 else completed.stderr or completed.stdout
-        sudo_auth_failed = completed.returncode != 0 and _detect_sudo_auth_failure(
-            f"{completed.stdout}\n{completed.stderr}"
-        )
-        return ToolResult(
-            tool=self.name,
-            ok=completed.returncode == 0,
-            output=output,
-            error=None if completed.returncode == 0 else f"Command failed with exit code {completed.returncode}",
-            metadata={
-                "exit_code": completed.returncode,
-                "sudo_auth_failed": sudo_auth_failed,
-            },
-        )
diff --git a/app/tools/plugins/shell_exec/manifest.json b/app/tools/plugins/shell_exec/manifest.json
deleted file mode 100644
index a797718..0000000
--- a/app/tools/plugins/shell_exec/manifest.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-  "name": "shell_exec",
-  "version": "1.0",
-  "entrypoint": "Tool",
-  "description": "Execute shell commands in sandboxed environment",
-  "args_schema": {
-    "command": {"type": "string", "required": true, "description": "Shell command to execute"},
-    "cwd": {"type": "string", "required": false, "description": "Working directory"},
-    "stdin_secret": {"type": "string", "required": false, "description": "Data to pass via stdin"}
-  },
-  "requires_permission": true
-}
\ No newline at end of file
diff --git a/app/tools/registry.py b/app/tools/registry.py
deleted file mode 100644
index 1bcc296..0000000
--- a/app/tools/registry.py
+++ /dev/null
@@ -1,61 +0,0 @@
-from __future__ import annotations
-
-import logging
-from typing import Any, Callable
-
-from app.tools.base import BaseTool
-from app.tools.discover import ToolDiscovery
-
-logger = logging.getLogger(__name__)
-
-
-class ToolRegistry:
-    def __init__(self) -> None:
-        self._tools: dict[str, BaseTool] = {}
-        self._schemas: dict[str, dict[str, Any]] = {}
-
-    def register(self, tool: BaseTool) -> None:
-        self._tools[tool.name] = tool
-
-    def discover_and_init(
-        self,
-        init_factory: Callable[[dict], BaseTool] | None = None,
-    ) -> None:
-        """Discover tools from plugins and initialize them."""
-        discovery = ToolDiscovery()
-        discovered = discovery.discover()
-        
-        for name, data in discovered.items():
-            manifest = data.get("manifest", {})
-            
-            if init_factory:
-                tool = init_factory({"name": name, "manifest": manifest})
-            else:
-                tool_instance = data.get("instance")
-                if tool_instance:
-                    self._tools[name] = tool_instance
-                    self._schemas[name] = {
-                        "description": manifest.get("description", ""),
-                        "args_schema": manifest.get("args_schema", {}),
-                        "requires_permission": manifest.get("requires_permission", False),
-                    }
-                    logger.info(f"Registered tool: {name}")
-            logger.warning(f"No init_factory provided for {name}")
-
-    def get(self, name: str) -> BaseTool:
-        if name not in self._tools:
-            raise KeyError(f"Tool {name} is not registered")
-        return self._tools[name]
-
-    def list_names(self) -> list[str]:
-        return list(self._tools.keys())
-
-    def get_schema(self, name: str) -> dict[str, Any]:
-        return self._schemas.get(name, {})
-
-    def list_schemas(self) -> list[dict[str, Any]]:
-        return [
-            {"name": name, **schema}
-            for name, schema in self._schemas.items()
-        ]
-
diff --git a/app/tools/sandbox.py b/app/tools/sandbox.py
deleted file mode 100644
index 48de9ec..0000000
--- a/app/tools/sandbox.py
+++ /dev/null
@@ -1,139 +0,0 @@
-from __future__ import annotations
-
-import os
-import signal
-import subprocess
-import threading
-import time
-from pathlib import Path
-from typing import Callable
-
-
-class ToolSandbox:
-    """Applies simple working directory and timeout restrictions."""
-
-    def __init__(
-        self,
-        allowed_root: str | Path,
-        timeout_ms: int,
-        command_timeout_ms: int | None = None,
-        idle_timeout_ms: int | None = None,
-    ) -> None:
-        self._allowed_root = Path(allowed_root).resolve()
-        self._timeout_seconds = max(timeout_ms / 1000, 0.001)
-        self._command_timeout_seconds = max((command_timeout_ms or timeout_ms) / 1000, 0.001)
-        self._idle_timeout_seconds = max((idle_timeout_ms or timeout_ms) / 1000, 0.001)
-
-    def ensure_path_allowed(self, path: str | Path) -> Path:
-        resolved = Path(path).expanduser().resolve()
-        # Permission-first model: path is allowed if it exists
-        # Permission service will handle write/shell restrictions
-        return resolved
-
-    def run_shell(
-        self,
-        command: str,
-        cwd: str | Path | None = None,
-        stdin_data: str | None = None,
-        output_callback: Callable[[str, str], None] | None = None,
-    ) -> subprocess.CompletedProcess[str]:
-        working_directory = self.ensure_path_allowed(cwd or self._allowed_root)
-        env = {"PATH": os.environ.get("PATH", "")}
-        if output_callback is None:
-            return subprocess.run(
-                command,
-                shell=True,
-                cwd=str(working_directory),
-                env=env,
-                text=True,
-                capture_output=True,
-                input=stdin_data,
-                timeout=self._command_timeout_seconds,
-                check=False,
-            )
-
-        process = subprocess.Popen(
-            command,
-            shell=True,
-            cwd=str(working_directory),
-            env=env,
-            text=True,
-            stdin=subprocess.PIPE if stdin_data is not None else None,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            start_new_session=True,
-        )
-
-        stdout_chunks: list[str] = []
-        stderr_chunks: list[str] = []
-        output_lock = threading.Lock()
-        last_output_at = time.monotonic()
-
-        if stdin_data is not None and process.stdin is not None:
-            process.stdin.write(stdin_data)
-            process.stdin.close()
-
-        def read_stream(stream_name: str) -> None:
-            stream = process.stdout if stream_name == "stdout" else process.stderr
-            if stream is None:
-                return
-            chunks = stdout_chunks if stream_name == "stdout" else stderr_chunks
-            try:
-                for line in iter(stream.readline, ""):
-                    if not line:
-                        break
-                    chunks.append(line)
-                    nonlocal last_output_at
-                    with output_lock:
-                        last_output_at = time.monotonic()
-                    output_callback(stream_name, line)
-            finally:
-                stream.close()
-
-        stdout_thread = threading.Thread(target=read_stream, args=("stdout",), daemon=True)
-        stderr_thread = threading.Thread(target=read_stream, args=("stderr",), daemon=True)
-        stdout_thread.start()
-        stderr_thread.start()
-
-        timed_out = False
-        timeout_reason: str | None = None
-        started_at = time.monotonic()
-        return_code: int | None = None
-        while return_code is None:
-            return_code = process.poll()
-            if return_code is not None:
-                break
-
-            now = time.monotonic()
-            with output_lock:
-                idle_for = now - last_output_at
-            if now - started_at > self._command_timeout_seconds:
-                timed_out = True
-                timeout_reason = f"Command timed out after {self._command_timeout_seconds:.0f}s"
-                break
-            if idle_for > self._idle_timeout_seconds:
-                timed_out = True
-                timeout_reason = f"Command produced no output for {self._idle_timeout_seconds:.0f}s"
-                break
-            time.sleep(0.1)
-
-        if timed_out:
-            try:
-                os.killpg(process.pid, signal.SIGKILL)
-            except ProcessLookupError:
-                pass
-            except PermissionError:
-                process.kill()
-            return_code = process.wait()
-            timeout_message = f"{timeout_reason}\n"
-            stderr_chunks.append(timeout_message)
-            output_callback("stderr", timeout_message)
-
-        stdout_thread.join(timeout=1)
-        stderr_thread.join(timeout=1)
-        return subprocess.CompletedProcess(
-            args=command,
-            returncode=return_code if not timed_out else -9,
-            stdout="".join(stdout_chunks),
-            stderr="".join(stderr_chunks),
-        )
diff --git a/app/tools/shell_exec.py b/app/tools/shell_exec.py
deleted file mode 100644
index ea572c2..0000000
--- a/app/tools/shell_exec.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from __future__ import annotations
-
-from app.core.contracts import ToolResult, UserTask
-from app.tools.base import BaseTool
-from app.tools.sandbox import ToolSandbox
-
-
-def _detect_sudo_auth_failure(output: str) -> bool:
-    normalized = output.lower()
-    return any(
-        marker in normalized
-        for marker in (
-            "incorrect password",
-            "incorrect password attempt",
-            "sudo: no password was provided",
-            "sudo: password incorrect",
-            "sorry, try again",
-            "authentication failure",
-            "wrong password",
-        )
-    )
-
-
-class ShellExecTool(BaseTool):
-    name = "shell_exec"
-
-    def __init__(self, sandbox: ToolSandbox) -> None:
-        self._sandbox = sandbox
-
-    def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
-        command = str(args.get("command", "")).strip()
-        if not command:
-            return ToolResult(tool=self.name, ok=False, error="Missing command", metadata={"exit_code": -1})
-        cwd = args.get("cwd")
-        stdin_secret = args.get("stdin_secret")
-        password = args.get("password")
-        output_callback = args.get("__output_callback")
-
-        if password:
-            command = f'echo "{password}" | sudo -S {command}'
-
-        completed = self._sandbox.run_shell(
-            command=command,
-            cwd=str(cwd) if cwd else None,
-            stdin_data=str(stdin_secret) if stdin_secret is not None else None,
-            output_callback=output_callback if callable(output_callback) else None,
-        )
-        output = completed.stdout if completed.returncode == 0 else completed.stderr or completed.stdout
-        error_output = completed.stderr or completed.stdout
-        sudo_auth_failed = completed.returncode != 0 and _detect_sudo_auth_failure(
-            f"{completed.stdout}\n{completed.stderr}"
-        )
-        needs_sudo = completed.returncode != 0 and "permission denied" in error_output.lower() and not sudo_auth_failed
-        
-        return ToolResult(
-            tool=self.name,
-            ok=completed.returncode == 0,
-            output=output,
-            error=None if completed.returncode == 0 else f"Command failed with exit code {completed.returncode}",
-            metadata={
-                "exit_code": completed.returncode,
-                "needs_sudo": needs_sudo,
-                "sudo_auth_failed": sudo_auth_failed,
-            },
-        )
diff --git a/config/models.json b/config/models.json
deleted file mode 100644
index 0f221d6..0000000
--- a/config/models.json
+++ /dev/null
@@ -1,42 +0,0 @@
-{
-  "thinker": {
-    "path": "Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf",
-    "backend": "vulkan",
-    "n_gpu_layers": -1,
-    "max_tokens": 2048,
-    "temperature": 0.3
-  },
-  "json_compiler": {
-    "path": "gemma-4-E4B-it-Q4_K_M.gguf",
-    "backend": "cpu",
-    "n_gpu_layers": 0,
-    "max_tokens": 1024,
-    "temperature": 0.1
-  },
-  "coder": {
-    "path": "X-Coder-SFT-Qwen3-8B.Q6_K.gguf",
-    "backend": "cpu",
-    "n_gpu_layers": 0,
-    "max_tokens": 2048,
-    "temperature": 0.2
-  },
-  "critic": {
-    "path": "gemma-4-E4B-it-Q4_K_M.gguf",
-    "backend": "cpu",
-    "n_gpu_layers": 0,
-    "max_tokens": 1024,
-    "temperature": 0.1
-  },
-  "sys_util": {
-    "path": "Menlo_Lucy-Q4_K_M.gguf",
-    "backend": "cpu",
-    "n_gpu_layers": 0,
-    "max_tokens": 1024,
-    "temperature": 0.1
-  },
-  "embeddings": {
-    "path": "all-MiniLM-L6-v2",
-    "model_name": "sentence-transformers/all-MiniLM-L6-v2",
-    "embedding_dim": 384
-  }
-}
\ No newline at end of file
diff --git a/config/models.json.backup b/config/models.json.backup
deleted file mode 100644
index 0f221d6..0000000
--- a/config/models.json.backup
+++ /dev/null
@@ -1,42 +0,0 @@
-{
-  "thinker": {
-    "path": "Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf",
-    "backend": "vulkan",
-    "n_gpu_layers": -1,
-    "max_tokens": 2048,
-    "temperature": 0.3
-  },
-  "json_compiler": {
-    "path": "gemma-4-E4B-it-Q4_K_M.gguf",
-    "backend": "cpu",
-    "n_gpu_layers": 0,
-    "max_tokens": 1024,
-    "temperature": 0.1
-  },
-  "coder": {
-    "path": "X-Coder-SFT-Qwen3-8B.Q6_K.gguf",
-    "backend": "cpu",
-    "n_gpu_layers": 0,
-    "max_tokens": 2048,
-    "temperature": 0.2
-  },
-  "critic": {
-    "path": "gemma-4-E4B-it-Q4_K_M.gguf",
-    "backend": "cpu",
-    "n_gpu_layers": 0,
-    "max_tokens": 1024,
-    "temperature": 0.1
-  },
-  "sys_util": {
-    "path": "Menlo_Lucy-Q4_K_M.gguf",
-    "backend": "cpu",
-    "n_gpu_layers": 0,
-    "max_tokens": 1024,
-    "temperature": 0.1
-  },
-  "embeddings": {
-    "path": "all-MiniLM-L6-v2",
-    "model_name": "sentence-transformers/all-MiniLM-L6-v2",
-    "embedding_dim": 384
-  }
-}
\ No newline at end of file
diff --git a/config/models.json.test b/config/models.json.test
deleted file mode 100644
index 0f221d6..0000000
--- a/config/models.json.test
+++ /dev/null
@@ -1,42 +0,0 @@
-{
-  "thinker": {
-    "path": "Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf",
-    "backend": "vulkan",
-    "n_gpu_layers": -1,
-    "max_tokens": 2048,
-    "temperature": 0.3
-  },
-  "json_compiler": {
-    "path": "gemma-4-E4B-it-Q4_K_M.gguf",
-    "backend": "cpu",
-    "n_gpu_layers": 0,
-    "max_tokens": 1024,
-    "temperature": 0.1
-  },
-  "coder": {
-    "path": "X-Coder-SFT-Qwen3-8B.Q6_K.gguf",
-    "backend": "cpu",
-    "n_gpu_layers": 0,
-    "max_tokens": 2048,
-    "temperature": 0.2
-  },
-  "critic": {
-    "path": "gemma-4-E4B-it-Q4_K_M.gguf",
-    "backend": "cpu",
-    "n_gpu_layers": 0,
-    "max_tokens": 1024,
-    "temperature": 0.1
-  },
-  "sys_util": {
-    "path": "Menlo_Lucy-Q4_K_M.gguf",
-    "backend": "cpu",
-    "n_gpu_layers": 0,
-    "max_tokens": 1024,
-    "temperature": 0.1
-  },
-  "embeddings": {
-    "path": "all-MiniLM-L6-v2",
-    "model_name": "sentence-transformers/all-MiniLM-L6-v2",
-    "embedding_dim": 384
-  }
-}
\ No newline at end of file
diff --git a/config/models.yaml b/config/models.yaml
new file mode 100644
index 0000000..4449fc4
--- /dev/null
+++ b/config/models.yaml
@@ -0,0 +1,53 @@
+default_provider: llama_server
+
+models:
+  thinker:
+    provider: llama_server
+    base_url: http://127.0.0.1:8081/v1
+    model: local-main
+    purpose: free_cognition
+    structured_output: false
+    temperature: 0.4
+    max_output_tokens: 8192
+    system_prompt: prompts/roles/thinker.md
+
+  critic:
+    provider: llama_server
+    base_url: http://127.0.0.1:8081/v1
+    model: local-main
+    purpose: reflection
+    structured_output: false
+    temperature: 0.1
+    max_output_tokens: 4096
+    system_prompt: prompts/roles/critic.md
+
+  coder:
+    provider: llama_server
+    base_url: http://127.0.0.1:8081/v1
+    model: local-main
+    purpose: code_generation
+    structured_output: false
+    temperature: 0.2
+    max_output_tokens: 16384
+    system_prompt: prompts/roles/coder.md
+
+  action:
+    provider: llama_server
+    base_url: http://127.0.0.1:8081/v1
+    model: local-main
+    purpose: action_directive
+    structured_output: true
+    temperature: 0.0
+    max_output_tokens: 2048
+    system_prompt: prompts/roles/action.md
+    response_schema: duck_core/schemas/action_directive.schema.json
+
+  summary:
+    provider: llama_server
+    base_url: http://127.0.0.1:8081/v1
+    model: local-main
+    purpose: context_summary
+    structured_output: false
+    temperature: 0.1
+    max_output_tokens: 4096
+    system_prompt: prompts/roles/summary.md
diff --git a/config/permissions.json b/config/permissions.json
deleted file mode 100644
index cca6b1b..0000000
--- a/config/permissions.json
+++ /dev/null
@@ -1,94 +0,0 @@
-{
-  "description": "Permission-first model configuration",
-  "settings": {
-    "allow_caching": true,
-    "cache_file": "data/runtime/allowed_commands.json",
-    "normalize_commands": true,
-    "split_chained": true
-  },
-  "command_categories": {
-    "hard_stop": {
-      "description": "Commands that are never executed - hard stop",
-      "allow_once": false,
-      "allow_always": false,
-      "commands": [
-        "rm -rf /",
-        "rm -rf /*",
-        "dd if=/dev/zero of=/dev/sd*",
-        "dd if=/dev/zero of=/dev/hd*",
-        "mkfs",
-        "> /dev/sd*",
-        "> /dev/hd*"
-      ]
-    },
-    "no_always": {
-      "description": "Dangerous commands - allow once only",
-      "allow_once": true,
-      "allow_always": false,
-      "commands": [
-        "rm -rf *",
-        "rm -rf .*",
-        "curl |",
-        "wget -O- |",
-        ":(){:|:&};:",
-        "fork",
-        "chmod -R 000",
-        "chmod -R 777",
-        "chown -R",
-        "apt",
-        "apt-get",
-        "dpkg",
-        "yum",
-        "dnf",
-        "pacman",
-        "shutdown",
-        "reboot",
-        "halt",
-        "init 0",
-        "init 6",
-        "telinit",
-        "systemctl stop",
-        "systemctl start",
-        "systemctl restart",
-        "service stop",
-        "service start",
-        "kill -9 -1",
-        "killall",
-        "pkill -9",
-        "reboot -f",
-        "shutdown -h now",
-        "poweroff",
-        "echo .* > /proc/",
-        "echo .* > /sys/"
-      ]
-    },
-    "normal": {
-      "description": "Normal commands - allow once or always",
-      "allow_once": true,
-      "allow_always": true,
-      "commands": [
-        "shell_exec",
-        "file_write"
-      ],
-      "file_extensions": [
-        ".py",
-        ".txt",
-        ".json",
-        ".md",
-        ".yaml",
-        ".yml",
-        ".sh",
-        ".bash"
-      ]
-    }
-  },
-  "path_settings": {
-    "allow_read_outside": true,
-    "allow_write_paths": [
-      "/home/mirivlad/git/ducklm",
-      "/tmp"
-    ],
-    "require_confirmation_for_write": true,
-    "require_confirmation_for_shell": true
-  }
-}
\ No newline at end of file
diff --git a/config/prompts.json b/config/prompts.json
deleted file mode 100644
index 4b39235..0000000
--- a/config/prompts.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-  "thinker": "You are the orchestrator of a local AI agent runtime. Your job is to analyze the user's task and decide how to execute it.\n\n## Decision Types\n\n1. **Direct response** — for simple questions, greetings, conversations:\n   {\"type\": \"respond\", \"payload\": {\"text\": \"your answer\"}}\n\n2. **Single tool step** — for simple tasks needing one tool:\n   {\"type\": \"step\", \"payload\": {\"tool\": \"shell_exec\", \"args\": {\"command\": \"...\"}}}\n\n3. **Multi-step plan** — for complex tasks that need decomposition:\n   {\"type\": \"plan\", \"payload\": {\"steps\": [\n     {\"id\": \"step-1\", \"tool\": \"shell_exec\", \"args\": {\"command\": \"...\"}, \"description\": \"...\", \"depends_on\": []},\n     {\"id\": \"step-2\", \"tool\": \"file_read\", \"args\": {\"path\": \"...\"}, \"description\": \"...\", \"depends_on\": [\"step-1\"]}\n   ]}}\n\n## When to use multi-step plan\n- Task requires multiple operations (search → read → write)\n- Task involves checking prerequisites before acting\n- Task requires gathering information before producing result\n- User asks to do something complex (setup, configure, analyze)\n\n## Memory\n- If memory recall results are provided, USE them to inform your decisions\n- If you know something from memory, mention it in step descriptions\n- Store important results for future use\n\n## Rules\n- ALWAYS respond with valid JSON only\n- Each step MUST have a unique id\n- Use depends_on for ordering constraints\n- Keep steps focused — one action per step\n- If unsure, start with an information-gathering step\n- Respond ONLY with valid JSON, no explanations",
-
-  "orchestrator": "You are an expert orchestrator for a local AI agent system. Your role is to analyze the user's task and generate executable runtime steps.\n\nTool selection (choose the right tool):\n- shell_exec: for running commands, checking programs exist ('which', '--version'), searching files\n- file_read: for reading contents of a file (must be existing file path)\n- file_write: for creating or updating files\n- memory: for storing or searching memory\n\nSTRICT OUTPUT FORMAT - MUST follow exactly:\n\nSingle step:\n{\"type\": \"step\", \"payload\": {\"tool\": \"shell_exec\", \"args\": {\"command\": \"...\"}}}\n{\"type\": \"step\", \"payload\": {\"tool\": \"file_read\", \"args\": {\"path\": \"...\"}}}\n{\"type\": \"step\", \"payload\": {\"tool\": \"file_write\", \"args\": {\"path\": \"...\", \"content\": \"...\"}}}\n\nMulti-step plan:\n{\"type\": \"plan\", \"payload\": {\"steps\": [{\"tool\": \"file_read\", \"args\": {\"path\": \"...\"}, \"description\": \"...\", \"depends_on\": []}]}}\n\nDirect response:\n{\"type\": \"respond\", \"payload\": {\"text\": \"...\"}}\n\nIMPORTANT:\n- Use exactly {\"type\": \"step|plan|respond\", \"payload\": {...}} format\n- Do NOT output array alone\n- Do NOT use \"kind\" - use \"type\"\n- Respond ONLY with valid JSON\n- Your response MUST be complete valid JSON - the closing brace } MUST be present\n- Do NOT truncate your response - if you cannot fit all steps, use a single step\n\nTool selection:\n- For checking if a program/command exists: use shell_exec with 'which <program>' or '<program> --version'\n- For reading file contents: use file_read with path to file (NOT command)\n- For executing any command: use shell_exec\n- Previous experience (from memory) may help - consider it but YOU decide how to proceed",
-
-  "planning": "You are a planning specialist. Generate execution plans.\n\nOutput MUST be:\n{\"type\": \"plan\", \"version\": \"1.0\", \"payload\": {\"steps\": [{\"tool\": \"\", \"args\": {}, \"description\": \"...\", \"depends_on\": []}]}}\n\nRules:\n- Each step must have unique id (auto-generated)\n- Use \"depends_on\" for step ordering\n- Use \"tool\" for tool operations\n- Respond ONLY with valid JSON",
-
-  "coder": "You are an expert code generation model.\n\nOutput format:\n{\"type\": \"code\", \"payload\": {\"language\": \"python\", \"content\": \"...\"}}\n\nOR for completion:\n{\"type\": \"respond\", \"payload\": {\"text\": \"...\"}}\n\nGenerate clean, working code. Respond ONLY with valid JSON.",
-
-  "critic": "You are a critic model. Evaluate tool execution results.\n\nScoring criteria:\n- correctness: 0-1 (does result accomplish task?)\n- usefulness: 0-1 (is result useful?)\n- safety: 0-1 (is result safe?)\n- suggest_memory: boolean (should this be stored in memory?)\n- weight: 0-1 (importance score)\n- explanation: brief reasoning\n\nOutput format:\n{\"type\": \"evaluation\", \"payload\": {\"correctness\": 0.0-1.0, \"usefulness\": 0.0-1.0, \"safety\": 0.0-1.0, \"suggest_memory\": true|false, \"weight\": 0.0-1.0, \"explanation\": \"...\"}}\n\nRespond ONLY with valid JSON.",
-
-  "system": "You are ducklm, a local AI agent runtime.\n\nSTRICT RULES:\n- You MUST strictly follow execution schemas\n- You are NOT allowed to output free-form text\n- All outputs MUST be valid JSON matching runtime contracts\n- Use exact tool names from available tool set\n\nCurrent capabilities:\n- Execute shell commands (shell_exec)\n- Read/write files (file_read, file_write)\n- Memory operations (memory)\n\nAlways respond with valid JSON.",
-
-  "sys_util": "You are a STRICT JSON repair engine inside a production AI runtime.\nYour job is ONLY to fix invalid JSON syntax.\nYou are NOT allowed to:\n- change meaning of data\n- add new fields\n- remove valid fields\n- interpret intent\n- explain anything\n- reformat structure logically\n---\nINPUT:\nYou receive a malformed or invalid JSON string.\n---\nOUTPUT RULES:\n- Output ONLY valid JSON\n- No markdown\n- No comments\n- No explanations\n- No extra text\n---\nREPAIR RULES (STRICT):\nFix ONLY syntax issues:\n- missing or extra commas\n- missing quotes\n- incorrect brackets\n- trailing commas\n- invalid escaping\n- broken strings\n- unbalanced braces\nDO NOT:\n- rename keys\n- reorder fields intentionally\n- guess missing semantic data\n- \"improve\" structure\n---\nIMPORTANT:\nIf multiple valid repairs exist:\n→ choose the minimal change that makes JSON valid\n---\nOUTPUT MUST BE VALID JSON OR NOTHING ELSE\nInvalid JSON:"
-}
diff --git a/config/prompts/coder.md b/config/prompts/coder.md
deleted file mode 100644
index 4dde8b6..0000000
--- a/config/prompts/coder.md
+++ /dev/null
@@ -1,9 +0,0 @@
-You are an expert code generation model.
-
-Output format:
-{"type": "code", "payload": {"language": "python", "content": "..."}}
-
-OR for completion:
-{"type": "respond", "payload": {"text": "..."}}
-
-Generate clean, working code. Respond ONLY with valid JSON.
\ No newline at end of file
diff --git a/config/prompts/critic.md b/config/prompts/critic.md
deleted file mode 100644
index f3b986e..0000000
--- a/config/prompts/critic.md
+++ /dev/null
@@ -1,14 +0,0 @@
-You are a critic model. Evaluate tool execution results.
-
-Scoring criteria:
-- correctness: 0-1 (does result accomplish task?)
-- usefulness: 0-1 (is result useful?)
-- safety: 0-1 (is result safe?)
-- suggest_memory: boolean (should this be stored in memory?)
-- weight: 0-1 (importance score)
-- explanation: brief reasoning
-
-Output format:
-{"type": "evaluation", "payload": {"correctness": 0.0-1.0, "usefulness": 0.0-1.0, "safety": 0.0-1.0, "suggest_memory": true|false, "weight": 0.0-1.0, "explanation": "..."}}
-
-Respond ONLY with valid JSON.
\ No newline at end of file
diff --git a/config/prompts/json_compiler.md b/config/prompts/json_compiler.md
deleted file mode 100644
index b944a93..0000000
--- a/config/prompts/json_compiler.md
+++ /dev/null
@@ -1,25 +0,0 @@
-You are a JSON Compiler. Convert semantic plan to strict JSON.
-
-INPUT: Semantic plan from Thinker
-OUTPUT: Valid JSON only
-
-RULES:
-- Convert ONLY, do not make decisions
-- Do not invent tools
-- Do not modify plan logic
-- Do not skip steps
-- Output ONLY valid JSON
-
-AVAILABLE TOOLS:
-- file_write (requires permission)
-- shell_exec (execute shell commands, requires permission)
-- memory (no permission needed)
-- file_read (no permission needed)
-- respond (just return text to user, no execution)
-
-IMPORTANT: Use exactly "shell_exec" (not "shell") for shell commands!
-
-OUTPUT FORMAT:
-{"type": "plan", "payload": {"steps": [{"id": "1", "tool": "shell_exec", "args": {"command": "..."}, "depends_on": []}]}}
-OR
-{"type": "respond", "payload": {"text": "..."}}
diff --git a/config/prompts/orchestrator.md b/config/prompts/orchestrator.md
deleted file mode 100644
index e8eeb95..0000000
--- a/config/prompts/orchestrator.md
+++ /dev/null
@@ -1,34 +0,0 @@
-You are an expert orchestrator for a local AI agent system. Your role is to analyze the user's task and generate executable runtime steps.
-
-Tool selection (choose the right tool):
-- shell_exec: for running commands, checking programs exist ('which', '--version'), searching files
-- file_read: for reading contents of a file (must be existing file path)
-- file_write: for creating or updating files
-- memory: for storing or searching memory
-
-STRICT OUTPUT FORMAT - MUST follow exactly:
-
-Single step:
-{"type": "step", "payload": {"tool": "shell_exec", "args": {"command": "..."}}}
-{"type": "step", "payload": {"tool": "file_read", "args": {"path": "..."}}}
-{"type": "step", "payload": {"tool": "file_write", "args": {"path": "...", "content": "..."}}}
-
-Multi-step plan:
-{"type": "plan", "payload": {"steps": [{"tool": "file_read", "args": {"path": "..."}, "description": "...", "depends_on": []}]}}
-
-Direct response:
-{"type": "respond", "payload": {"text": "..."}}
-
-IMPORTANT:
-- Use exactly {"type": "step|plan|respond", "payload": {...}} format
-- Do NOT output array alone
-- Do NOT use "kind" - use "type"
-- Respond ONLY with valid JSON
-- Your response MUST be complete valid JSON - the closing brace } MUST be present
-- Do NOT truncate your response - if you cannot fit all steps, use a single step
-
-Tool selection:
-- For checking if a program/command exists: use shell_exec with 'which <program>' or '<program> --version'
-- For reading file contents: use file_read with path to file (NOT command)
-- For executing any command: use shell_exec
-- Previous experience (from memory) may help - consider it but YOU decide how to proceed
\ No newline at end of file
diff --git a/config/prompts/planning.md b/config/prompts/planning.md
deleted file mode 100644
index 186cdf6..0000000
--- a/config/prompts/planning.md
+++ /dev/null
@@ -1,10 +0,0 @@
-You are a planning specialist. Generate execution plans.
-
-Output MUST be:
-{"type": "plan", "version": "1.0", "payload": {"steps": [{"tool": "", "args": {}, "description": "...", "depends_on": []}]}}
-
-Rules:
-- Each step must have unique id (auto-generated)
-- Use "depends_on" for step ordering
-- Use "tool" for tool operations
-- Respond ONLY with valid JSON
\ No newline at end of file
diff --git a/config/prompts/sys_util.md b/config/prompts/sys_util.md
deleted file mode 100644
index daeefb2..0000000
--- a/config/prompts/sys_util.md
+++ /dev/null
@@ -1,41 +0,0 @@
-You are a STRICT JSON repair engine inside a production AI runtime.
-Your job is ONLY to fix invalid JSON syntax.
-You are NOT allowed to:
-- change meaning of data
-- add new fields
-- remove valid fields
-- interpret intent
-- explain anything
-- reformat structure logically
----
-INPUT:
-You receive a malformed or invalid JSON string.
----
-OUTPUT RULES:
-- Output ONLY valid JSON
-- No markdown
-- No comments
-- No explanations
-- No extra text
----
-REPAIR RULES (STRICT):
-Fix ONLY syntax issues:
-- missing or extra commas
-- missing quotes
-- incorrect brackets
-- trailing commas
-- invalid escaping
-- broken strings
-- unbalanced braces
-DO NOT:
-- rename keys
-- reorder fields intentionally
-- guess missing semantic data
-- "improve" structure
----
-IMPORTANT:
-If multiple valid repairs exist:
-→ choose the minimal change that makes JSON valid
----
-OUTPUT MUST BE VALID JSON OR NOTHING ELSE
-Invalid JSON:
\ No newline at end of file
diff --git a/config/prompts/system.md b/config/prompts/system.md
deleted file mode 100644
index 6e5c00f..0000000
--- a/config/prompts/system.md
+++ /dev/null
@@ -1,14 +0,0 @@
-You are ducklm, a local AI agent runtime.
-
-STRICT RULES:
-- You MUST strictly follow execution schemas
-- You are NOT allowed to output free-form text
-- All outputs MUST be valid JSON matching runtime contracts
-- Use exact tool names from available tool set
-
-Current capabilities:
-- Execute shell commands (shell_exec)
-- Read/write files (file_read, file_write)
-- Memory operations (memory)
-
-Always respond with valid JSON.
\ No newline at end of file
diff --git a/config/prompts/thinker.md b/config/prompts/thinker.md
deleted file mode 100644
index 679d89d..0000000
--- a/config/prompts/thinker.md
+++ /dev/null
@@ -1,36 +0,0 @@
-You are a Thinker. Analyze user task and create execution plan.
-
-CONTEXT:
-{task_summary}
-{memory_context}
-
-AVAILABLE TOOLS (injected at runtime):
-{tools_json}
-
-INSTRUCTIONS:
-1. Understand what user wants
-2. Create step-by-step plan in natural language
-3. Choose appropriate tools from available
-4. If the user asks about the current local machine, filesystem, processes,
-   packages, logs, runtime state, or anything that must be observed rather than
-   answered from general knowledge, use an appropriate tool.
-5. For exploratory tasks, prefer one robust inspection command over many brittle
-   dependent checks. Missing optional files should be treated as information, not
-   as a fatal failure.
-
-MODE: {mode_hint}
-- If mode is "execution": create a plan with TOOL STEPS (shell_exec, file_write, etc)
-- If mode is "conversation": just respond with text, NO tool execution
-- If mode is "clarification_needed": ask user for clarification
-
-OUTPUT FORMAT (SEMANTIC PLAN - NOT JSON):
-For execution mode:
-ПЛАН:
-Шаг 1: [use tool - e.g., shell_exec]
-Шаг 2: [use tool]
-
-For conversation mode:
-ОТВЕТ: [just text, no tools needed]
-
-For clarification:
-ОТВЕТ: [вопрос пользователю для уточнения]
diff --git a/config/runtime.json b/config/runtime.json
deleted file mode 100644
index 1cf4efb..0000000
--- a/config/runtime.json
+++ /dev/null
@@ -1,42 +0,0 @@
-{
-  "step_timeout_ms": 30000,
-  "task_timeout_ms": 300000,
-  "shell_command_timeout_ms": 3600000,
-  "shell_idle_timeout_ms": 600000,
-  "planner_retry_limit": 2,
-  "tool_retry_limit": 1,
-  "replan_limit": 1,
-  "max_execution_steps": 20,
-  "retrieval_top_k": 5,
-  "max_context_tokens": 8192,
-  "context_budgets": {
-    "system": 512,
-    "task": 512,
-    "memory": 2048,
-    "execution": 2048,
-    "tools": 1024,
-    "safety": 512
-  },
-  "reserve_for_generation_pct": 25,
-  "orchestrator_retry_limit": 2,
-  "memory_thresholds": {
-    "default_store_weight": 0.8
-  },
-  "critic_fallback_policy": "continue_without_critic",
-  "checkpoint_policy": {
-    "save_on_transition": true
-  },
-  "event_retention_policy": {
-    "keep_all": true
-  },
-  "streaming_settings": {
-    "enabled": true
-  },
-  "debug": true,
-  "debug_orchestrator_log_length": 500,
-  "json_fix_retry_limit": 2,
-  "json_fix_use_sys_util": true,
-  "intent_classifier": "thinker",
-  "recall_model": "json_compiler",
-  "critic_retry_limit": 2
-}
diff --git a/docker-compose.memory.yml b/docker-compose.memory.yml
new file mode 100644
index 0000000..0040f47
--- /dev/null
+++ b/docker-compose.memory.yml
@@ -0,0 +1,11 @@
+services:
+  qdrant:
+    image: qdrant/qdrant:latest
+    ports:
+      - "6333:6333"
+      - "6334:6334"
+    volumes:
+      - qdrant_storage:/qdrant/storage
+
+volumes:
+  qdrant_storage:
diff --git a/docs/architecture.md b/docs/architecture.md
new file mode 100644
index 0000000..1862a09
--- /dev/null
+++ b/docs/architecture.md
@@ -0,0 +1,5 @@
+# Architecture
+
+DuckLM is organized as WebChat and FastAPI over Duck Core. Duck Core owns task state, context building, model calls, events, tools, approvals, skills, experience, and memory adapters.
+
+The first vertical slice is WebChat -> FastAPI -> RuntimeLoop -> ModelClient -> llama-server -> SQLite event timeline.
diff --git a/docs/experience_learning.md b/docs/experience_learning.md
new file mode 100644
index 0000000..a5ad5a4
--- /dev/null
+++ b/docs/experience_learning.md
@@ -0,0 +1,9 @@
+# Experience Learning
+
+Experience records are stored in SQLite. Suggested skill updates are written to `skills/_proposals/` and are not applied automatically.
+
+Use:
+
+```bash
+curl http://127.0.0.1:8000/v1/experience
+```
diff --git a/docs/how_to_run.md b/docs/how_to_run.md
new file mode 100644
index 0000000..b185a8f
--- /dev/null
+++ b/docs/how_to_run.md
@@ -0,0 +1,71 @@
+# How To Run
+
+1. Install dependencies:
+
+```bash
+python3 -m venv .venv
+. .venv/bin/activate
+python -m pip install -e ".[dev]"
+```
+
+2. Configure:
+
+```bash
+cp .env.example .env
+```
+
+The default `DUCK_MAIN_MODEL_PATH` points to `./models/Qwen3.6/nonMTP/Qwen3.6-35B-A3B-UD-Q4_K_M.gguf`.
+
+3. Start `llama-server`:
+
+```bash
+bash scripts/llama/start_main.sh start
+```
+
+Useful process commands:
+
+```bash
+bash scripts/llama/start_main.sh status
+bash scripts/llama/start_main.sh logs --follow
+bash scripts/llama/start_main.sh restart
+bash scripts/llama/start_main.sh stop
+```
+
+4. Start DuckLM API:
+
+```bash
+python -m duck_core.api
+```
+
+5. Open WebChat:
+
+```text
+http://127.0.0.1:8000/
+```
+
+6. Send a task:
+
+```bash
+curl -X POST http://127.0.0.1:8000/v1/chat \
+  -H "Content-Type: application/json" \
+  -d '{"message":"Скажи коротко, что ты DuckLM","workspace":"./workspace","debug":true}'
+```
+
+7. Inspect events:
+
+```bash
+curl http://127.0.0.1:8000/v1/tasks/<task_id>/events
+```
+
+8. Approvals:
+
+```bash
+curl http://127.0.0.1:8000/v1/approvals/pending
+```
+
+9. Stop services (the `docker compose` line is only needed if you started Qdrant from `docker-compose.memory.yml`):
+
+```bash
+bash scripts/llama/start_main.sh stop
+docker compose -f docker-compose.memory.yml down
+```
diff --git a/docs/how_to_test.md b/docs/how_to_test.md
new file mode 100644
index 0000000..7f86e0e
--- /dev/null
+++ b/docs/how_to_test.md
@@ -0,0 +1,15 @@
+# How To Test
+
+Run smoke tests:
+
+```bash
+python -m pytest tests/smoke -v
+```
+
+Run verification scripts against a running API:
+
+```bash
+bash scripts/verify/verify_basic_chat.sh
+bash scripts/verify/verify_models_roles.sh
+bash scripts/verify/verify_tool_blocking.sh
+```
diff --git a/docs/local_llama_server.md b/docs/local_llama_server.md
new file mode 100644
index 0000000..8b68e44
--- /dev/null
+++ b/docs/local_llama_server.md
@@ -0,0 +1,44 @@
+# Local Llama Server
+
+DuckLM expects an OpenAI-compatible `llama-server` at `http://127.0.0.1:8081/v1` by default.
+
+On the current Radeon RX580 system, `llama.cpp` is built locally with Vulkan:
+
+```bash
+bash scripts/llama/build_vulkan.sh
+```
+
+The main model is Qwen3.6 35B A3B nonMTP:
+
+```text
+models/Qwen3.6/nonMTP/Qwen3.6-35B-A3B-UD-Q4_K_M.gguf
+```
+
+Start it in the background with:
+
+```bash
+bash scripts/llama/start_main.sh start
+```
+
+Manage the process:
+
+```bash
+bash scripts/llama/start_main.sh status
+bash scripts/llama/start_main.sh logs
+bash scripts/llama/start_main.sh logs --follow
+bash scripts/llama/start_main.sh restart
+bash scripts/llama/start_main.sh stop
+```
+
+The local `.env` uses:
+
+```env
+DUCK_LLAMA_SERVER_BIN=./vendor/llama.cpp/build/bin/llama-server
+DUCK_CTX_SIZE=4096
+DUCK_N_GPU_LAYERS=20
+DUCK_PARALLEL=1
+DUCK_LLAMA_DEVICE=Vulkan0
+DUCK_LLAMA_EXTRA_ARGS="--reasoning off --cache-ram 0"
+```
+
+MTP is available only through `scripts/llama/start_thinker_mtp_experimental.sh` and is not used by the action JSON endpoint by default.
diff --git a/docs/memory_architecture.md b/docs/memory_architecture.md
new file mode 100644
index 0000000..4f40f7f
--- /dev/null
+++ b/docs/memory_architecture.md
@@ -0,0 +1,5 @@
+# Memory Architecture
+
+Semantic memory uses Qdrant as the vector store. Embeddings come from `/v1/embeddings` when the model backend supports it.
+
+If embeddings are unavailable, `VectorMemory` fails explicitly with `EmbeddingsUnavailableError`; it does not invent a local embedding algorithm.
diff --git a/docs/model_roles.md b/docs/model_roles.md
new file mode 100644
index 0000000..be84223
--- /dev/null
+++ b/docs/model_roles.md
@@ -0,0 +1,7 @@
+# Model Roles
+
+Roles are logical, not physical. `thinker`, `critic`, `coder`, `action`, and `summary` may all point to the same model.
+
+Each role can differ by prompt, temperature, output limit, response format, schema, memory scope, and endpoint. Request-level parameters can change per call. Backend-level parameters such as GGUF path, context size, GPU offload, MTP, and server port require the backend to be started with the desired settings.
+
+See `config/models.yaml` for one model mapped to all roles.
diff --git a/docs/performance_mtp.md b/docs/performance_mtp.md
new file mode 100644
index 0000000..49e2c26
--- /dev/null
+++ b/docs/performance_mtp.md
@@ -0,0 +1,5 @@
+# Performance And MTP
+
+MTP/speculative decoding is an inference backend concern. DuckLM keeps action JSON on the normal endpoint by default.
+
+Use `scripts/llama/start_thinker_mtp_experimental.sh` only for experiments. Benchmark scaffolding is in `scripts/bench/bench_runtime.py`.
diff --git a/docs/plans/ui-bootstrap-review-plan.md b/docs/plans/ui-bootstrap-review-plan.md
deleted file mode 100644
index 7b58c27..0000000
--- a/docs/plans/ui-bootstrap-review-plan.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# UI Bootstrap And Review Flow Plan
-
-## Goal
-
-Move the web chat UI to Bootstrap 5.3 with Bootswatch themes and improve review/password/terminal-output ergonomics.
-
-## Required Changes
-
-- Replace the current hand-written visual system in `app/api/static/index.html` with Bootstrap 5.3 layout/components.
-- Add Bootswatch theme support with a visible theme selector and persistent localStorage choice.
-- Password/secret input must submit on Enter as well as the "Отправить" button.
-- Console/tool output must render inside a collapsed Bootstrap accordion item.
-- The accordion body must contain terminal-style output inside `<pre></pre>`.
-- The terminal accordion must expand only when the user clicks it.
-- Review UI must show critic/system assessment and user voting buttons:
-  - `Ошибочное действие`
-  - `Всё верно`
-  - optional correction/comment text.
-
-## Notes
-
-- Keep runtime event handling WebSocket-driven.
-- Do not mix console output with assistant prose.
-- Keep raw tool output available for debugging, but collapsed by default.
diff --git a/docs/skills.md b/docs/skills.md
new file mode 100644
index 0000000..d2275e9
--- /dev/null
+++ b/docs/skills.md
@@ -0,0 +1,9 @@
+# Skills
+
+Skills are procedural memory, not hardcoded routing. The first skill is `analyze_project`, loaded from `skills/analyze_project/skill.yaml`.
+
+Use:
+
+```bash
+curl http://127.0.0.1:8000/v1/skills
+```
diff --git a/docs/superpowers/plans/2026-05-19-ducklm-runtime.md b/docs/superpowers/plans/2026-05-19-ducklm-runtime.md
new file mode 100644
index 0000000..97a10cd
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-19-ducklm-runtime.md
@@ -0,0 +1,83 @@
+# DuckLM Runtime Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Build the first runnable DuckLM local cognitive runtime from `Ducklm.md`.
+
+**Architecture:** FastAPI exposes WebChat and HTTP endpoints, RuntimeLoop coordinates tasks, ModelClient calls an OpenAI-compatible `llama-server`, and SQLite persists tasks/events/approvals/experience. Tools, skills, and memory are small adapters with clear boundaries so later stages can grow without turning the runtime into hardcoded workflow rules.
+
+**Tech Stack:** Python 3.11+, FastAPI, httpx, aiosqlite, Pydantic, Jinja2, PyYAML, jsonschema, Qdrant client.
+
+---
+
+### Task 1: Tests First
+
+**Files:**
+- Create: `tests/smoke/test_models_config.py`
+- Create: `tests/smoke/test_model_client.py`
+- Create: `tests/smoke/test_api_health.py`
+- Create: `tests/smoke/test_event_log.py`
+- Create: `tests/smoke/test_action_directive_schema.py`
+- Create: `tests/smoke/test_tool_gateway.py`
+- Create: `tests/smoke/test_approvals.py`
+- Create: `tests/smoke/test_skill_registry.py`
+- Create: `tests/smoke/test_experience_recorder.py`
+- Create: `tests/smoke/test_vector_memory.py`
+
+- [ ] Write smoke tests for config, model role mapping, stores, tools, approvals, skills, experience, memory, and API health.
+- [ ] Run `python -m pytest tests/smoke -v` and verify tests fail because implementation modules do not exist.
+
+### Task 2: Runtime Core
+
+**Files:**
+- Create: `pyproject.toml`
+- Create: `.env.example`
+- Create: `config/models.yaml`
+- Create: `duck_core/config.py`
+- Create: `duck_core/model_client.py`
+- Create: `duck_core/events/store.py`
+- Create: `duck_core/tasks/store.py`
+- Create: `duck_core/tasks/state.py`
+- Create: `duck_core/context_builder.py`
+- Create: `duck_core/runtime_loop.py`
+- Create: `duck_core/api.py`
+
+- [ ] Implement settings and model config loading.
+- [ ] Implement ModelClient role-based chat calls with latency and usage capture.
+- [ ] Implement SQLite task and event stores.
+- [ ] Implement RuntimeLoop for `POST /v1/chat`.
+- [ ] Implement FastAPI endpoints and WebChat rendering.
+
+### Task 3: Stage Adapters
+
+**Files:**
+- Create: `duck_core/tools/*`
+- Create: `duck_core/approvals/service.py`
+- Create: `duck_core/skills/registry.py`
+- Create: `duck_core/experience/recorder.py`
+- Create: `duck_core/reflection.py`
+- Create: `duck_core/memory/*`
+- Create: `duck_core/schemas/action_directive.schema.json`
+
+- [ ] Implement safe file read/write and allowlisted shell execution.
+- [ ] Implement exact-action approval records.
+- [ ] Implement filesystem SkillRegistry.
+- [ ] Implement experience recording and skill proposal writing.
+- [ ] Implement Qdrant memory adapter with explicit embedding-disabled errors.
+
+### Task 4: Project Surface
+
+**Files:**
+- Create: `scripts/llama/*`
+- Create: `scripts/verify/*`
+- Create: `scripts/bench/bench_runtime.py`
+- Create: `duck_core/web/templates/*`
+- Create: `duck_core/web/static/*`
+- Create: `skills/analyze_project/*`
+- Create: `docker-compose.memory.yml`
+- Create: `Makefile`
+- Create: `README.md`
+- Create: `docs/*.md`
+
+- [ ] Add llama-server scripts, verification scripts, benchmark, WebChat pages, starter skill, compose file, make targets, and docs.
+- [ ] Run smoke tests and syntax checks.
diff --git a/docs/tool_gateway.md b/docs/tool_gateway.md
new file mode 100644
index 0000000..e960d81
--- /dev/null
+++ b/docs/tool_gateway.md
@@ -0,0 +1,9 @@
+# Tool Gateway
+
+The model does not execute tools directly. It emits an action directive and `ToolGateway` validates the tool name and arguments before execution.
+
+Implemented tools:
+
+- `file_read`: reads inside workspace only.
+- `file_write`: writes inside workspace only and refuses overwrites unless `overwrite=true`.
+- `shell_exec_safe`: runs only allowlisted commands and blocks dangerous commands.
diff --git a/docs/web_api.md b/docs/web_api.md
new file mode 100644
index 0000000..edd83a5
--- /dev/null
+++ b/docs/web_api.md
@@ -0,0 +1,25 @@
+# Web API
+
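+Endpoints (`duck_core/api.py` additionally serves `POST /v1/chat/stream`, `POST /v1/tasks/{task_id}/continue`, and `POST /v1/tasks/{task_id}/cancel`):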
+
+```text
+GET  /health
+GET  /v1/status
+GET  /v1/models/roles
+GET  /v1/models/ping
+POST /v1/chat
+POST /v1/tasks
+GET  /v1/tasks
+GET  /v1/tasks/{task_id}
+GET  /v1/tasks/{task_id}/events
+GET  /v1/tasks/{task_id}/stream
+GET  /v1/approvals/pending
+POST /v1/approvals/{approval_id}/allow_once
+POST /v1/approvals/{approval_id}/allow_forever
+POST /v1/approvals/{approval_id}/deny
+GET  /v1/skills
+GET  /v1/skills/{skill_id}
+GET  /v1/experience
+GET  /v1/experience/{id}
+GET  /v1/memory/search?q=...
+```
diff --git a/duck_core/__init__.py b/duck_core/__init__.py
new file mode 100644
index 0000000..a05eb9a
--- /dev/null
+++ b/duck_core/__init__.py
@@ -0,0 +1,3 @@
+__all__ = ["__version__"]
+
+__version__ = "0.1.0"
diff --git a/duck_core/api.py b/duck_core/api.py
new file mode 100644
index 0000000..1abad91
--- /dev/null
+++ b/duck_core/api.py
@@ -0,0 +1,348 @@
+import asyncio
+import json
+import logging
+from pathlib import Path
+from typing import Any
+
+import uvicorn
+from fastapi import FastAPI, HTTPException, Request
+from fastapi.responses import HTMLResponse, StreamingResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from pydantic import BaseModel
+
+from duck_core.approvals.service import ApprovalService
+from duck_core.config import get_settings
+from duck_core.events.store import EventStore
+from duck_core.experience.recorder import ExperienceRecorder
+from duck_core.memory.vector_memory import EmbeddingsUnavailableError, VectorMemory
+from duck_core.model_client import ModelClient
+from duck_core.runtime_loop import RuntimeLoop
+from duck_core.skills.registry import SkillRegistry
+from duck_core.tasks.store import TaskStore
+
+logger = logging.getLogger(__name__)
+
+
+class ChatRequest(BaseModel):  # JSON body shared by /v1/chat, /v1/chat/stream and /v1/tasks
+    message: str  # user task text
+    workspace: str | None = None  # handlers fall back to settings.workspace when this is None or empty
+    debug: bool = False  # forwarded unchanged to the runtime / task store
+
+
+def create_app() -> FastAPI:  # build the FastAPI app, its stores/services, and every route
+    settings = get_settings()
+    if settings.api_host == "0.0.0.0":  # warn only; the app is still created
+        logger.warning(
+            "DuckLM API is listening on 0.0.0.0. This may expose local tool execution endpoints."
+        )
+    Path(settings.workspace).mkdir(parents=True, exist_ok=True)
+    Path(settings.db_path).parent.mkdir(parents=True, exist_ok=True)
+
+    app = FastAPI(title="DuckLM", version="0.1.0")
+    templates = Jinja2Templates(directory="duck_core/web/templates")  # relative path: resolved against the process working directory
+    app.mount("/static", StaticFiles(directory="duck_core/web/static"), name="static")
+
+    task_store = TaskStore(settings.db_path)  # the stores below all share the same db_path
+    event_store = EventStore(settings.db_path)
+    model_client = ModelClient()
+    approvals = ApprovalService(settings.db_path)
+    runtime = RuntimeLoop(task_store, event_store, model_client, approval_service=approvals)
+    skills = SkillRegistry("skills")  # relative path, like the template/static directories above
+    experience = ExperienceRecorder(settings.db_path)
+    memory = VectorMemory(settings.qdrant_url, embeddings_base_url=None)  # NOTE(review): no embeddings endpoint is passed here — confirm /v1/memory/search can ever return results
+
+    @app.on_event("startup")  # NOTE(review): on_event is deprecated in recent FastAPI in favour of lifespan handlers
+    async def startup() -> None:  # initialise each store before requests are served
+        await task_store.init()
+        await event_store.init()
+        await approvals.init()
+        await experience.init()
+
+    @app.get("/", response_class=HTMLResponse)  # HTML pages rendered from Jinja2 templates
+    async def index(request: Request) -> HTMLResponse:
+        return templates.TemplateResponse(request, "index.html")
+
+    @app.get("/approvals", response_class=HTMLResponse)
+    async def approvals_page(request: Request) -> HTMLResponse:
+        return templates.TemplateResponse(request, "approvals.html")
+
+    @app.get("/skills", response_class=HTMLResponse)
+    async def skills_page(request: Request) -> HTMLResponse:
+        return templates.TemplateResponse(request, "skills.html")
+
+    @app.get("/memory", response_class=HTMLResponse)
+    async def memory_page(request: Request) -> HTMLResponse:
+        return templates.TemplateResponse(request, "memory.html")
+
+    @app.get("/experience", response_class=HTMLResponse)
+    async def experience_page(request: Request) -> HTMLResponse:
+        return templates.TemplateResponse(request, "experience.html")
+
+    @app.get("/health")
+    async def health() -> dict[str, str]:  # static liveness answer; no store or model is touched
+        return {"status": "ok"}
+
+    @app.get("/v1/status")
+    async def status() -> dict[str, Any]:  # echoes selected settings; includes local filesystem paths
+        return {
+            "name": "DuckLM",
+            "version": "0.1.0",
+            "api_host": settings.api_host,
+            "api_port": settings.api_port,
+            "workspace": settings.workspace,
+            "db_path": settings.db_path,
+        }
+
+    @app.get("/v1/models/roles")
+    async def roles() -> dict[str, Any]:
+        return model_client.list_roles()
+
+    @app.get("/v1/models/ping")
+    async def models_ping() -> dict[str, Any]:
+        return await model_client.ping()
+
+    @app.post("/v1/chat")
+    async def chat(body: ChatRequest) -> dict[str, Any]:  # non-streaming chat: the whole result is returned in one response
+        result = await runtime.run_chat(body.message, body.workspace or settings.workspace, body.debug)
+        return result.__dict__  # NOTE(review): assumes run_chat returns a plain object whose __dict__ is JSON-serialisable — confirm
+
+    def sse(event: str, payload: dict[str, Any]) -> str:  # format one Server-Sent Events frame (event + data lines)
+        return f"event: {event}\ndata: {json.dumps(payload, ensure_ascii=False)}\n\n"
+
+    async def emit_tool_events(task_id: str, after_sequence: int):  # async generator yielding SSE frames
+        events = await event_store.list_events(task_id)
+        visible_types = {  # only these tool-related event types are forwarded to the client
+            "tool_call_started",
+            "tool_call_finished",
+            "tool_approval_requested",
+        }
+        for event in events:
+            if event.sequence > after_sequence and event.event_type in visible_types:  # only events newer than after_sequence
+                yield sse(event.event_type, event.model_dump())
+
+    @app.post("/v1/chat/stream")
+    async def chat_stream(body: ChatRequest) -> StreamingResponse:  # SSE variant of chat: streams tool events and model deltas
+        async def generator():
+            task = await task_store.create_task(
+                body.message, body.workspace or settings.workspace, body.debug
+            )
+            task_event = await event_store.append(
+                task.task_id,
+                "task_created",
+                {
+                    "message": body.message,
+                    "workspace": body.workspace or settings.workspace,
+                    "debug": body.debug,
+                },
+            )
+            yield sse("task_created", task_event.model_dump())
+
+            reasoning_parts: list[str] = []  # accumulated so the full text can be stored once streaming ends
+            content_parts: list[str] = []
+            try:
+                messages = runtime.context_builder.build_basic_messages(task)
+                tool_observations = await runtime._run_action_tools(  # NOTE(review): reaches into a private RuntimeLoop method
+                    task.task_id, messages, body.workspace or settings.workspace
+                )
+                async for tool_event in emit_tool_events(task.task_id, task_event.sequence):
+                    yield tool_event
+                if any(observation.get("requires_approval") for observation in tool_observations):  # stop before any model call while an approval is outstanding
+                    await task_store.waiting_for_approval(task.task_id)
+                    await event_store.append(
+                        task.task_id,
+                        "task_waiting_for_approval",
+                        {"observations": tool_observations},
+                    )
+                    yield sse(
+                        "done",
+                        {
+                            "task_id": task.task_id,
+                            "status": "waiting_for_approval",
+                            "final_response": "Waiting for approval.",
+                            "reasoning_content": None,
+                        },
+                    )
+                    return
+                if tool_observations:  # feed tool results back to the model as an extra user message
+                    messages = [
+                        *messages,
+                        {
+                            "role": "user",
+                            "content": "tool_observations:\n"
+                            + json.dumps(tool_observations, ensure_ascii=False, indent=2),
+                        },
+                    ]
+                await event_store.append(task.task_id, "model_call_started", {"role": "thinker"})
+                async for chunk in model_client.stream_chat("thinker", messages):
+                    delta = str(chunk.get("delta") or "")  # missing/None delta becomes ""
+                    if chunk.get("type") == "reasoning_delta":
+                        reasoning_parts.append(delta)
+                        yield sse(
+                            "reasoning_delta",
+                            {"task_id": task.task_id, "delta": delta},
+                        )
+                    elif chunk.get("type") == "content_delta":  # chunks of any other type are ignored
+                        content_parts.append(delta)
+                        yield sse(
+                            "content_delta",
+                            {"task_id": task.task_id, "delta": delta},
+                        )
+
+                content = "".join(content_parts)
+                reasoning_content = "".join(reasoning_parts) or None  # None when no reasoning text was streamed
+                await event_store.append(
+                    task.task_id,
+                    "cognition_response",
+                    {
+                        "role": "thinker",
+                        "content": content,
+                        "reasoning_content": reasoning_content,
+                    },
+                )
+                await event_store.append(
+                    task.task_id,
+                    "model_call_finished",
+                    {
+                        "role": "thinker",
+                        "model": model_client.get_role_config("thinker").model,
+                    },
+                )
+                await task_store.complete_task(task.task_id, content)
+                await event_store.append(
+                    task.task_id,
+                    "task_completed",
+                    {
+                        "final_response": content,
+                        "reasoning_content": reasoning_content,
+                    },
+                )
+                yield sse(
+                    "done",
+                    {
+                        "task_id": task.task_id,
+                        "status": "completed",
+                        "final_response": content,
+                        "reasoning_content": reasoning_content,
+                    },
+                )
+            except Exception as exc:  # any failure marks the task failed and is sent to the client as an "error" frame
+                await task_store.fail_task(task.task_id, str(exc))
+                await event_store.append(task.task_id, "task_failed", {"error": str(exc)})
+                yield sse(
+                    "error",
+                    {
+                        "task_id": task.task_id,
+                        "status": "failed",
+                        "error": str(exc),
+                    },
+                )
+
+        return StreamingResponse(generator(), media_type="text/event-stream")
+
+    @app.post("/v1/tasks")
+    async def create_task(body: ChatRequest) -> dict[str, Any]:  # stores the task and its first event; the runtime is not invoked here
+        task = await task_store.create_task(body.message, body.workspace or settings.workspace, body.debug)
+        await event_store.append(task.task_id, "task_created", body.model_dump())
+        return task.model_dump()
+
+    @app.get("/v1/tasks")
+    async def list_tasks() -> list[dict[str, Any]]:
+        return [task.model_dump() for task in await task_store.list_tasks()]
+
+    @app.get("/v1/tasks/{task_id}")
+    async def get_task(task_id: str) -> dict[str, Any]:
+        task = await task_store.get_task(task_id)
+        if task is None:
+            raise HTTPException(status_code=404, detail="Task not found")
+        return task.model_dump()
+
+    @app.get("/v1/tasks/{task_id}/events")
+    async def get_events(task_id: str) -> list[dict[str, Any]]:  # no task lookup is done here before listing events
+        return [event.model_dump() for event in await event_store.list_events(task_id)]
+
+    @app.get("/v1/tasks/{task_id}/stream")
+    async def stream_events(task_id: str) -> StreamingResponse:  # polling SSE feed of a task's events
+        async def generator():
+            sent = 0  # number of events already emitted to this client
+            for _ in range(30):  # at most 30 polls, one second apart; then the stream ends
+                events = await event_store.list_events(task_id)
+                for event in events[sent:]:
+                    yield f"data: {json.dumps(event.model_dump())}\n\n"  # default ensure_ascii here, unlike sse(); no "event:" line either
+                sent = len(events)
+                await asyncio.sleep(1)
+
+        return StreamingResponse(generator(), media_type="text/event-stream")
+
+    @app.post("/v1/tasks/{task_id}/continue")
+    async def continue_task(task_id: str) -> dict[str, str]:  # only flips the status and logs an event; no runtime call is made here
+        task = await task_store.get_task(task_id)
+        if task is None:
+            raise HTTPException(status_code=404, detail="Task not found")
+        await task_store.update_status(task_id, "running")
+        await event_store.append(task_id, "task_continued", {})
+        return {"status": "running"}
+
+    @app.post("/v1/tasks/{task_id}/cancel")
+    async def cancel_task(task_id: str) -> dict[str, str]:  # NOTE(review): unlike continue_task, there is no 404 check for an unknown task_id
+        await task_store.cancel_task(task_id)
+        await event_store.append(task_id, "task_cancelled", {})
+        return {"status": "cancelled"}
+
+    @app.get("/v1/approvals/pending")
+    async def pending_approvals() -> list[dict[str, Any]]:
+        return [approval.model_dump() for approval in await approvals.pending()]
+
+    @app.post("/v1/approvals/{approval_id}/allow_once")
+    async def allow_once(approval_id: str) -> dict[str, str]:  # NOTE(review): the three decision handlers never check that approval_id exists — confirm an unknown id should still report success
+        await approvals.allow_once(approval_id)
+        return {"status": "allowed_once"}
+
+    @app.post("/v1/approvals/{approval_id}/allow_forever")
+    async def allow_forever(approval_id: str) -> dict[str, str]:
+        await approvals.allow_forever(approval_id)
+        return {"status": "allowed_forever"}
+
+    @app.post("/v1/approvals/{approval_id}/deny")
+    async def deny(approval_id: str) -> dict[str, str]:
+        await approvals.deny(approval_id)
+        return {"status": "denied"}
+
+    @app.get("/v1/skills")
+    async def list_skills() -> list[dict[str, Any]]:
+        return [skill.model_dump() for skill in skills.load_skills()]
+
+    @app.get("/v1/skills/{skill_id}")
+    async def get_skill(skill_id: str) -> dict[str, Any]:
+        skill = skills.get_skill(skill_id)
+        if skill is None:
+            raise HTTPException(status_code=404, detail="Skill not found")
+        return skill.model_dump()
+
+    @app.get("/v1/experience")
+    async def list_experience() -> list[dict[str, Any]]:
+        return [record.model_dump() for record in await experience.list_records()]
+
+    @app.get("/v1/experience/{record_id}")
+    async def get_experience(record_id: int) -> dict[str, Any]:  # record_id is declared as int in this handler
+        record = await experience.get_record(record_id)
+        if record is None:
+            raise HTTPException(status_code=404, detail="Experience record not found")
+        return record.model_dump()
+
+    @app.get("/v1/memory/search")
+    async def search_memory(q: str) -> dict[str, Any]:
+        try:
+            return {"results": await memory.search_memory(q)}
+        except EmbeddingsUnavailableError as exc:  # degrade to an empty result plus a warning instead of an error status
+            return {"results": [], "warning": str(exc)}
+
+    return app
+
+
+app = create_app()  # module-level ASGI app, built at import time; referenced below as "duck_core.api:app"
+
+
+if __name__ == "__main__":  # entry point for `python -m duck_core.api`
+    settings = get_settings()
+    uvicorn.run("duck_core.api:app", host=settings.api_host, port=settings.api_port, reload=False)
diff --git a/data/.gitkeep b/duck_core/approvals/__init__.py
similarity index 100%
rename from data/.gitkeep
rename to duck_core/approvals/__init__.py
diff --git a/duck_core/approvals/service.py b/duck_core/approvals/service.py
new file mode 100644
index 0000000..f62eecf
--- /dev/null
+++ b/duck_core/approvals/service.py
@@ -0,0 +1,143 @@
+import hashlib
+import json
+from pathlib import Path
+from typing import Any
+from uuid import uuid4
+
+import aiosqlite
+from pydantic import BaseModel
+
+from duck_core.tasks.store import utc_now
+
+
+class Approval(BaseModel):  # one row of the `approvals` table
+    id: int | None = None  # autoincrement row id
+    approval_id: str  # public id: "approval_" followed by 12 hex characters
+    task_id: str
+    action_hash: str  # SHA-256 hex digest of the normalized action JSON
+    normalized_action: dict[str, Any]  # the action as a dict; the table stores it as JSON text
+    status: str  # values written in this module: "pending", "resolved", "allowed_forever"
+    decision: str | None = None  # "allow_once", "allow_forever" or "deny"; not set when the row is created
+    created_at: str  # utc_now() value at insert time
+    updated_at: str  # utc_now() value at insert time, refreshed on every decision
+
+
+def normalize_action(action: dict[str, Any]) -> str:  # canonical JSON text for an action dict
+    return json.dumps(action, sort_keys=True, separators=(",", ":"))  # sorted keys + no whitespace, so key order does not change the text
+
+
+def action_hash(action: dict[str, Any]) -> str:  # SHA-256 hex digest of normalize_action(action)
+    return hashlib.sha256(normalize_action(action).encode()).hexdigest()
+
+
+class ApprovalService:  # SQLite-backed store for tool-approval requests and their decisions
+    def __init__(self, db_path: str):
+        self.db_path = Path(db_path)
+
+    async def init(self) -> None:  # safe to call repeatedly: creates the parent directory and table only if missing
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                """
+                create table if not exists approvals (
+                  id integer primary key autoincrement,
+                  approval_id text not null unique,
+                  task_id text not null,
+                  action_hash text not null,
+                  normalized_action_json text not null,
+                  status text not null,
+                  decision text,
+                  created_at text not null,
+                  updated_at text not null
+                )
+                """
+            )
+            await db.commit()
+
+    async def create_pending(self, task_id: str, action: dict[str, Any]) -> Approval:  # insert a new "pending" row and return it
+        await self.init()  # every public method re-runs init(), so the table exists even without a startup call
+        now = utc_now()
+        approval_id = f"approval_{uuid4().hex[:12]}"
+        normalized = normalize_action(action)
+        digest = action_hash(action)
+        async with aiosqlite.connect(self.db_path) as db:
+            cursor = await db.execute(
+                """
+                insert into approvals(
+                  approval_id, task_id, action_hash, normalized_action_json,
+                  status, created_at, updated_at
+                ) values (?, ?, ?, ?, ?, ?, ?)
+                """,
+                (approval_id, task_id, digest, normalized, "pending", now, now),
+            )
+            await db.commit()
+            row_id = cursor.lastrowid
+        return Approval(
+            id=row_id,
+            approval_id=approval_id,
+            task_id=task_id,
+            action_hash=digest,
+            normalized_action=action,  # the caller's dict is returned as-is; only the DB holds the normalized JSON text
+            status="pending",
+            created_at=now,
+            updated_at=now,
+        )
+
+    async def pending(self) -> list[Approval]:  # all rows still in status "pending", ordered by created_at ascending
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row  # rows become addressable by column name in _row_to_approval
+            cursor = await db.execute(
+                "select * from approvals where status = 'pending' order by created_at"
+            )
+            rows = await cursor.fetchall()
+        return [self._row_to_approval(row) for row in rows]
+
+    async def allow_once(self, approval_id: str) -> None:  # row becomes "resolved", so it never matches is_allowed_forever()
+        await self._decide(approval_id, "resolved", "allow_once")
+
+    async def allow_forever(self, approval_id: str) -> None:  # "allowed_forever" is the status is_allowed_forever() looks for
+        await self._decide(approval_id, "allowed_forever", "allow_forever")
+
+    async def deny(self, approval_id: str) -> None:  # same "resolved" status as allow_once; only the decision column differs
+        await self._decide(approval_id, "resolved", "deny")
+
+    async def is_allowed_forever(self, action: dict[str, Any]) -> bool:  # True if any row with the same action hash was allowed forever
+        await self.init()
+        digest = action_hash(action)
+        async with aiosqlite.connect(self.db_path) as db:
+            cursor = await db.execute(
+                """
+                select 1 from approvals
+                where action_hash = ? and status = 'allowed_forever'
+                limit 1
+                """,
+                (digest,),
+            )
+            row = await cursor.fetchone()
+        return row is not None
+
+    async def _decide(self, approval_id: str, status: str, decision: str) -> None:  # NOTE(review): an unknown approval_id updates zero rows and still returns silently
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                """
+                update approvals set status = ?, decision = ?, updated_at = ?
+                where approval_id = ?
+                """,
+                (status, decision, utc_now(), approval_id),
+            )
+            await db.commit()
+
+    def _row_to_approval(self, row: aiosqlite.Row) -> Approval:  # map a DB row to the model, decoding the stored action JSON
+        return Approval(
+            id=row["id"],
+            approval_id=row["approval_id"],
+            task_id=row["task_id"],
+            action_hash=row["action_hash"],
+            normalized_action=json.loads(row["normalized_action_json"]),
+            status=row["status"],
+            decision=row["decision"],
+            created_at=row["created_at"],
+            updated_at=row["updated_at"],
+        )
diff --git a/duck_core/config.py b/duck_core/config.py
new file mode 100644
index 0000000..0bb0e0e
--- /dev/null
+++ b/duck_core/config.py
@@ -0,0 +1,92 @@
+"""Runtime settings for DuckLM, loaded from environment variables."""
+
+import os
+from dataclasses import dataclass
+from functools import lru_cache
+from pathlib import Path
+
+from dotenv import load_dotenv
+
+
+@dataclass(frozen=True)
+class Settings:
+    """Immutable settings; each default applies when its variable is unset."""
+
+    llama_server_bin: str = "llama-server"
+    main_model_path: str = "./models/Qwen3.6/nonMTP/Qwen3.6-35B-A3B-UD-Q4_K_M.gguf"
+    main_port: int = 8081
+    ctx_size: int = 65536
+    n_gpu_layers: str = "auto"
+    host: str = "127.0.0.1"
+    api_host: str = "127.0.0.1"
+    api_port: int = 8000
+    workspace: str = "./workspace"
+    db_path: str = "./data/duck.sqlite3"
+    max_input_tokens: int = 49152
+    max_recent_events_tokens: int = 12000
+    max_memory_tokens: int = 8000
+    max_skill_tokens: int = 6000
+    qdrant_url: str = "http://127.0.0.1:6333"
+    skip_live_llm_tests: int = 0
+
+    @property
+    def db_file(self) -> Path:
+        """Return ``db_path`` as a ``Path``."""
+        return Path(self.db_path)
+
+
+# Environment variable read for each Settings field.
+_ENV_VARS: dict[str, str] = {
+    "llama_server_bin": "DUCK_LLAMA_SERVER_BIN",
+    "main_model_path": "DUCK_MAIN_MODEL_PATH",
+    "main_port": "DUCK_MAIN_PORT",
+    "ctx_size": "DUCK_CTX_SIZE",
+    "n_gpu_layers": "DUCK_N_GPU_LAYERS",
+    "host": "DUCK_HOST",
+    "api_host": "DUCK_API_HOST",
+    "api_port": "DUCK_API_PORT",
+    "workspace": "DUCK_WORKSPACE",
+    "db_path": "DUCK_DB_PATH",
+    "max_input_tokens": "DUCK_MAX_INPUT_TOKENS",
+    "max_recent_events_tokens": "DUCK_MAX_RECENT_EVENTS_TOKENS",
+    "max_memory_tokens": "DUCK_MAX_MEMORY_TOKENS",
+    "max_skill_tokens": "DUCK_MAX_SKILL_TOKENS",
+    "qdrant_url": "QDRANT_URL",
+    "skip_live_llm_tests": "DUCK_SKIP_LIVE_LLM_TESTS",
+}
+
+
+def _env_overrides() -> dict[str, str | int]:
+    """Collect the Settings fields that are set in the environment.
+
+    Fields whose default is an ``int`` are parsed with ``int()``.
+
+    Raises:
+        ValueError: if an integer setting holds a non-integer value; the
+            message names the offending variable.
+    """
+    defaults = Settings()
+    overrides: dict[str, str | int] = {}
+    for field_name, env_name in _ENV_VARS.items():
+        raw = os.getenv(env_name)
+        if raw is None:
+            continue
+        if not isinstance(getattr(defaults, field_name), int):
+            overrides[field_name] = raw
+            continue
+        try:
+            overrides[field_name] = int(raw)
+        except ValueError as exc:
+            raise ValueError(f"{env_name} must be an integer, got {raw!r}") from exc
+    return overrides
+
+
+@lru_cache
+def get_settings() -> Settings:
+    """Call ``load_dotenv()`` and build the settings; the result is cached.
+
+    Raises:
+        ValueError: if an integer setting cannot be parsed.
+    """
+    load_dotenv()
+    return Settings(**_env_overrides())
diff --git a/duck_core/context_builder.py b/duck_core/context_builder.py
new file mode 100644
index 0000000..7f14ea7
--- /dev/null
+++ b/duck_core/context_builder.py
@@ -0,0 +1,10 @@
+from duck_core.tasks.state import TaskState
+
+
+class ContextBuilder:
+    """Builds the chat message list sent to the model for a task."""
+
+    def build_basic_messages(self, task: TaskState) -> list[dict[str, str]]:
+        """Return a one-message conversation holding the task's user message."""
+        user_turn = {"role": "user", "content": task.user_message}
+        return [user_turn]
diff --git a/data/events/.gitkeep b/duck_core/events/__init__.py
similarity index 100%
rename from data/events/.gitkeep
rename to duck_core/events/__init__.py
diff --git a/duck_core/events/store.py b/duck_core/events/store.py
new file mode 100644
index 0000000..a9f8bb3
--- /dev/null
+++ b/duck_core/events/store.py
@@ -0,0 +1,105 @@
+import json
+from pathlib import Path
+from typing import Any
+
+import aiosqlite
+from pydantic import BaseModel
+
+from duck_core.tasks.store import utc_now
+
+
+class Event(BaseModel):
+    """One row of the ``events`` table, with the payload decoded from JSON."""
+
+    id: int
+    task_id: str
+    sequence: int
+    event_type: str
+    payload: dict[str, Any]
+    created_at: str
+
+
+class EventStore:
+    """SQLite-backed event log; ``(task_id, sequence)`` is unique."""
+
+    def __init__(self, db_path: str):
+        self.db_path = Path(db_path)
+
+    async def init(self) -> None:
+        """Create the database directory, the ``events`` table and its unique index."""
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                """
+                create table if not exists events (
+                  id integer primary key autoincrement,
+                  task_id text not null,
+                  sequence integer not null,
+                  event_type text not null,
+                  payload_json text not null,
+                  created_at text not null
+                )
+                """
+            )
+            await db.execute(
+                """
+                create unique index if not exists idx_events_task_sequence
+                on events(task_id, sequence)
+                """
+            )
+            await db.commit()
+
+    async def append(self, task_id: str, event_type: str, payload: dict[str, Any]) -> Event:
+        """Store a new event for ``task_id`` and return it.
+
+        The next sequence number is computed by the INSERT statement itself
+        rather than by a separate SELECT, so another writer cannot claim the
+        same number between the read and the write.
+        """
+        await self.init()
+        created_at = utc_now()
+        async with aiosqlite.connect(self.db_path) as db:
+            cursor = await db.execute(
+                """
+                insert into events(task_id, sequence, event_type, payload_json, created_at)
+                select ?, coalesce(max(sequence), 0) + 1, ?, ?, ?
+                from events where task_id = ?
+                """,
+                (task_id, event_type, json.dumps(payload), created_at, task_id),
+            )
+            event_id = cursor.lastrowid
+            # Read back the sequence the INSERT assigned, before committing.
+            cursor = await db.execute(
+                "select sequence from events where id = ?", (event_id,)
+            )
+            sequence = (await cursor.fetchone())[0]
+            await db.commit()
+        return Event(
+            id=event_id,
+            task_id=task_id,
+            sequence=sequence,
+            event_type=event_type,
+            payload=payload,
+            created_at=created_at,
+        )
+
+    async def list_events(self, task_id: str) -> list[Event]:
+        """Return every event stored for ``task_id``, ordered by sequence."""
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            cursor = await db.execute(
+                "select * from events where task_id = ? order by sequence", (task_id,)
+            )
+            rows = await cursor.fetchall()
+        return [
+            Event(
+                id=row["id"],
+                task_id=row["task_id"],
+                sequence=row["sequence"],
+                event_type=row["event_type"],
+                payload=json.loads(row["payload_json"]),
+                created_at=row["created_at"],
+            )
+            for row in rows
+        ]
diff --git a/data/memory/.gitkeep b/duck_core/experience/__init__.py
similarity index 100%
rename from data/memory/.gitkeep
rename to duck_core/experience/__init__.py
diff --git a/duck_core/experience/recorder.py b/duck_core/experience/recorder.py
new file mode 100644
index 0000000..c78dea7
--- /dev/null
+++ b/duck_core/experience/recorder.py
@@ -0,0 +1,198 @@
+import json
+import re
+from pathlib import Path
+
+import aiosqlite
+from pydantic import BaseModel
+
+from duck_core.tasks.store import utc_now
+
+
+class ExperienceRecord(BaseModel):
+    """One row of ``experience_records`` with its JSON list columns decoded."""
+
+    id: int | None = None
+    task_id: str
+    skill_id: str | None = None
+    summary: str
+    result: str
+    what_worked: list[str] = []
+    what_failed: list[str] = []
+    reusable_lesson: str | None = None
+    suggested_skill_patch: str | None = None
+    confidence: float | None = None
+    created_at: str
+
+
+class ExperienceRecorder:
+    """Persists experience records in SQLite and writes skill update proposals."""
+
+    def __init__(self, db_path: str):
+        self.db_path = Path(db_path)
+
+    async def init(self) -> None:
+        """Create the database directory and the ``experience_records`` table."""
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                """
+                create table if not exists experience_records (
+                  id integer primary key autoincrement,
+                  task_id text not null,
+                  skill_id text,
+                  summary text not null,
+                  result text not null,
+                  what_worked_json text,
+                  what_failed_json text,
+                  reusable_lesson text,
+                  suggested_skill_patch text,
+                  confidence real,
+                  created_at text not null
+                )
+                """
+            )
+            await db.commit()
+
+    async def record(
+        self,
+        task_id: str,
+        summary: str,
+        result: str,
+        skill_id: str | None = None,
+        what_worked: list[str] | None = None,
+        what_failed: list[str] | None = None,
+        reusable_lesson: str | None = None,
+        suggested_skill_patch: str | None = None,
+        confidence: float | None = None,
+    ) -> ExperienceRecord:
+        """Insert an experience record and return it with its row id.
+
+        When both ``suggested_skill_patch`` and ``skill_id`` are given, a
+        proposal file is also written via ``write_skill_update_proposal``.
+        """
+        await self.init()
+        now = utc_now()
+        worked = what_worked or []
+        failed = what_failed or []
+        async with aiosqlite.connect(self.db_path) as db:
+            cursor = await db.execute(
+                """
+                insert into experience_records(
+                  task_id, skill_id, summary, result, what_worked_json,
+                  what_failed_json, reusable_lesson, suggested_skill_patch,
+                  confidence, created_at
+                ) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    task_id,
+                    skill_id,
+                    summary,
+                    result,
+                    json.dumps(worked),
+                    json.dumps(failed),
+                    reusable_lesson,
+                    suggested_skill_patch,
+                    confidence,
+                    now,
+                ),
+            )
+            await db.commit()
+            row_id = cursor.lastrowid
+        if suggested_skill_patch and skill_id:
+            self.write_skill_update_proposal(task_id, skill_id, suggested_skill_patch)
+        return ExperienceRecord(
+            id=row_id,
+            task_id=task_id,
+            skill_id=skill_id,
+            summary=summary,
+            result=result,
+            what_worked=worked,
+            what_failed=failed,
+            reusable_lesson=reusable_lesson,
+            suggested_skill_patch=suggested_skill_patch,
+            confidence=confidence,
+            created_at=now,
+        )
+
+    async def list_records(self) -> list[ExperienceRecord]:
+        """Return all records, newest ``created_at`` first."""
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            cursor = await db.execute(
+                "select * from experience_records order by created_at desc"
+            )
+            rows = await cursor.fetchall()
+        return [self._row_to_record(row) for row in rows]
+
+    async def get_record(self, record_id: int) -> ExperienceRecord | None:
+        """Return the record with ``record_id``, or ``None`` if there is none."""
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            cursor = await db.execute(
+                "select * from experience_records where id = ?", (record_id,)
+            )
+            row = await cursor.fetchone()
+        return self._row_to_record(row) if row else None
+
+    def write_skill_update_proposal(self, task_id: str, skill_id: str, patch: str) -> Path:
+        """Write a Markdown proposal under ``skills/_proposals`` and return its path.
+
+        ``skill_id`` is reduced to letters, digits, ``.``, ``_`` and ``-`` before
+        it is used in the file name, so an id containing path separators or
+        ``..`` cannot place the file outside the proposals directory.  The
+        unmodified id is still written into the file body.
+        """
+        directory = Path("skills/_proposals")
+        directory.mkdir(parents=True, exist_ok=True)
+        stamp = utc_now().replace(":", "").replace("+", "_")
+        safe_skill_id = re.sub(r"[^A-Za-z0-9._-]+", "_", skill_id).strip(".") or "skill"
+        path = directory / f"{stamp}_{safe_skill_id}.patch.md"
+        path.write_text(
+            "\n".join(
+                [
+                    "# Skill update proposal",
+                    "",
+                    f"Skill: {skill_id}",
+                    "",
+                    "## Reason",
+                    "",
+                    "Reflection suggested a reusable skill improvement.",
+                    "",
+                    "## Proposed changes",
+                    "",
+                    patch,
+                    "",
+                    "## Evidence",
+                    "",
+                    f"Task id: {task_id}",
+                    "",
+                    "## Risk",
+                    "",
+                    "Low.",
+                    "",
+                    "## Requires human approval",
+                    "",
+                    "Yes.",
+                ]
+            ),
+            encoding="utf-8",
+        )
+        return path
+
+    def _row_to_record(self, row: aiosqlite.Row) -> ExperienceRecord:
+        """Convert a database row into an ``ExperienceRecord``."""
+        return ExperienceRecord(
+            id=row["id"],
+            task_id=row["task_id"],
+            skill_id=row["skill_id"],
+            summary=row["summary"],
+            result=row["result"],
+            what_worked=json.loads(row["what_worked_json"] or "[]"),
+            what_failed=json.loads(row["what_failed_json"] or "[]"),
+            reusable_lesson=row["reusable_lesson"],
+            suggested_skill_patch=row["suggested_skill_patch"],
+            confidence=row["confidence"],
+            created_at=row["created_at"],
+        )
diff --git a/data/permissions/.gitkeep b/duck_core/memory/__init__.py
similarity index 100%
rename from data/permissions/.gitkeep
rename to duck_core/memory/__init__.py
diff --git a/duck_core/memory/policy.py b/duck_core/memory/policy.py
new file mode 100644
index 0000000..05c29ec
--- /dev/null
+++ b/duck_core/memory/policy.py
@@ -0,0 +1,26 @@
+from pydantic import BaseModel
+
+
+class MemoryDecision(BaseModel):
+    """Result of classifying a summary for memory storage."""
+
+    should_store: bool
+    memory_type: str
+    summary: str
+    importance: float
+    metadata: dict[str, str] = {}
+
+
+class MemoryPolicy:
+    """Placeholder policy that declines to store anything."""
+
+    async def classify(self, summary: str, task_id: str) -> MemoryDecision:
+        """Return a do-not-store decision tagged with ``task_id`` and the stub source."""
+        provenance = {"task_id": task_id, "source": "stub_policy"}
+        return MemoryDecision(
+            should_store=False,
+            memory_type="event",
+            summary=summary,
+            importance=0.0,
+            metadata=provenance,
+        )
diff --git a/duck_core/memory/vector_memory.py b/duck_core/memory/vector_memory.py
new file mode 100644
index 0000000..2dba15f
--- /dev/null
+++ b/duck_core/memory/vector_memory.py
@@ -0,0 +1,95 @@
+from typing import Any
+from uuid import uuid4
+
+import httpx
+
+
+class EmbeddingsUnavailableError(RuntimeError):
+    """Raised when no embedding vector can be obtained for a text."""
+
+
+class VectorMemory:
+    """Stores and searches text in a Qdrant collection over its HTTP API."""
+
+    def __init__(
+        self,
+        qdrant_url: str,
+        collection_name: str = "duck_memory",
+        embeddings_base_url: str | None = "http://127.0.0.1:8081/v1",
+    ):
+        self.qdrant_url = qdrant_url.rstrip("/")
+        self.collection_name = collection_name
+        self.embeddings_base_url = embeddings_base_url.rstrip("/") if embeddings_base_url else None
+
+    async def add_memory(self, text: str, metadata: dict[str, Any] | None = None) -> str:
+        """Embed ``text``, upsert it as a new point and return the point id.
+
+        Raises:
+            EmbeddingsUnavailableError: if the text cannot be embedded.
+            httpx.HTTPError: if a Qdrant request fails or the points request is rejected.
+        """
+        vector = await self._embed(text)
+        point_id = str(uuid4())
+        # ``text`` goes last so a "text" key in metadata cannot replace the stored text.
+        payload = {**(metadata or {}), "text": text}
+        async with httpx.AsyncClient(timeout=20.0, trust_env=False) as client:
+            # The status of the collection request is not checked; only the
+            # points request below is.
+            await client.put(
+                f"{self.qdrant_url}/collections/{self.collection_name}",
+                json={"vectors": {"size": len(vector), "distance": "Cosine"}},
+            )
+            response = await client.put(
+                f"{self.qdrant_url}/collections/{self.collection_name}/points",
+                json={"points": [{"id": point_id, "vector": vector, "payload": payload}]},
+            )
+            response.raise_for_status()
+        return point_id
+
+    async def search_memory(self, query: str, limit: int = 5) -> list[dict[str, Any]]:
+        """Embed ``query`` and return the ``result`` list of the Qdrant search response.
+
+        Raises:
+            EmbeddingsUnavailableError: if the query cannot be embedded.
+            httpx.HTTPError: if the search request fails or is rejected.
+        """
+        vector = await self._embed(query)
+        async with httpx.AsyncClient(timeout=20.0, trust_env=False) as client:
+            response = await client.post(
+                f"{self.qdrant_url}/collections/{self.collection_name}/points/search",
+                json={"vector": vector, "limit": limit, "with_payload": True},
+            )
+            response.raise_for_status()
+        return response.json().get("result", [])
+
+    async def _embed(self, text: str) -> list[float]:
+        """Return the embedding of ``text`` from the configured embeddings endpoint.
+
+        Raises:
+            EmbeddingsUnavailableError: if no endpoint is configured, the request
+                fails, the endpoint answers with status 400 or above, or the
+                response does not contain an embedding.
+        """
+        if not self.embeddings_base_url:
+            raise EmbeddingsUnavailableError(
+                "Embeddings endpoint is not configured; vector memory is explicit stub."
+            )
+        try:
+            async with httpx.AsyncClient(timeout=20.0, trust_env=False) as client:
+                response = await client.post(
+                    f"{self.embeddings_base_url}/embeddings",
+                    json={"model": "local-main", "input": text},
+                )
+        except httpx.HTTPError as exc:
+            raise EmbeddingsUnavailableError(f"Embeddings endpoint unavailable: {exc}") from exc
+        if response.status_code >= 400:
+            raise EmbeddingsUnavailableError(
+                f"Embeddings endpoint unavailable: HTTP {response.status_code}"
+            )
+        try:
+            embedding = response.json()["data"][0]["embedding"]
+            return [float(value) for value in embedding]
+        except (KeyError, IndexError, TypeError, ValueError) as exc:
+            raise EmbeddingsUnavailableError(
+                "Embeddings endpoint returned an unexpected response"
+            ) from exc
diff --git a/duck_core/model_client.py b/duck_core/model_client.py
new file mode 100644
index 0000000..4c01d5d
--- /dev/null
+++ b/duck_core/model_client.py
@@ -0,0 +1,274 @@
+import json
+import logging
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+import httpx
+import yaml
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(frozen=True)
+class RoleConfig:
+    """Settings for one model role, as listed under ``models`` in the config file."""
+
+    role: str
+    provider: str
+    base_url: str
+    model: str
+    purpose: str
+    structured_output: bool
+    temperature: float
+    max_output_tokens: int
+    system_prompt: str
+    response_schema: str | None = None
+
+
+@dataclass
+class ModelResponse:
+    """Result of one non-streaming chat completion."""
+
+    role: str
+    model: str
+    content: str
+    reasoning_content: str | None
+    raw: dict[str, Any]
+    latency_ms: float
+    prompt_tokens: int | None = None
+    completion_tokens: int | None = None
+    total_tokens: int | None = None
+
+
+class ModelClient:
+    """HTTP client for role-specific ``/chat/completions`` endpoints."""
+
+    def __init__(self, config_path: str = "config/models.yaml", timeout: float = 120.0):
+        self.config_path = Path(config_path)
+        self.timeout = timeout
+        data = yaml.safe_load(self.config_path.read_text(encoding="utf-8"))
+        self.default_provider = data["default_provider"]
+        self._roles = {
+            role: RoleConfig(role=role, **settings)
+            for role, settings in data["models"].items()
+        }
+
+    def list_roles(self) -> dict[str, dict[str, Any]]:
+        """Return every role's configuration as plain dictionaries keyed by role."""
+        return {
+            role: {
+                "provider": cfg.provider,
+                "base_url": cfg.base_url,
+                "model": cfg.model,
+                "purpose": cfg.purpose,
+                "structured_output": cfg.structured_output,
+                "temperature": cfg.temperature,
+                "max_output_tokens": cfg.max_output_tokens,
+                "system_prompt": cfg.system_prompt,
+                "response_schema": cfg.response_schema,
+            }
+            for role, cfg in self._roles.items()
+        }
+
+    def get_role_config(self, role: str) -> RoleConfig:
+        """Return the configuration for ``role``.
+
+        Raises:
+            KeyError: if the role is not configured.
+        """
+        try:
+            return self._roles[role]
+        except KeyError as exc:
+            raise KeyError(f"Unknown model role: {role}") from exc
+
+    def _system_message(self, cfg: RoleConfig) -> dict[str, str] | None:
+        """Return the role's system prompt file as a message, or ``None`` if it is missing."""
+        path = Path(cfg.system_prompt)
+        if not path.exists():
+            return None
+        return {"role": "system", "content": path.read_text(encoding="utf-8")}
+
+    def _response_format(
+        self, cfg: RoleConfig, response_format: dict[str, Any] | None
+    ) -> dict[str, Any] | None:
+        """Choose the ``response_format`` to send.
+
+        An explicit ``response_format`` wins.  Otherwise a structured-output
+        role gets its JSON schema when the schema file exists and plain
+        ``json_object`` when it does not; other roles get ``None``.
+        """
+        if response_format is not None:
+            return response_format
+        if not cfg.structured_output:
+            return None
+        if cfg.response_schema and Path(cfg.response_schema).exists():
+            schema = json.loads(Path(cfg.response_schema).read_text(encoding="utf-8"))
+            return {
+                "type": "json_schema",
+                "json_schema": {"name": "action_directive", "schema": schema, "strict": True},
+            }
+        return {"type": "json_object"}
+
+    def _build_payload(
+        self,
+        cfg: RoleConfig,
+        messages: list[dict[str, str]],
+        temperature: float | None,
+        max_output_tokens: int | None,
+        response_format: dict[str, Any] | None,
+        stream: bool = False,
+    ) -> dict[str, Any]:
+        """Build the request body shared by ``chat`` and ``stream_chat``.
+
+        The role's system prompt is prepended unless ``messages`` already
+        contains a system message; the caller's list is not modified.
+        """
+        outbound = list(messages)
+        system_message = self._system_message(cfg)
+        if system_message and not any(message["role"] == "system" for message in outbound):
+            outbound.insert(0, system_message)
+
+        payload: dict[str, Any] = {
+            "model": cfg.model,
+            "messages": outbound,
+            "temperature": cfg.temperature if temperature is None else temperature,
+            "max_tokens": cfg.max_output_tokens if max_output_tokens is None else max_output_tokens,
+        }
+        if stream:
+            payload["stream"] = True
+        fmt = self._response_format(cfg, response_format)
+        if fmt is not None:
+            payload["response_format"] = fmt
+        return payload
+
+    @staticmethod
+    def _first_choice(body: dict[str, Any]) -> dict[str, Any]:
+        """Return the first entry of ``choices``, or ``{}`` when it is missing or empty."""
+        choices = body.get("choices") or [{}]
+        return choices[0] or {}
+
+    async def chat(
+        self,
+        role: str,
+        messages: list[dict[str, str]],
+        temperature: float | None = None,
+        max_output_tokens: int | None = None,
+        response_format: dict[str, Any] | None = None,
+    ) -> ModelResponse:
+        """Send a chat completion request for ``role`` and return the reply.
+
+        ``temperature`` and ``max_output_tokens`` fall back to the role's
+        configured values when ``None``.
+
+        Raises:
+            KeyError: if ``role`` is not configured.
+            ConnectionError: if the HTTP request fails or returns an error status.
+        """
+        cfg = self.get_role_config(role)
+        payload = self._build_payload(
+            cfg, messages, temperature, max_output_tokens, response_format
+        )
+
+        start = time.perf_counter()
+        try:
+            async with httpx.AsyncClient(timeout=self.timeout, trust_env=False) as client:
+                response = await client.post(f"{cfg.base_url}/chat/completions", json=payload)
+                response.raise_for_status()
+                raw = response.json()
+        except httpx.HTTPError as exc:
+            raise ConnectionError(f"Model backend unavailable for role {role}: {exc}") from exc
+
+        latency_ms = (time.perf_counter() - start) * 1000
+        usage = raw.get("usage") or {}
+        # An empty ``choices`` list yields empty content instead of an IndexError.
+        message = self._first_choice(raw).get("message") or {}
+        content = message.get("content") or ""
+        reasoning_content = message.get("reasoning_content")
+        logger.info("model role=%s model=%s latency_ms=%.1f usage=%s", role, cfg.model, latency_ms, usage)
+        return ModelResponse(
+            role=role,
+            model=cfg.model,
+            content=content,
+            reasoning_content=reasoning_content,
+            raw=raw,
+            latency_ms=latency_ms,
+            prompt_tokens=usage.get("prompt_tokens"),
+            completion_tokens=usage.get("completion_tokens"),
+            total_tokens=usage.get("total_tokens"),
+        )
+
+    async def stream_chat(
+        self,
+        role: str,
+        messages: list[dict[str, str]],
+        temperature: float | None = None,
+        max_output_tokens: int | None = None,
+        response_format: dict[str, Any] | None = None,
+    ):
+        """Stream a chat completion for ``role`` as an async generator.
+
+        Yields ``{"type": "reasoning_delta", "delta": ...}`` and
+        ``{"type": "content_delta", "delta": ...}`` dictionaries for each
+        non-empty delta, and stops at the ``[DONE]`` marker.
+
+        Raises:
+            KeyError: if ``role`` is not configured.
+            ConnectionError: if the HTTP request fails or returns an error status.
+        """
+        cfg = self.get_role_config(role)
+        payload = self._build_payload(
+            cfg, messages, temperature, max_output_tokens, response_format, stream=True
+        )
+
+        try:
+            async with httpx.AsyncClient(timeout=self.timeout, trust_env=False) as client:
+                async with client.stream(
+                    "POST", f"{cfg.base_url}/chat/completions", json=payload
+                ) as response:
+                    response.raise_for_status()
+                    async for line in response.aiter_lines():
+                        # Accept "data:" with or without the following space.
+                        if not line.startswith("data:"):
+                            continue
+                        raw_data = line.removeprefix("data:").strip()
+                        if raw_data == "[DONE]":
+                            break
+                        if not raw_data:
+                            continue
+                        chunk = json.loads(raw_data)
+                        # Chunks with an empty ``choices`` list carry no delta.
+                        delta = self._first_choice(chunk).get("delta") or {}
+                        reasoning_delta = delta.get("reasoning_content")
+                        content_delta = delta.get("content")
+                        if reasoning_delta:
+                            yield {"type": "reasoning_delta", "delta": reasoning_delta}
+                        if content_delta:
+                            yield {"type": "content_delta", "delta": content_delta}
+        except httpx.HTTPError as exc:
+            raise ConnectionError(f"Model backend unavailable for role {role}: {exc}") from exc
+
+    async def ping(self) -> dict[str, Any]:
+        """Request ``/models`` from every role's base URL and report the outcome per role."""
+        results: dict[str, Any] = {}
+        async with httpx.AsyncClient(timeout=10.0, trust_env=False) as client:
+            for role, cfg in self._roles.items():
+                try:
+                    started = time.perf_counter()
+                    response = await client.get(f"{cfg.base_url}/models")
+                    response.raise_for_status()
+                    results[role] = {
+                        "ok": True,
+                        "base_url": cfg.base_url,
+                        "model": cfg.model,
+                        "latency_ms": round((time.perf_counter() - started) * 1000, 1),
+                    }
+                except httpx.HTTPError as exc:
+                    results[role] = {
+                        "ok": False,
+                        "base_url": cfg.base_url,
+                        "model": cfg.model,
+                        "error": str(exc),
+                    }
+        return results
diff --git a/duck_core/reflection.py b/duck_core/reflection.py
new file mode 100644
index 0000000..53cae06
--- /dev/null
+++ b/duck_core/reflection.py
@@ -0,0 +1,30 @@
+from duck_core.experience.recorder import ExperienceRecorder, ExperienceRecord
+from duck_core.model_client import ModelClient
+
+
+class Reflection:
+    """Asks the ``critic`` role to review a task and records the answer."""
+
+    def __init__(self, model_client: ModelClient, recorder: ExperienceRecorder):
+        self.model_client = model_client
+        self.recorder = recorder
+
+    async def reflect(self, task_id: str, transcript: str) -> ExperienceRecord:
+        """Send ``transcript`` to the critic and store its reply as an experience record.
+
+        The full reply becomes the reusable lesson and its first 500
+        characters the summary; result and confidence are fixed values.
+        """
+        prompt = (
+            "Reflect on this DuckLM task. Cover outcome, waste, JSON/tool issues, "
+            f"and reusable lesson.\n\n{transcript}"
+        )
+        critique = await self.model_client.chat("critic", [{"role": "user", "content": prompt}])
+        lesson = critique.content
+        return await self.recorder.record(
+            task_id=task_id,
+            summary=lesson[:500],
+            result="unknown",
+            reusable_lesson=lesson,
+            confidence=0.5,
+        )
diff --git a/duck_core/runtime_loop.py b/duck_core/runtime_loop.py
new file mode 100644
index 0000000..ed6b22c
--- /dev/null
+++ b/duck_core/runtime_loop.py
@@ -0,0 +1,220 @@
+import json
+from dataclasses import dataclass
+from typing import Any
+
+from duck_core.approvals.service import ApprovalService
+from duck_core.context_builder import ContextBuilder
+from duck_core.events.store import EventStore
+from duck_core.model_client import ModelClient
+from duck_core.tasks.store import TaskStore
+from duck_core.tools.gateway import ToolGateway
+
+
+@dataclass
+class ChatResult:
+    """Outcome of ``RuntimeLoop.run_chat``."""
+
+    task_id: str
+    status: str
+    final_response: str
+    reasoning_content: str | None = None
+
+
+class RuntimeLoop:
+    """Runs one chat task: action tools first, then the thinker model."""
+
+    def __init__(
+        self,
+        task_store: TaskStore,
+        event_store: EventStore,
+        model_client: ModelClient | None = None,
+        context_builder: ContextBuilder | None = None,
+        approval_service: ApprovalService | None = None,
+    ):
+        self.task_store = task_store
+        self.event_store = event_store
+        self.model_client = model_client or ModelClient()
+        self.context_builder = context_builder or ContextBuilder()
+        self.approval_service = approval_service
+
+    async def run_chat(
+        self, message: str, workspace: str | None = None, debug: bool = False
+    ) -> ChatResult:
+        """Create a task for ``message``, run it and return the outcome.
+
+        The status is ``waiting_for_approval`` when a tool needs approval,
+        ``completed`` when the thinker answered, and ``failed`` when an
+        exception was raised after the task was created; the exception text
+        is then returned as the final response.
+        """
+        task = await self.task_store.create_task(message, workspace, debug)
+        await self.event_store.append(
+            task.task_id,
+            "task_created",
+            {"message": message, "workspace": workspace, "debug": debug},
+        )
+        try:
+            messages = self.context_builder.build_basic_messages(task)
+            tool_observations = await self._run_action_tools(task.task_id, messages, workspace)
+            if any(observation.get("requires_approval") for observation in tool_observations):
+                await self.task_store.waiting_for_approval(task.task_id)
+                await self.event_store.append(
+                    task.task_id,
+                    "task_waiting_for_approval",
+                    {"observations": tool_observations},
+                )
+                return ChatResult(
+                    task_id=task.task_id,
+                    status="waiting_for_approval",
+                    final_response="Waiting for approval.",
+                    reasoning_content=None,
+                )
+            if tool_observations:
+                # Tool results are passed to the thinker as an extra user message.
+                messages = [
+                    *messages,
+                    {
+                        "role": "user",
+                        "content": "tool_observations:\n"
+                        + json.dumps(tool_observations, ensure_ascii=False, indent=2),
+                    },
+                ]
+            await self.event_store.append(
+                task.task_id, "model_call_started", {"role": "thinker"}
+            )
+            response = await self.model_client.chat("thinker", messages)
+            await self.event_store.append(
+                task.task_id,
+                "cognition_response",
+                {
+                    "role": response.role,
+                    "content": response.content,
+                    "reasoning_content": response.reasoning_content,
+                },
+            )
+            await self.event_store.append(
+                task.task_id,
+                "model_call_finished",
+                {
+                    "role": response.role,
+                    "model": response.model,
+                    "latency_ms": response.latency_ms,
+                    "prompt_tokens": response.prompt_tokens,
+                    "completion_tokens": response.completion_tokens,
+                    "total_tokens": response.total_tokens,
+                },
+            )
+            await self.task_store.complete_task(task.task_id, response.content)
+            await self.event_store.append(
+                task.task_id,
+                "task_completed",
+                {
+                    "final_response": response.content,
+                    "reasoning_content": response.reasoning_content,
+                },
+            )
+            return ChatResult(
+                task_id=task.task_id,
+                status="completed",
+                final_response=response.content,
+                reasoning_content=response.reasoning_content,
+            )
+        except Exception as exc:
+            await self.task_store.fail_task(task.task_id, str(exc))
+            await self.event_store.append(
+                task.task_id, "task_failed", {"error": str(exc)}
+            )
+            return ChatResult(
+                task_id=task.task_id,
+                status="failed",
+                final_response=str(exc),
+                reasoning_content=None,
+            )
+
+    async def _run_action_tools(
+        self, task_id: str, messages: list[dict[str, str]], workspace: str | None
+    ) -> list[dict[str, Any]]:
+        """Ask the ``action`` role for a directive and run its actions in order.
+
+        Returns one observation per processed action.  Processing stops at the
+        first action that requires approval.  An empty list is returned when
+        the directive cannot be obtained, is not a JSON object, or lists no
+        actions.
+        """
+        try:
+            await self.event_store.append(task_id, "model_call_started", {"role": "action"})
+            response = await self.model_client.chat("action", messages)
+            directive = json.loads(response.content)
+            # Valid JSON that is not an object (a list, string, number, ...) has
+            # no ``actions`` to read; treat it like any other unusable directive.
+            if not isinstance(directive, dict):
+                raise ValueError("Action directive must be a JSON object")
+        except Exception as exc:
+            await self.event_store.append(
+                task_id,
+                "action_directive_failed",
+                {"error": str(exc)},
+            )
+            return []
+
+        await self.event_store.append(task_id, "action_directive", directive)
+        actions = directive.get("actions") or []
+        if not isinstance(actions, list) or not actions:
+            return []
+
+        gateway = ToolGateway.default(workspace or ".")
+        observations: list[dict[str, Any]] = []
+        for index, action in enumerate(actions, start=1):
+            if not isinstance(action, dict):
+                observations.append(
+                    {"index": index, "ok": False, "error": "Action must be an object"}
+                )
+                continue
+            tool_name = str(action.get("tool", ""))
+            await self.event_store.append(
+                task_id,
+                "tool_call_started",
+                {"index": index, "tool": tool_name, "args": action.get("args") or {}},
+            )
+            result = await gateway.run_action(action)
+            result_payload = result.model_dump()
+            if result.metadata.get("requires_approval"):
+                approval = None
+                if self.approval_service is not None:
+                    approval = await self.approval_service.create_pending(task_id, action)
+                await self.event_store.append(
+                    task_id,
+                    "tool_approval_requested",
+                    {
+                        "index": index,
+                        "tool": tool_name,
+                        "action": action,
+                        "approval_id": approval.approval_id if approval else None,
+                        "reason": result.error,
+                    },
+                )
+                observations.append(
+                    {
+                        "index": index,
+                        "tool": tool_name,
+                        "reason": action.get("reason"),
+                        "requires_approval": True,
+                        "approval_id": approval.approval_id if approval else None,
+                        "result": result_payload,
+                    }
+                )
+                break
+            await self.event_store.append(
+                task_id,
+                "tool_call_finished",
+                {"index": index, "tool": tool_name, "result": result_payload},
+            )
+            observations.append(
+                {
+                    "index": index,
+                    "tool": tool_name,
+                    "reason": action.get("reason"),
+                    "result": result_payload,
+                }
+            )
+        return observations
diff --git a/duck_core/schemas/action_directive.schema.json b/duck_core/schemas/action_directive.schema.json
new file mode 100644
index 0000000..12d2863
--- /dev/null
+++ b/duck_core/schemas/action_directive.schema.json
@@ -0,0 +1,55 @@
+{
+  "type": "object",
+  "required": ["kind", "intent", "risk_level", "actions"],
+  "additionalProperties": false,
+  "properties": {
+    "kind": {
+      "type": "string",
+      "enum": ["action_directive"]
+    },
+    "intent": {
+      "type": "string",
+      "minLength": 1
+    },
+    "risk_level": {
+      "type": "string",
+      "enum": ["none", "low", "medium", "high", "critical"]
+    },
+    "actions": {
+      "type": "array",
+      "minItems": 0,
+      "items": {
+        "type": "object",
+        "required": ["tool", "args"],
+        "additionalProperties": false,
+        "properties": {
+          "tool": {
+            "type": "string",
+            "minLength": 1
+          },
+          "args": {
+            "type": "object"
+          },
+          "reason": {
+            "type": "string"
+          }
+        }
+      }
+    },
+    "memory_hints": {
+      "type": "array",
+      "items": {
+        "type": "string"
+      }
+    },
+    "expected_observations": {
+      "type": "array",
+      "items": {
+        "type": "string"
+      }
+    },
+    "stop_reason": {
+      "type": "string"
+    }
+  }
+}
diff --git a/data/state/.gitkeep b/duck_core/skills/__init__.py
similarity index 100%
rename from data/state/.gitkeep
rename to duck_core/skills/__init__.py
diff --git a/duck_core/skills/registry.py b/duck_core/skills/registry.py
new file mode 100644
index 0000000..323bfd4
--- /dev/null
+++ b/duck_core/skills/registry.py
@@ -0,0 +1,68 @@
+from pathlib import Path
+
+import yaml
+from pydantic import BaseModel
+
+
+class Skill(BaseModel):  # one skill: skill.yaml fields plus optional markdown sections
+    id: str  # key under which SkillRegistry caches and looks up the skill
+    title: str  # searched by find_candidate_skills
+    description: str  # searched by find_candidate_skills
+    version: int
+    tags: list[str] = []  # searched by find_candidate_skills
+    required_tools: list[str] = []
+    risk_level: str = "low"
+    inputs: list[str] = []
+    outputs: list[str] = []
+    success_criteria: list[str] = []
+    procedure: str = ""  # text of procedure.md next to skill.yaml, "" if the file is missing
+    examples: str = ""  # text of examples.md, "" if the file is missing
+    notes: str = ""  # text of notes.md, "" if the file is missing
+
+
+class SkillCandidate(BaseModel):
+    skill: Skill
+    score: float
+    reason: str
+
+
+class SkillRegistry:
+    """Loads skills from <skills_dir>/*/skill.yaml and ranks them by keyword overlap."""
+
+    def __init__(self, skills_dir: str = "skills"):
+        self.skills_dir = Path(skills_dir)
+        self._cache: dict[str, Skill] | None = None  # None until the first load
+
+    def load_skills(self) -> list[Skill]:
+        """Re-read every skill from disk, refresh the cache and return the skills."""
+        skills: dict[str, Skill] = {}
+        if self.skills_dir.exists():
+            for path in sorted(self.skills_dir.glob("*/skill.yaml")):
+                # Decode as UTF-8 explicitly: the locale default breaks non-ASCII skills.
+                data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
+                for section in ("procedure", "examples", "notes"):
+                    data[section] = self._read_optional(path.parent / f"{section}.md")
+                skill = Skill(**data)
+                skills[skill.id] = skill
+        self._cache = skills
+        return list(skills.values())
+
+    def get_skill(self, skill_id: str) -> Skill | None:
+        """Return the skill with this id, loading from disk on first use."""
+        if self._cache is None:
+            self.load_skills()
+        return (self._cache or {}).get(skill_id)
+
+    async def find_candidate_skills(self, user_request: str, limit: int = 3) -> list[SkillCandidate]:
+        """Score skills by how many request words occur in title/description/tags."""
+        terms = set(user_request.lower().split())
+        candidates: list[SkillCandidate] = []
+        for skill in self.load_skills():
+            haystack = " ".join([skill.title, skill.description, " ".join(skill.tags)]).lower()
+            if score := sum(1 for term in terms if term in haystack):
+                candidate = SkillCandidate(skill=skill, score=float(score), reason="keyword match")
+                candidates.append(candidate)
+        return sorted(candidates, key=lambda item: item.score, reverse=True)[:limit]
+
+    def _read_optional(self, path: Path) -> str:
+        return path.read_text(encoding="utf-8") if path.exists() else ""
diff --git a/duck_core/tasks/__init__.py b/duck_core/tasks/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/duck_core/tasks/__init__.py
@@ -0,0 +1 @@
+
diff --git a/duck_core/tasks/state.py b/duck_core/tasks/state.py
new file mode 100644
index 0000000..19e8fd8
--- /dev/null
+++ b/duck_core/tasks/state.py
@@ -0,0 +1,12 @@
+from pydantic import BaseModel
+
+
+class TaskState(BaseModel):  # snapshot of one row of the tasks table
+    task_id: str
+    status: str  # TaskStore writes: running, completed, failed, cancelled, waiting_for_approval
+    user_message: str
+    workspace: str | None = None
+    debug: bool = False  # stored as 0/1 in SQLite
+    final_response: str | None = None  # also holds the failure message set by fail_task
+    created_at: str  # ISO-8601 UTC timestamp from utc_now()
+    updated_at: str  # refreshed on every status update
diff --git a/duck_core/tasks/store.py b/duck_core/tasks/store.py
new file mode 100644
index 0000000..f473dc2
--- /dev/null
+++ b/duck_core/tasks/store.py
@@ -0,0 +1,115 @@
+from datetime import UTC, datetime
+from pathlib import Path
+from uuid import uuid4
+
+import aiosqlite
+
+from duck_core.tasks.state import TaskState
+
+
+def utc_now() -> str:
+    return datetime.now(tz=UTC).isoformat()  # timezone-aware ISO-8601 string ("+00:00" suffix)
+
+
+class TaskStore:
+    def __init__(self, db_path: str):
+        self.db_path = Path(db_path)
+
+    async def init(self) -> None:
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                """
+                create table if not exists tasks (
+                  task_id text primary key,
+                  status text not null,
+                  user_message text not null,
+                  workspace text,
+                  debug integer not null default 0,
+                  final_response text,
+                  created_at text not null,
+                  updated_at text not null
+                )
+                """
+            )
+            await db.commit()
+
+    async def create_task(self, user_message: str, workspace: str | None, debug: bool) -> TaskState:
+        await self.init()
+        now = utc_now()
+        task_id = f"task_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}_{uuid4().hex[:8]}"
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                """
+                insert into tasks(task_id, status, user_message, workspace, debug, created_at, updated_at)
+                values (?, ?, ?, ?, ?, ?, ?)
+                """,
+                (task_id, "running", user_message, workspace, int(debug), now, now),
+            )
+            await db.commit()
+        return TaskState(
+            task_id=task_id,
+            status="running",
+            user_message=user_message,
+            workspace=workspace,
+            debug=debug,
+            created_at=now,
+            updated_at=now,
+        )
+
+    async def update_status(
+        self, task_id: str, status: str, final_response: str | None = None
+    ) -> None:
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                """
+                update tasks
+                set status = ?, final_response = coalesce(?, final_response), updated_at = ?
+                where task_id = ?
+                """,
+                (status, final_response, utc_now(), task_id),
+            )
+            await db.commit()
+
+    async def complete_task(self, task_id: str, final_response: str) -> None:
+        await self.update_status(task_id, "completed", final_response)
+
+    async def fail_task(self, task_id: str, message: str) -> None:
+        await self.update_status(task_id, "failed", message)
+
+    async def cancel_task(self, task_id: str) -> None:
+        await self.update_status(task_id, "cancelled")
+
+    async def waiting_for_approval(self, task_id: str) -> None:
+        await self.update_status(task_id, "waiting_for_approval")
+
+    async def get_task(self, task_id: str) -> TaskState | None:
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            cursor = await db.execute("select * from tasks where task_id = ?", (task_id,))
+            row = await cursor.fetchone()
+        return self._row_to_task(row) if row else None
+
+    async def list_tasks(self, limit: int = 50) -> list[TaskState]:
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            cursor = await db.execute(
+                "select * from tasks order by created_at desc limit ?", (limit,)
+            )
+            rows = await cursor.fetchall()
+        return [self._row_to_task(row) for row in rows]
+
+    def _row_to_task(self, row: aiosqlite.Row) -> TaskState:
+        return TaskState(
+            task_id=row["task_id"],
+            status=row["status"],
+            user_message=row["user_message"],
+            workspace=row["workspace"],
+            debug=bool(row["debug"]),
+            final_response=row["final_response"],
+            created_at=row["created_at"],
+            updated_at=row["updated_at"],
+        )
diff --git a/duck_core/tools/__init__.py b/duck_core/tools/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/duck_core/tools/__init__.py
@@ -0,0 +1 @@
+
diff --git a/duck_core/tools/base.py b/duck_core/tools/base.py
new file mode 100644
index 0000000..abf6cbf
--- /dev/null
+++ b/duck_core/tools/base.py
@@ -0,0 +1,18 @@
+from typing import Any, Protocol
+
+from pydantic import BaseModel, Field
+
+
+class ToolResult(BaseModel):  # outcome of a single tool invocation
+    ok: bool
+    output: str | None = None
+    error: str | None = None
+    metadata: dict[str, Any] = Field(default_factory=dict)  # e.g. {"requires_approval": True}
+
+
+class Tool(Protocol):
+    name: str
+    risk_level: str
+
+    async def run(self, args: dict[str, Any]) -> ToolResult:
+        ...
diff --git a/duck_core/tools/file_read.py b/duck_core/tools/file_read.py
new file mode 100644
index 0000000..d2a879f
--- /dev/null
+++ b/duck_core/tools/file_read.py
@@ -0,0 +1,36 @@
+from pathlib import Path
+from typing import Any
+
+from duck_core.tools.base import ToolResult
+from duck_core.tools.paths import WorkspacePathError, resolve_workspace_path
+
+
+class FileReadTool:
+    """Reads a text file inside the workspace, refusing sensitive or oversized files."""
+
+    name = "file_read"
+    risk_level = "low"
+
+    def __init__(self, workspace: str, max_bytes: int = 1_000_000):
+        self.workspace = workspace
+        self.max_bytes = max_bytes
+
+    async def run(self, args: dict[str, Any]) -> ToolResult:
+        raw_path = str(args.get("path", ""))
+        try:
+            path = resolve_workspace_path(self.workspace, raw_path)
+        except WorkspacePathError as exc:
+            return ToolResult(ok=False, error=str(exc))
+        if self._requires_approval(path):
+            # Flag the refusal so it is handled as an approval request, like shell_exec_safe.
+            message = f"Reading {raw_path} requires explicit approval"
+            return ToolResult(ok=False, error=message, metadata={"requires_approval": True})
+        if not path.is_file():
+            return ToolResult(ok=False, error=f"File not found: {raw_path}")
+        if (size := path.stat().st_size) > self.max_bytes:
+            return ToolResult(ok=False, error=f"File exceeds max size: {self.max_bytes}")
+        text = path.read_text(encoding="utf-8", errors="replace")
+        return ToolResult(ok=True, output=text, metadata={"path": str(path), "bytes_read": size})
+
+    def _requires_approval(self, path: Path) -> bool:
+        return path.name == ".env" or ".ssh" in path.parts or str(path) == "/etc/shadow"
diff --git a/duck_core/tools/file_write.py b/duck_core/tools/file_write.py
new file mode 100644
index 0000000..2dc1618
--- /dev/null
+++ b/duck_core/tools/file_write.py
@@ -0,0 +1,40 @@
+from typing import Any
+
+from duck_core.tools.base import ToolResult
+from duck_core.tools.paths import WorkspacePathError, resolve_workspace_path
+
+
+class FileWriteTool:
+    """Write UTF-8 text to a file inside the workspace; overwriting must be explicit."""
+
+    name = "file_write"
+    risk_level = "medium"
+
+    def __init__(self, workspace: str):
+        self.workspace = workspace
+
+    async def run(self, args: dict[str, Any]) -> ToolResult:
+        """Write args["content"] to args["path"] and report what was written."""
+        raw_path = str(args.get("path", ""))
+        content = str(args.get("content", ""))
+        # Only a real boolean enables overwriting: bool("false") would be True.
+        overwrite = args.get("overwrite", False) is True
+        try:
+            path = resolve_workspace_path(self.workspace, raw_path)
+        except WorkspacePathError as exc:
+            return ToolResult(ok=False, error=str(exc))
+        existed = path.exists()
+        if existed and not overwrite:
+            message = "Refusing to overwrite existing file without overwrite=true or approval"
+            return ToolResult(ok=False, error=message, metadata={"path": str(path)})
+        try:
+            path.parent.mkdir(parents=True, exist_ok=True)
+            path.write_text(content, encoding="utf-8")  # same encoding as bytes_written
+        except OSError as exc:  # e.g. the target is a directory or is not writable
+            return ToolResult(ok=False, error=str(exc), metadata={"path": str(path)})
+        details = {"path": str(path), "bytes_written": len(content.encode("utf-8"))}
+        return ToolResult(
+            ok=True,
+            output=f"Wrote {raw_path}",
+            metadata={**details, "created": not existed, "updated": existed},
+        )
diff --git a/duck_core/tools/gateway.py b/duck_core/tools/gateway.py
new file mode 100644
index 0000000..b9df257
--- /dev/null
+++ b/duck_core/tools/gateway.py
@@ -0,0 +1,31 @@
+from typing import Any
+
+from duck_core.tools.base import Tool, ToolResult
+from duck_core.tools.file_read import FileReadTool
+from duck_core.tools.file_write import FileWriteTool
+from duck_core.tools.shell_exec_safe import ShellExecSafeTool
+
+
+class ToolGateway:
+    """Dispatches action dicts to the tool registered under action["tool"]."""
+
+    def __init__(self, tools: list[Tool]):
+        self.tools = {tool.name: tool for tool in tools}
+
+    @classmethod
+    def default(cls, workspace: str) -> "ToolGateway":
+        """Build a gateway holding the file read, file write and safe shell tools."""
+        builtin = (FileReadTool, FileWriteTool, ShellExecSafeTool)
+        return cls([make_tool(workspace) for make_tool in builtin])
+
+    async def run_action(self, action: dict[str, Any]) -> ToolResult:
+        """Run one action; an unknown tool or non-dict args yields a failed result."""
+        requested = str(action.get("tool", ""))
+        if requested not in self.tools:
+            return ToolResult(ok=False, error=f"Unknown tool: {requested}")
+
+        # A missing or falsy "args" value is treated as "no arguments".
+        tool_args = action.get("args") or {}
+        if isinstance(tool_args, dict):
+            return await self.tools[requested].run(tool_args)
+        return ToolResult(ok=False, error="Tool args must be an object")
diff --git a/duck_core/tools/paths.py b/duck_core/tools/paths.py
new file mode 100644
index 0000000..9b8019d
--- /dev/null
+++ b/duck_core/tools/paths.py
@@ -0,0 +1,13 @@
+from pathlib import Path
+
+
+class WorkspacePathError(ValueError):
+    """Raised when a requested path resolves to a location outside the workspace."""
+
+
+def resolve_workspace_path(workspace: str, relative_path: str) -> Path:
+    """Resolve relative_path inside workspace; raise WorkspacePathError if it escapes."""
+    base = Path(workspace).resolve()
+    if (target := (base / relative_path).resolve()).is_relative_to(base):
+        return target
+    raise WorkspacePathError(f"Path escapes workspace: {relative_path}")
diff --git a/duck_core/tools/shell_exec_safe.py b/duck_core/tools/shell_exec_safe.py
new file mode 100644
index 0000000..a015545
--- /dev/null
+++ b/duck_core/tools/shell_exec_safe.py
@@ -0,0 +1,95 @@
+import shlex
+import subprocess
+from typing import Any
+
+from duck_core.tools.base import ToolResult
+
+
+ALLOWLIST = {
+    "pwd",
+    "ls",
+    "cat",
+    "head",
+    "tail",
+    "grep",
+    "find",
+    "pytest",
+    "python -m pytest",
+    "python3 -m pytest",
+    "git status",
+    "git diff",
+    "git log",
+}
+
+BLOCKLIST = {
+    "rm",
+    "sudo",
+    "su",
+    "dd",
+    "mkfs",
+    "mount",
+    "umount",
+    "shutdown",
+    "reboot",
+    "poweroff",
+    "systemctl",
+    "service",
+    "apt install",
+    "apt remove",
+    "pacman -S",
+    "pacman -R",
+    "pip install",
+    "npm install -g",
+    "chmod -R",
+    "chown -R",
+    "curl | sh",
+    "wget | sh",
+}
+
+
+class ShellExecSafeTool:
+    name = "shell_exec_safe"
+    risk_level = "medium"
+
+    def __init__(self, workspace: str, timeout_seconds: int = 30):
+        self.workspace = workspace
+        self.timeout_seconds = timeout_seconds
+
+    async def run(self, args: dict[str, Any]) -> ToolResult:
+        """Run args["command"] in the workspace when the policy check allows it."""
+        command = str(args.get("command", "")).strip()
+        allowed, reason = self._is_allowed(command)
+        if not allowed:
+            return ToolResult(ok=False, error=reason, metadata={"requires_approval": True})
+        limits = {"cwd": self.workspace, "timeout": self.timeout_seconds, "check": False}
+        try:
+            # NOTE(review): this call blocks the event loop until the command finishes.
+            proc = subprocess.run(command, shell=True, text=True, capture_output=True, **limits)
+        except (subprocess.SubprocessError, OSError) as exc:  # timeout, unusable cwd, ...
+            return ToolResult(ok=False, error=str(exc))
+        return ToolResult(
+            ok=proc.returncode == 0,
+            output=proc.stdout,
+            error=proc.stderr if proc.returncode else None,
+            metadata={"returncode": proc.returncode, "command": command},
+        )
+
+    def _is_allowed(self, command: str) -> tuple[bool, str | None]:
+        """Return (allowed, reason); unparseable input is refused instead of raising."""
+        try:
+            parts = shlex.split(command)
+        except ValueError as exc:  # e.g. unbalanced quotes
+            return False, f"Command cannot be parsed: {exc}"
+        if not parts:
+            return False, "Empty command"
+        # run() uses shell=True, so operators would chain commands past the allowlist.
+        if any(char in command for char in ";&|<>`$()\n"):
+            return False, "Shell operators and substitutions require approval"
+        # Match blocked entries against whole words (by basename), not substrings.
+        words = " ".join(part.rsplit("/", 1)[-1] for part in parts).lower()
+        for blocked in BLOCKLIST:
+            if f" {blocked.lower()} " in f" {words} ":
+                return False, f"Command is blocked: {blocked}"
+        if any(" ".join(parts[:size]) in ALLOWLIST for size in (1, 2, 3)):
+            return True, None
+        return False, "Command is outside allowlist and requires approval"
diff --git a/duck_core/web/static/app.js b/duck_core/web/static/app.js
new file mode 100644
index 0000000..602a5d3
--- /dev/null
+++ b/duck_core/web/static/app.js
@@ -0,0 +1,510 @@
+const state = {
+  running: false,
+  messages: [],
+};
+
+async function jsonFetch(url, options) {
+  const response = await fetch(url, options);
+  if (!response.ok) throw new Error(await response.text());
+  return response.json();
+}
+
+function escapeText(value) {
+  return String(value ?? ""); // NOTE(review): only stringifies (null/undefined -> ""); no HTML escaping
+}
+
+function setStatus(id, text, tone = "neutral") {
+  const node = document.querySelector(id);
+  if (!node) return;
+  node.textContent = text;
+  node.dataset.tone = tone;
+}
+
+function addMessage(role, content, meta = "", options = {}) {
+  const list = document.querySelector("#messages");
+  if (!list) return;
+
+  const article = document.createElement("article");
+  article.className = `message ${role}`;
+
+  const avatar = document.createElement("div");
+  avatar.className = "avatar";
+  avatar.textContent = role === "user" ? "U" : "D";
+
+  // Build the meta row from text nodes: escapeText() does not HTML-escape, so innerHTML is unsafe.
+  const author = document.createElement("strong");
+  author.textContent = role === "user" ? "You" : "DuckLM";
+  const metaLabel = document.createElement("span");
+  metaLabel.textContent = escapeText(meta);
+  const messageMeta = document.createElement("div");
+  messageMeta.className = "message-meta";
+  messageMeta.append(author, metaLabel);
+
+  const text = document.createElement("p");
+  text.textContent = content;
+  const bubble = document.createElement("div");
+  bubble.className = "bubble";
+  const reasoning = role === "assistant" && options.reasoning ? [createInlineReasoning()] : [];
+  bubble.append(messageMeta, ...reasoning, text);
+  article.append(avatar, bubble);
+  list.append(article);
+  list.scrollTop = list.scrollHeight;
+  return article;
+}
+
+function createInlineReasoning() {
+  const section = document.createElement("section");
+  section.className = "message-reasoning is-collapsed";
+
+  const button = document.createElement("button");
+  button.className = "message-reasoning-toggle";
+  button.type = "button";
+  button.setAttribute("aria-expanded", "false");
+
+  const title = document.createElement("span");
+  title.textContent = "Размышление";
+  const status = document.createElement("span");
+  status.className = "message-reasoning-status";
+  status.textContent = "streaming";
+  button.append(title, status);
+
+  const body = document.createElement("pre");
+  body.hidden = true;
+  body.textContent = "";
+
+  section.append(button, body);
+  return section;
+}
+
+function createToolTerminal(eventPayload) {
+  const payload = eventPayload.payload || eventPayload;
+  const args = payload.args || {};
+  const terminal = document.createElement("section");
+  terminal.className = "tool-terminal";
+  terminal.dataset.toolIndex = String(payload.index || "");
+
+  const header = document.createElement("div");
+  header.className = "tool-terminal-header";
+
+  const dots = document.createElement("span");
+  dots.className = "terminal-dots";
+  dots.innerHTML = "<i></i><i></i><i></i>";
+
+  const title = document.createElement("span");
+  title.className = "tool-terminal-title";
+  title.textContent = formatToolCommand(payload.tool, args);
+
+  const status = document.createElement("span");
+  status.className = "tool-terminal-status";
+  status.textContent = "running";
+
+  header.append(dots, title, status);
+
+  const body = document.createElement("pre");
+  body.className = "tool-terminal-body";
+  body.textContent = formatToolStart(payload.tool, args);
+
+  terminal.append(header, body);
+  return terminal;
+}
+
+function formatToolCommand(tool, args) { // title shown in the tool terminal header
+  const isFileTool = tool === "file_read" || tool === "file_write";
+  if (isFileTool) return `$ ${tool} ${args.path || ""}`.trim();
+  if (tool === "shell_exec_safe") return `$ ${args.command || tool}`;
+  return `$ ${tool || "tool"}`;
+}
+
+function formatToolStart(tool, args) {
+  const lines = [formatToolCommand(tool, args)];
+  const serializedArgs = JSON.stringify(args || {}, null, 2);
+  if (serializedArgs !== "{}") lines.push(serializedArgs);
+  return lines.join("\n");
+}
+
+function appendToolTerminal(article, eventPayload) {
+  const paragraph = article?.querySelector("p");
+  const terminal = createToolTerminal(eventPayload);
+  paragraph?.before(terminal);
+  document.querySelector("#messages").scrollTop = document.querySelector("#messages").scrollHeight;
+}
+
+function updateToolTerminal(article, eventPayload) {
+  const payload = eventPayload.payload || eventPayload;
+  const terminal = article?.querySelector(`.tool-terminal[data-tool-index="${payload.index || ""}"]`);
+  const body = terminal?.querySelector(".tool-terminal-body");
+  const status = terminal?.querySelector(".tool-terminal-status");
+  const result = payload.result || {};
+  if (!body || !status) return;
+  terminal.classList.toggle("is-error", !result.ok);
+  status.textContent = result.ok ? "ok" : "error";
+
+  const parts = [body.textContent.trim()];
+  if (result.output) parts.push("\nstdout\n" + result.output.trimEnd());
+  if (result.error) parts.push("\nstderr\n" + result.error.trimEnd());
+  if (result.metadata && Object.keys(result.metadata).length) {
+    parts.push("\nmetadata\n" + JSON.stringify(result.metadata, null, 2));
+  }
+  body.textContent = parts.join("\n");
+  document.querySelector("#messages").scrollTop = document.querySelector("#messages").scrollHeight;
+}
+
+function appendApprovalTerminal(article, eventPayload) {
+  const payload = eventPayload.payload || eventPayload;
+  appendToolTerminal(article, {
+    payload: {
+      index: payload.index,
+      tool: payload.tool,
+      args: payload.action?.args || {},
+    },
+  });
+  const terminal = article?.querySelector(`.tool-terminal[data-tool-index="${payload.index || ""}"]`);
+  const body = terminal?.querySelector(".tool-terminal-body");
+  const status = terminal?.querySelector(".tool-terminal-status");
+  terminal?.classList.add("is-waiting");
+  if (status) status.textContent = "approval";
+  if (body) body.textContent += `\n\napproval required\n${payload.reason || ""}`;
+}
+
+function setMessagePending(article, text) {
+  const paragraph = article?.querySelector("p");
+  if (paragraph) paragraph.textContent = text;
+}
+
+function appendMessageText(article, delta) {
+  const paragraph = article?.querySelector("p");
+  if (!paragraph) return;
+  paragraph.textContent += delta;
+  document.querySelector("#messages").scrollTop = document.querySelector("#messages").scrollHeight;
+}
+
+function appendInlineReasoning(article, delta) {
+  const block = article?.querySelector(".message-reasoning");
+  const body = block?.querySelector("pre");
+  const status = block?.querySelector(".message-reasoning-status");
+  if (!body) return;
+  body.textContent += delta;
+  if (status) status.textContent = "streaming";
+  document.querySelector("#messages").scrollTop = document.querySelector("#messages").scrollHeight;
+}
+
+function finishInlineReasoning(article, reasoning) {
+  const block = article?.querySelector(".message-reasoning");
+  const body = block?.querySelector("pre");
+  const status = block?.querySelector(".message-reasoning-status");
+  if (!body) return;
+  body.textContent = reasoning?.trim() || body.textContent.trim() || "Размышления не были получены.";
+  if (status) status.textContent = "done";
+}
+
+async function refreshEvents(taskId) {
+  const events = await jsonFetch(`/v1/tasks/${taskId}/events`);
+  const list = document.querySelector("#events");
+  if (!list) return events;
+
+  list.innerHTML = "";
+  for (const event of events) {
+    const item = document.createElement("li");
+    const title = document.createElement("strong");
+    const detail = document.createElement("span");
+    title.textContent = `${event.sequence}. ${event.event_type}`;
+    detail.textContent = summarizeEvent(event.payload);
+    item.append(title, detail);
+    list.appendChild(item);
+  }
+  return events;
+}
+
+function summarizeEvent(payload) {
+  // Returns a short one-line description of an event payload.
+  if (!payload || typeof payload !== "object") return "";
+  if (payload.role && payload.latency_ms) {
+    return `${payload.role} · ${Math.round(payload.latency_ms)} ms`;
+  }
+
+  // Fields are not guaranteed to be strings (a number or an object would make
+  // .slice() throw), so convert before truncating.
+  const asText = (value) => (typeof value === "string" ? value : JSON.stringify(value));
+  for (const key of ["content", "final_response"]) {
+    if (payload[key]) return asText(payload[key]).slice(0, 140);
+  }
+  if (payload.error) return asText(payload.error);
+  return JSON.stringify(payload);
+}
+
+function toggleInlineReasoning(button) {
+  const block = button.closest(".message-reasoning");
+  const body = block?.querySelector("pre");
+  if (!block || !body) return;
+  const expanded = button.getAttribute("aria-expanded") === "true";
+  button.setAttribute("aria-expanded", String(!expanded));
+  body.hidden = expanded;
+  block.classList.toggle("is-collapsed", expanded);
+}
+
+function parseSseBlock(block) {
+  let name = "message";
+  let data = "";
+  block.split("\n").forEach((line) => {
+    if (line.startsWith("event:")) name = line.slice(6).trim();
+    if (line.startsWith("data:")) data += line.slice(5).trimStart();
+  });
+  return data ? {name, data: JSON.parse(data)} : null; // blocks without data are skipped
+}
+
+async function streamChat(payload, onEvent) {
+  const response = await fetch("/v1/chat/stream", {
+    method: "POST",
+    headers: {"Content-Type": "application/json"},
+    body: JSON.stringify(payload),
+  });
+  if (!response.ok) throw new Error(await response.text());
+  if (!response.body) throw new Error("Streaming response is not available in this browser.");
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+  let buffer = "";
+  try {
+    while (true) {
+      const {value, done} = await reader.read();
+      buffer += decoder.decode(value, {stream: !done});
+      // Events end with a blank line; accept CRLF as well as LF line endings.
+      const blocks = buffer.split(/\r?\n\r?\n/);
+      buffer = done ? "" : blocks.pop() || "";
+      for (const block of blocks) {
+        const event = parseSseBlock(block);
+        if (event) await onEvent(event);
+      }
+      if (done) break;
+    }
+  } finally {
+    await reader.cancel().catch(() => {}); // stop the download if onEvent threw
+  }
+}
+
+async function sendMessage() {
+  if (state.running) return;
+  const input = document.querySelector("#message");
+  const message = input.value.trim();
+  if (!message) return;
+
+  state.running = true;
+  document.querySelector("#run").disabled = true;
+  setStatus("#task-status", "running", "warn");
+  addMessage("user", message, "submitted");
+  input.value = "";
+  const pending = addMessage("assistant", "", "thinking", {reasoning: true});
+  let taskId = "";
+  let contentStarted = false;
+
+  try {
+    await streamChat({
+      message,
+      workspace: document.querySelector("#workspace").value,
+      debug: document.querySelector("#debug").checked,
+    }, async ({name, data}) => {
+      if (data.task_id) taskId = data.task_id;
+      if (name === "task_created") {
+        taskId = data.task_id;
+        setStatus("#task-status", taskId, "warn");
+        return;
+      }
+      if (name === "reasoning_delta") {
+        pending.querySelector(".message-meta span").textContent = "reasoning";
+        appendInlineReasoning(pending, data.delta || "");
+        return;
+      }
+      if (name === "tool_call_started") {
+        pending.querySelector(".message-meta span").textContent = "tool";
+        appendToolTerminal(pending, data);
+        return;
+      }
+      if (name === "tool_call_finished") {
+        pending.querySelector(".message-meta span").textContent = "tool";
+        updateToolTerminal(pending, data);
+        return;
+      }
+      if (name === "tool_approval_requested") {
+        pending.querySelector(".message-meta span").textContent = "approval";
+        appendApprovalTerminal(pending, data);
+        return;
+      }
+      if (name === "content_delta") {
+        if (!contentStarted) {
+          contentStarted = true;
+          setMessagePending(pending, "");
+        }
+        pending.querySelector(".message-meta span").textContent = "answering";
+        appendMessageText(pending, data.delta || "");
+        return;
+      }
+      if (name === "done") {
+        if (!contentStarted) {
+          setMessagePending(pending, data.final_response || "No final content returned.");
+        }
+        pending.querySelector(".message-meta span").textContent = data.status;
+        setStatus("#task-status", data.task_id, data.status === "completed" ? "ok" : "warn");
+        finishInlineReasoning(pending, data.reasoning_content);
+        await refreshEvents(data.task_id);
+        return;
+      }
+      if (name === "error") {
+        throw new Error(data.error || "Stream failed.");
+      }
+    });
+  } catch (error) {
+    if (!taskId) input.value = message;
+    setMessagePending(pending, error.message);
+    pending.querySelector(".message-meta span").textContent = "failed";
+    setStatus("#task-status", "failed", "bad");
+    if (taskId) await refreshEvents(taskId);
+  } finally {
+    state.running = false;
+    document.querySelector("#run").disabled = false;
+    input.focus();
+  }
+}
+
+async function checkRuntime() {
+  try {
+    await jsonFetch("/health");
+    setStatus("#api-status", "online", "ok");
+  } catch {
+    setStatus("#api-status", "offline", "bad");
+  }
+
+  try {
+    const roles = await jsonFetch("/v1/models/ping");
+    const ok = Object.values(roles).every((item) => item.ok);
+    setStatus("#model-status", ok ? "online" : "degraded", ok ? "ok" : "warn");
+  } catch {
+    setStatus("#model-status", "offline", "bad");
+  }
+}
+
+function bindChat() {
+  const composer = document.querySelector("#composer");
+  const input = document.querySelector("#message");
+  composer?.addEventListener("submit", (event) => {
+    event.preventDefault();
+    sendMessage();
+  });
+  input?.addEventListener("keydown", (event) => {
+    // Enter pressed during an IME composition confirms the candidate; it must not send.
+    if (event.key === "Enter" && !event.shiftKey && !event.isComposing) {
+      event.preventDefault();
+      sendMessage();
+    }
+  });
+  document.querySelector("#new-chat")?.addEventListener("click", () => {
+    document.querySelector("#messages")?.replaceChildren();
+    addMessage("assistant", "Новая сессия готова.", "ready");
+    document.querySelector("#events")?.replaceChildren();
+    setStatus("#task-status", "none");
+  });
+  document.querySelector("#messages")?.addEventListener("click", (event) => {
+    const button = event.target.closest(".message-reasoning-toggle");
+    if (button) toggleInlineReasoning(button);
+  });
+  document.querySelector("#debug")?.addEventListener("change", (event) => {
+    document.querySelector("#debug-panel")?.toggleAttribute("hidden", !event.target.checked);
+  });
+}
+
+async function loadSimplePages() {
+  const skills = document.querySelector("#skills");
+  if (skills) skills.textContent = JSON.stringify(await jsonFetch("/v1/skills"), null, 2);
+  const experience = document.querySelector("#experience");
+  if (experience) experience.textContent = JSON.stringify(await jsonFetch("/v1/experience"), null, 2);
+  const approvals = document.querySelector("#approvals");
+  if (approvals) await renderApprovals(approvals);
+}
+
+async function renderApprovals(container) {  // Rebuild `container` from the /v1/approvals/pending response.
+  const approvals = await jsonFetch("/v1/approvals/pending");
+  container.innerHTML = "";  // drop previously rendered cards before re-rendering
+  if (!approvals.length) {
+    const empty = document.createElement("p");
+    empty.className = "empty-state";
+    empty.textContent = "No pending approvals.";
+    container.append(empty);
+    return;
+  }
+  // One <article> card per approval; all values are written through textContent, not innerHTML.
+  for (const approval of approvals) {
+    const card = document.createElement("article");
+    card.className = "approval-card";
+    card.dataset.approvalId = approval.approval_id;
+    // Header: tool name (fallback "Tool action") next to the approval status.
+    const header = document.createElement("div");
+    header.className = "approval-card-header";
+    const title = document.createElement("h2");
+    title.textContent = approval.normalized_action?.tool || "Tool action";
+    const status = document.createElement("span");
+    status.textContent = approval.status;
+    header.append(title, status);
+    // Metadata rows: task_id, approval_id and created_at.
+    const meta = document.createElement("dl");
+    meta.className = "approval-meta";
+    meta.append(metaRow("Task", approval.task_id));
+    meta.append(metaRow("Approval", approval.approval_id));
+    meta.append(metaRow("Created", approval.created_at));
+    // Pretty-printed JSON of the normalized action.
+    const action = document.createElement("pre");
+    action.className = "approval-action";
+    action.textContent = JSON.stringify(approval.normalized_action, null, 2);
+    // Decision buttons; each carries its decision name in data-approval-action.
+    const actions = document.createElement("div");
+    actions.className = "approval-actions";
+    actions.append(
+      approvalButton("Allow once", "allow_once"),
+      approvalButton("Allow forever", "allow_forever"),
+      approvalButton("Deny", "deny", "danger"),
+    );
+    // Assemble the card and attach it to the list.
+    card.append(header, meta, action, actions);
+    container.append(card);
+  }
+}
+
+function metaRow(label, value) {
+  const row = document.createElement("div");
+  const dt = document.createElement("dt");
+  const dd = document.createElement("dd");
+  dt.textContent = label;
+  dd.textContent = value || "";
+  row.append(dt, dd);
+  return row;
+}
+
+function approvalButton(label, action, tone = "") {
+  const button = document.createElement("button");
+  button.type = "button";
+  button.textContent = label;
+  button.dataset.approvalAction = action;
+  if (tone) button.dataset.tone = tone;
+  return button;
+}
+
+document.querySelector("#approvals")?.addEventListener("click", async (event) => {
+  const button = event.target.closest("[data-approval-action]");
+  if (!button) return;
+  const card = button.closest(".approval-card");
+  const approvalId = card?.dataset.approvalId;
+  if (!approvalId) return;
+
+  button.disabled = true;
+  const action = button.dataset.approvalAction;
+  await jsonFetch(`/v1/approvals/${approvalId}/${action}`, {method: "POST"});
+  await renderApprovals(document.querySelector("#approvals"));
+});
+
+document.querySelector("#memory-search")?.addEventListener("click", async () => {
+  const q = document.querySelector("#memory-query").value;
+  document.querySelector("#memory-results").textContent =
+    JSON.stringify(await jsonFetch(`/v1/memory/search?q=${encodeURIComponent(q)}`), null, 2);
+});
+
+bindChat();
+checkRuntime();
+loadSimplePages().catch(console.error);
diff --git a/duck_core/web/static/style.css b/duck_core/web/static/style.css
new file mode 100644
index 0000000..7429f98
--- /dev/null
+++ b/duck_core/web/static/style.css
@@ -0,0 +1,673 @@
+:root {
+  color-scheme: light;
+  --bg: #eef2f6;
+  --sidebar: #111827;
+  --sidebar-soft: #1f2937;
+  --panel: #ffffff;
+  --panel-strong: #f8fafc;
+  --text: #111827;
+  --muted: #64748b;
+  --border: #d7dee8;
+  --accent: #1f6feb;
+  --accent-strong: #174ea6;
+  --ok: #12805c;
+  --warn: #b7791f;
+  --bad: #b42318;
+  --shadow: 0 18px 50px rgba(15, 23, 42, 0.14);
+}
+
+* { box-sizing: border-box; }
+
+body {
+  margin: 0;
+  min-height: 100vh;
+  font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+  background: var(--bg);
+  color: var(--text);
+}
+
+.simple-page {
+  max-width: 980px;
+  margin: 0 auto;
+  padding: 28px;
+}
+
+.simple-header {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 16px;
+  margin-bottom: 18px;
+}
+
+.simple-header h1,
+.simple-header p {
+  margin: 0;
+}
+
+.simple-header h1 {
+  font-size: 24px;
+}
+
+.simple-header p {
+  margin-top: 4px;
+  color: var(--muted);
+}
+
+.approval-list {
+  display: grid;
+  gap: 14px;
+}
+
+.approval-card {
+  display: grid;
+  gap: 14px;
+  padding: 16px;
+  background: var(--panel);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  box-shadow: var(--shadow);
+}
+
+.approval-card-header {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+}
+
+.approval-card h2 {
+  margin: 0;
+  font-size: 17px;
+}
+
+.approval-card-header span {
+  padding: 3px 8px;
+  border-radius: 999px;
+  background: #fef3c7;
+  color: #854d0e;
+  font-size: 12px;
+  font-weight: 800;
+}
+
+.approval-meta {
+  display: grid;
+  gap: 6px;
+}
+
+.approval-meta div {
+  justify-content: flex-start;
+}
+
+.approval-meta dd {
+  max-width: none;
+  color: var(--text);
+}
+
+.approval-action {
+  margin: 0;
+  max-height: 220px;
+  overflow: auto;
+  padding: 12px;
+  background: #0f172a;
+  border-radius: 8px;
+  color: #d1fae5;
+  font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
+  font-size: 12px;
+  line-height: 1.5;
+}
+
+.approval-actions {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 10px;
+}
+
+.approval-actions button {
+  border: 0;
+  border-radius: 8px;
+  padding: 9px 12px;
+  background: var(--accent);
+  color: #ffffff;
+  font-weight: 750;
+}
+
+.approval-actions button[data-tone="danger"] {
+  background: var(--bad);
+}
+
+.approval-actions button:disabled {
+  cursor: wait;
+  opacity: 0.65;
+}
+
+.empty-state {
+  margin: 0;
+  padding: 16px;
+  background: var(--panel);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  color: var(--muted);
+}
+
+button, input, textarea {
+  font: inherit;
+}
+
+button {
+  cursor: pointer;
+}
+
+.app-shell {
+  display: grid;
+  grid-template-columns: 292px minmax(0, 1fr);
+  min-height: 100vh;
+}
+
+.sidebar {
+  display: flex;
+  flex-direction: column;
+  gap: 18px;
+  min-height: 100vh;
+  padding: 22px;
+  background: var(--sidebar);
+  color: #e5edf7;
+}
+
+.brand {
+  display: flex;
+  align-items: center;
+  gap: 12px;
+  padding-bottom: 12px;
+  border-bottom: 1px solid rgba(255,255,255,0.12);
+}
+
+.brand-mark, .avatar {
+  display: grid;
+  place-items: center;
+  width: 36px;
+  height: 36px;
+  border-radius: 8px;
+  font-weight: 800;
+}
+
+.brand-mark {
+  background: #f8fafc;
+  color: #111827;
+}
+
+.brand h1, .brand p,
+.chat-header h2, .chat-header p,
+.settings-panel h2, .status-panel h2 {
+  margin: 0;
+}
+
+.brand h1 {
+  font-size: 18px;
+  line-height: 1.2;
+}
+
+.brand p {
+  margin-top: 2px;
+  color: #9ca3af;
+  font-size: 12px;
+}
+
+.side-nav {
+  display: grid;
+  gap: 6px;
+}
+
+.side-nav a {
+  color: #cbd5e1;
+  text-decoration: none;
+  padding: 10px 12px;
+  border-radius: 7px;
+  font-size: 14px;
+}
+
+.side-nav a:hover,
+.side-nav a.active {
+  background: var(--sidebar-soft);
+  color: #ffffff;
+}
+
+.settings-panel,
+.status-panel {
+  display: grid;
+  gap: 12px;
+  padding: 14px;
+  background: rgba(255,255,255,0.06);
+  border: 1px solid rgba(255,255,255,0.10);
+  border-radius: 8px;
+}
+
+.settings-panel h2,
+.status-panel h2 {
+  font-size: 13px;
+  color: #f8fafc;
+}
+
+label {
+  display: grid;
+  gap: 7px;
+  font-size: 13px;
+  font-weight: 650;
+}
+
+.toggle-row {
+  grid-template-columns: auto 1fr;
+  align-items: center;
+  font-weight: 500;
+  color: #cbd5e1;
+}
+
+input,
+textarea {
+  width: 100%;
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  padding: 11px 12px;
+  background: #ffffff;
+  color: var(--text);
+}
+
+.sidebar input {
+  border-color: rgba(255,255,255,0.16);
+  background: rgba(255,255,255,0.08);
+  color: #ffffff;
+}
+
+dl {
+  display: grid;
+  gap: 9px;
+  margin: 0;
+}
+
+dl div {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+}
+
+dt {
+  color: #9ca3af;
+  font-size: 12px;
+}
+
+dd {
+  margin: 0;
+  max-width: 160px;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+  color: #e5edf7;
+  font-size: 12px;
+}
+
+[data-tone="ok"] { color: #86efac; }
+[data-tone="warn"] { color: #fde68a; }
+[data-tone="bad"] { color: #fca5a5; }
+
+.chat-shell {
+  display: grid;
+  grid-template-rows: auto minmax(0, 1fr) auto auto;
+  gap: 16px;
+  min-width: 0;
+  height: 100vh;
+  padding: 22px;
+}
+
+.chat-header {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 16px;
+  padding: 18px 20px;
+  background: var(--panel);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  box-shadow: var(--shadow);
+}
+
+.chat-header h2 {
+  font-size: 20px;
+}
+
+.chat-header p {
+  margin-top: 4px;
+  color: var(--muted);
+  font-size: 13px;
+}
+
+.secondary-button,
+.composer button {
+  border: 0;
+  border-radius: 8px;
+  padding: 10px 14px;
+  font-weight: 750;
+}
+
+.secondary-button {
+  background: #edf2f7;
+  color: #1f2937;
+}
+
+.messages {
+  display: flex;
+  flex-direction: column;
+  gap: 14px;
+  min-height: 0;
+  overflow-y: auto;
+  padding: 18px;
+  background: var(--panel);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  box-shadow: var(--shadow);
+}
+
+.message {
+  display: grid;
+  grid-template-columns: 36px minmax(0, 1fr);
+  gap: 10px;
+  max-width: 860px;
+}
+
+.message.user {
+  align-self: flex-end;
+  grid-template-columns: minmax(0, 1fr) 36px;
+}
+
+.message.user .avatar {
+  grid-column: 2;
+  grid-row: 1;
+  background: #dbeafe;
+  color: #1d4ed8;
+}
+
+.message.assistant .avatar {
+  background: #e5e7eb;
+  color: #111827;
+}
+
+.message.user .bubble {
+  grid-column: 1;
+  grid-row: 1;
+  background: #eff6ff;
+  border-color: #bfdbfe;
+}
+
+.bubble {
+  padding: 12px 14px;
+  background: var(--panel-strong);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+}
+
+.bubble p {
+  margin: 8px 0 0;
+  white-space: pre-wrap;
+  overflow-wrap: anywhere;
+  line-height: 1.5;
+}
+
+.message-reasoning {
+  display: grid;
+  gap: 8px;
+  margin-top: 10px;
+  padding: 9px 10px;
+  background: #f1f5f9;
+  border: 1px solid #dbe3ee;
+  border-radius: 8px;
+}
+
+.message-reasoning.is-collapsed {
+  gap: 0;
+}
+
+.message-reasoning-toggle {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+  width: 100%;
+  border: 0;
+  padding: 0;
+  background: transparent;
+  color: #475569;
+  font-size: 12px;
+  font-weight: 750;
+  text-align: left;
+}
+
+.message-reasoning-status {
+  flex: 0 0 auto;
+  padding: 2px 7px;
+  border-radius: 999px;
+  background: #e2e8f0;
+  color: #64748b;
+  font-size: 11px;
+}
+
+.message-reasoning pre {
+  margin: 0;
+  max-height: 220px;
+  overflow: auto;
+  color: #334155;
+  font-size: 12px;
+  line-height: 1.45;
+  white-space: pre-wrap;
+  overflow-wrap: anywhere;
+}
+
+.tool-terminal {
+  margin-top: 10px;
+  overflow: hidden;
+  background: #0f172a;
+  border: 1px solid #1e293b;
+  border-radius: 8px;
+  box-shadow: inset 0 1px 0 rgba(255,255,255,0.05);
+}
+
+.tool-terminal-header {
+  display: grid;
+  grid-template-columns: auto minmax(0, 1fr) auto;
+  align-items: center;
+  gap: 10px;
+  min-height: 34px;
+  padding: 8px 10px;
+  background: #111827;
+  border-bottom: 1px solid #1e293b;
+}
+
+.terminal-dots {
+  display: flex;
+  gap: 5px;
+}
+
+.terminal-dots i {
+  width: 9px;
+  height: 9px;
+  border-radius: 999px;
+}
+
+.terminal-dots i:nth-child(1) { background: #ef4444; }
+.terminal-dots i:nth-child(2) { background: #f59e0b; }
+.terminal-dots i:nth-child(3) { background: #22c55e; }
+
+.tool-terminal-title {
+  min-width: 0;
+  overflow: hidden;
+  color: #d1d5db;
+  font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
+  font-size: 12px;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
+.tool-terminal-status {
+  padding: 2px 7px;
+  border-radius: 999px;
+  background: #1d4ed8;
+  color: #dbeafe;
+  font-size: 11px;
+  font-weight: 800;
+}
+
+.tool-terminal.is-error .tool-terminal-status {
+  background: #7f1d1d;
+  color: #fecaca;
+}
+
+.tool-terminal.is-waiting .tool-terminal-status {
+  background: #854d0e;
+  color: #fef3c7;
+}
+
+.tool-terminal-body {
+  margin: 0;
+  max-height: 220px;
+  overflow: auto;
+  padding: 10px 12px;
+  color: #d1fae5;
+  font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
+  font-size: 12px;
+  line-height: 1.55;
+  white-space: pre-wrap;
+  overflow-wrap: anywhere;
+}
+
+.message-meta {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+  color: var(--muted);
+  font-size: 12px;
+}
+
+.message-meta strong {
+  color: var(--text);
+  font-size: 13px;
+}
+
+.debug-panel {
+  display: grid;
+  grid-template-columns: minmax(0, 1fr);
+  gap: 16px;
+  min-height: 180px;
+}
+
+.debug-column {
+  min-width: 0;
+  padding: 14px;
+  background: var(--panel);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+}
+
+.debug-column h3 {
+  margin: 0 0 10px;
+  font-size: 13px;
+}
+
+pre,
+#events {
+  margin: 0;
+  max-height: 170px;
+  overflow: auto;
+  color: #334155;
+  font-size: 12px;
+  line-height: 1.45;
+  white-space: pre-wrap;
+  overflow-wrap: anywhere;
+}
+
+#events {
+  display: grid;
+  gap: 8px;
+  padding-left: 18px;
+}
+
+#events li strong,
+#events li span {
+  display: block;
+}
+
+#events li span {
+  color: var(--muted);
+}
+
+.composer {
+  display: grid;
+  gap: 10px;
+  padding: 14px;
+  background: var(--panel);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  box-shadow: var(--shadow);
+}
+
+.composer textarea {
+  min-height: 86px;
+  resize: vertical;
+}
+
+.composer-actions {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+}
+
+#composer-hint {
+  color: var(--muted);
+  font-size: 12px;
+}
+
+.composer button {
+  min-width: 96px;
+  background: var(--accent);
+  color: #ffffff;
+}
+
+.composer button:hover {
+  background: var(--accent-strong);
+}
+
+.composer button:disabled {
+  cursor: wait;
+  opacity: 0.7;
+}
+
+[hidden] {
+  display: none !important;
+}
+
+@media (max-width: 860px) {
+  .app-shell {
+    grid-template-columns: 1fr;
+  }
+
+  .sidebar {
+    min-height: auto;
+  }
+
+  .chat-shell {
+    height: auto;
+    min-height: 100vh;
+  }
+
+  .chat-header,
+  .debug-panel,
+  .composer-actions {
+    grid-template-columns: 1fr;
+    flex-direction: column;
+    align-items: stretch;
+  }
+
+  .debug-panel {
+    display: grid;
+  }
+}
diff --git a/duck_core/web/templates/approvals.html b/duck_core/web/templates/approvals.html
new file mode 100644
index 0000000..9142e80
--- /dev/null
+++ b/duck_core/web/templates/approvals.html
@@ -0,0 +1,22 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>DuckLM Approvals</title>
+    <link rel="stylesheet" href="/static/style.css">
+  </head>
+  <body>
+    <main class="simple-page">
+      <header class="simple-header">
+        <div>
+          <h1>Approvals</h1>
+          <p>Review pending local tool actions before DuckLM continues.</p>
+        </div>
+        <a class="secondary-button" href="/">Back to Chat</a>
+      </header>
+      <section id="approvals" class="approval-list" aria-live="polite"></section>
+    </main>
+    <script src="/static/app.js"></script>
+  </body>
+</html>
diff --git a/duck_core/web/templates/experience.html b/duck_core/web/templates/experience.html
new file mode 100644
index 0000000..ceadc09
--- /dev/null
+++ b/duck_core/web/templates/experience.html
@@ -0,0 +1,2 @@
+<!doctype html>
+<html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>DuckLM Experience</title><link rel="stylesheet" href="/static/style.css"></head><body><main class="simple-page"><h1>Experience</h1><pre id="experience"></pre><script src="/static/app.js"></script></main></body></html>
diff --git a/duck_core/web/templates/index.html b/duck_core/web/templates/index.html
new file mode 100644
index 0000000..8ffdd9b
--- /dev/null
+++ b/duck_core/web/templates/index.html
@@ -0,0 +1,99 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>DuckLM WebChat</title>
+    <link rel="stylesheet" href="/static/style.css">
+  </head>
+  <body>
+    <div class="app-shell">
+      <aside class="sidebar">
+        <div class="brand">
+          <div class="brand-mark">D</div>
+          <div>
+            <h1>DuckLM</h1>
+            <p>Local cognitive runtime</p>
+          </div>
+        </div>
+
+        <nav class="side-nav" aria-label="DuckLM sections">
+          <a href="/" class="active">Chat</a>
+          <a href="/approvals">Approvals</a>
+          <a href="/skills">Skills</a>
+          <a href="/memory">Memory</a>
+          <a href="/experience">Experience</a>
+        </nav>
+
+        <section class="settings-panel" aria-labelledby="settings-title">
+          <h2 id="settings-title">Session</h2>
+          <label>
+            Workspace
+            <input id="workspace" value="./workspace" autocomplete="off">
+          </label>
+          <label class="toggle-row">
+            <input id="debug" type="checkbox" checked>
+            <span>Show reasoning and events</span>
+          </label>
+        </section>
+
+        <section class="status-panel" aria-labelledby="status-title">
+          <h2 id="status-title">Runtime</h2>
+          <dl>
+            <div>
+              <dt>API</dt>
+              <dd id="api-status">checking</dd>
+            </div>
+            <div>
+              <dt>Model</dt>
+              <dd id="model-status">checking</dd>
+            </div>
+            <div>
+              <dt>Last task</dt>
+              <dd id="task-status">none</dd>
+            </div>
+          </dl>
+        </section>
+      </aside>
+
+      <main class="chat-shell">
+        <header class="chat-header">
+          <div>
+            <h2>Chat</h2>
+            <p>Messages are processed by the local Qwen role mapping through Duck Core.</p>
+          </div>
+          <button id="new-chat" class="secondary-button" type="button">New Chat</button>
+        </header>
+
+        <section id="messages" class="messages" aria-live="polite">
+          <article class="message assistant">
+            <div class="avatar">D</div>
+            <div class="bubble">
+              <div class="message-meta">
+                <strong>DuckLM</strong>
+                <span>ready</span>
+              </div>
+              <p>Готов. Напиши задачу, я отправлю её в локальный runtime и покажу ответ, reasoning и timeline.</p>
+            </div>
+          </article>
+        </section>
+
+        <section id="debug-panel" class="debug-panel">
+          <div class="debug-column">
+            <h3>Event Timeline</h3>
+            <ol id="events"></ol>
+          </div>
+        </section>
+
+        <form id="composer" class="composer">
+          <textarea id="message" rows="3" placeholder="Напиши сообщение DuckLM...">Скажи коротко, что ты DuckLM</textarea>
+          <div class="composer-actions">
+            <span id="composer-hint">Enter sends, Shift+Enter inserts a new line</span>
+            <button id="run" type="submit">Send</button>
+          </div>
+        </form>
+      </main>
+    </div>
+    <script src="/static/app.js"></script>
+  </body>
+</html>
diff --git a/duck_core/web/templates/memory.html b/duck_core/web/templates/memory.html
new file mode 100644
index 0000000..2053b08
--- /dev/null
+++ b/duck_core/web/templates/memory.html
@@ -0,0 +1,2 @@
+<!doctype html>
+<html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>DuckLM Memory</title><link rel="stylesheet" href="/static/style.css"></head><body><main class="simple-page"><h1>Memory</h1><input id="memory-query" placeholder="Search memory"><button id="memory-search">Search</button><pre id="memory-results"></pre><script src="/static/app.js"></script></main></body></html>
diff --git a/duck_core/web/templates/skills.html b/duck_core/web/templates/skills.html
new file mode 100644
index 0000000..f73f5b4
--- /dev/null
+++ b/duck_core/web/templates/skills.html
@@ -0,0 +1,2 @@
+<!doctype html>
+<html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>DuckLM Skills</title><link rel="stylesheet" href="/static/style.css"></head><body><main class="simple-page"><h1>Skills</h1><pre id="skills"></pre><script src="/static/app.js"></script></main></body></html>
diff --git a/duck_core/web/templates/task.html b/duck_core/web/templates/task.html
new file mode 100644
index 0000000..4a0b720
--- /dev/null
+++ b/duck_core/web/templates/task.html
@@ -0,0 +1,2 @@
+<!doctype html>
+<html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>DuckLM Task</title><link rel="stylesheet" href="/static/style.css"></head><body><main class="simple-page"><h1>Task</h1><pre id="task"></pre></main></body></html>
diff --git a/main.py b/main.py
deleted file mode 100644
index f11b955..0000000
--- a/main.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from app.api.server import app
-
-
-__all__ = ["app"]
-
diff --git a/prompts/roles/action.md b/prompts/roles/action.md
new file mode 100644
index 0000000..41cfab3
--- /dev/null
+++ b/prompts/roles/action.md
@@ -0,0 +1,16 @@
+You are the DuckLM action role. Return only valid JSON matching the requested schema.
+
+Your job is to decide whether the user request needs local tool execution before
+the thinker answers.
+
+Available tools:
+- file_read: read a file inside the current workspace.
+  Args: {"path": "relative/path.txt"}
+- file_write: write a file inside the current workspace.
+  Args: {"path": "relative/path.txt", "content": "text", "overwrite": false}
+- shell_exec_safe: run a safe allowlisted shell command in the current workspace.
+  Args: {"command": "pwd"}
+
+Return actions=[] when the user can be answered directly without tools.
+Use only the listed tools. Keep actions minimal and directly tied to the user's
+request. Do not invent tool names.
diff --git a/prompts/roles/coder.md b/prompts/roles/coder.md
new file mode 100644
index 0000000..3b575f2
--- /dev/null
+++ b/prompts/roles/coder.md
@@ -0,0 +1,2 @@
+You are DuckLM, a local cognitive runtime running over a local language model.
+Your current logical role is coder. Produce practical code-oriented answers grounded in context.
diff --git a/prompts/roles/critic.md b/prompts/roles/critic.md
new file mode 100644
index 0000000..82a133e
--- /dev/null
+++ b/prompts/roles/critic.md
@@ -0,0 +1,2 @@
+You are DuckLM, a local cognitive runtime running over a local language model.
+Your current logical role is critic. Reflect on results, risks, waste, and reusable lessons.
diff --git a/prompts/roles/summary.md b/prompts/roles/summary.md
new file mode 100644
index 0000000..1d017de
--- /dev/null
+++ b/prompts/roles/summary.md
@@ -0,0 +1 @@
+You are the DuckLM summary role. Compress task context without losing decisions or outcomes.
diff --git a/prompts/roles/thinker.md b/prompts/roles/thinker.md
new file mode 100644
index 0000000..5fd0dbf
--- /dev/null
+++ b/prompts/roles/thinker.md
@@ -0,0 +1,7 @@
+You are DuckLM, a local cognitive runtime running over a local language model.
+Your current logical role is thinker.
+
+When asked who or what you are, answer as DuckLM. You may mention that DuckLM is
+powered by a local model, but do not answer as the base model identity.
+
+Respond clearly and briefly unless the task needs detail.
diff --git a/pyproject.toml b/pyproject.toml
index a9f2e9b..5c2c324 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,29 +1,31 @@
 [project]
 name = "ducklm"
 version = "0.1.0"
-description = "Local event-driven multi-model execution runtime"
+description = "Local agent runtime with WebChat, API, tools, memory and experience"
 requires-python = ">=3.11"
 dependencies = [
-  "fastapi>=0.115",
-  "httpx>=0.28",
-  "pydantic>=2.7",
-  "uvicorn>=0.30",
-  "websockets>=15.0",
-  "llama-cpp-python>=0.2.0",
-  "hnswlib>=0.8.0",
-  "sentence-transformers>=3.0",
-  "numpy>=1.26",
+  "fastapi",
+  "uvicorn",
+  "httpx",
+  "pydantic",
+  "pyyaml",
+  "jinja2",
+  "python-dotenv",
+  "jsonschema",
+  "aiosqlite",
+  "qdrant-client"
 ]
 
-[build-system]
-requires = ["setuptools>=68"]
-build-backend = "setuptools.build_meta"
-
-[tool.setuptools]
-packages = ["app"]
-
-[tool.setuptools.package-dir]
-"" = "."
+[project.optional-dependencies]
+dev = [
+  "pytest",
+  "pytest-asyncio",
+  "ruff"
+]
 
 [tool.pytest.ini_options]
-pythonpath = ["."]
+asyncio_mode = "auto"
+testpaths = ["tests"]
+
+[tool.setuptools.packages.find]
+include = ["duck_core*"]
diff --git a/scripts/bench/bench_runtime.py b/scripts/bench/bench_runtime.py
new file mode 100644
index 0000000..2c491b8
--- /dev/null
+++ b/scripts/bench/bench_runtime.py
@@ -0,0 +1,34 @@
+import asyncio
+import time
+
+from duck_core.model_client import ModelClient
+
+
+TASKS = [
+    "Скажи коротко, что ты DuckLM.",
+    "Создай tmp/duck_test_note.md с текстом hello duck и прочитай его обратно.",
+    "Посмотри структуру проекта и кратко опиши модули.",
+    "Найди TODO/FIXME в проекте.",
+    "Запусти тесты и кратко объясни результат.",
+]
+
+
+async def main() -> None:
+    client = ModelClient()
+    print("role -> base_url/model")
+    for role, cfg in client._roles.items():
+        print(f"{role} -> {cfg.base_url}/{cfg.model}")
+    started = time.perf_counter()
+    print(f"test_tasks={len(TASKS)}")
+    print("llm_calls=0")
+    print("tool_calls=0")
+    print("json_directive_validity=not_run")
+    print("retry_count=0")
+    print("memory_writes=0")
+    print("experience_record_created=no")
+    print("selected_skill=not_run")
+    print(f"total_runtime_seconds={time.perf_counter() - started:.3f}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/scripts/llama/build_vulkan.sh b/scripts/llama/build_vulkan.sh
new file mode 100755
index 0000000..90273b8
--- /dev/null
+++ b/scripts/llama/build_vulkan.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Build llama.cpp's llama-server with GGML_VULKAN enabled under <repo>/vendor/llama.cpp.
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+LLAMA_DIR="${ROOT_DIR}/vendor/llama.cpp"
+# Shallow-clone llama.cpp only when no git checkout is present yet.
+if [[ ! -d "${LLAMA_DIR}/.git" ]]; then
+  git clone --depth 1 https://github.com/ggml-org/llama.cpp "${LLAMA_DIR}"
+fi
+# Configure a Release build with GGML_VULKAN and GGML_NATIVE on and tests off.
+cmake -S "${LLAMA_DIR}" -B "${LLAMA_DIR}/build" \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DGGML_VULKAN=ON \
+  -DGGML_NATIVE=ON \
+  -DLLAMA_BUILD_TESTS=OFF
+# Build only the llama-server target, with one job per CPU reported by nproc.
+cmake --build "${LLAMA_DIR}/build" --config Release --target llama-server -j "$(nproc)"
+# Finish by asking the freshly built binary to list the devices it can see.
+"${LLAMA_DIR}/build/bin/llama-server" --list-devices
diff --git a/scripts/llama/healthcheck.sh b/scripts/llama/healthcheck.sh
new file mode 100755
index 0000000..f63dcdb
--- /dev/null
+++ b/scripts/llama/healthcheck.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+BASE_URL="${1:-http://127.0.0.1:8081/v1}"
+
+curl --noproxy "*" -fsS "${BASE_URL}/models" >/dev/null
+
+echo "OK: ${BASE_URL}"
diff --git a/scripts/llama/start_main.sh b/scripts/llama/start_main.sh
new file mode 100755
index 0000000..09c1191
--- /dev/null
+++ b/scripts/llama/start_main.sh
@@ -0,0 +1,260 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+
+ENV_KEYS=(
+  DUCK_LLAMA_SERVER_BIN
+  DUCK_MAIN_MODEL_PATH
+  DUCK_MAIN_PORT
+  DUCK_CTX_SIZE
+  DUCK_N_GPU_LAYERS
+  DUCK_LLAMA_DEVICE
+  DUCK_PARALLEL
+  DUCK_LLAMA_PID_FILE
+  DUCK_LLAMA_LOG_FILE
+  DUCK_LLAMA_EXTRA_ARGS
+  DUCK_HOST
+)
+declare -A ENV_OVERRIDES=()
+for key in "${ENV_KEYS[@]}"; do
+  if [[ -v "${key}" ]]; then
+    ENV_OVERRIDES["${key}"]="${!key}"
+  fi
+done
+
+if [[ -f "${ROOT_DIR}/.env" ]]; then
+  set -a
+  # shellcheck disable=SC1091
+  source "${ROOT_DIR}/.env"
+  set +a
+fi
+for key in "${!ENV_OVERRIDES[@]}"; do
+  export "${key}=${ENV_OVERRIDES[${key}]}"
+done
+
+ACTION="${1:-start}"
+PID_FILE="${DUCK_LLAMA_PID_FILE:-${ROOT_DIR}/data/llama-main.pid}"
+LOG_FILE="${DUCK_LLAMA_LOG_FILE:-${ROOT_DIR}/data/llama-main.log}"
+BASE_URL="http://${DUCK_HOST:-127.0.0.1}:${DUCK_MAIN_PORT:-8081}/v1"
+
+resolve_project_path() {
+  local value="$1"
+  if [[ "${value}" == /* ]]; then
+    printf '%s\n' "${value}"
+  else
+    printf '%s\n' "${ROOT_DIR}/${value#./}"
+  fi
+}
+
+usage() {
+  cat <<'EOF'
+Usage: scripts/llama/start_main.sh <command>
+
+Commands:
+  start       Start llama-server in the background
+  stop        Stop the managed llama-server process
+  restart     Stop and start llama-server
+  status      Print process and HTTP health status
+  logs        Show logs; use --follow/-f and --lines N
+  help        Show this help
+
+Environment:
+  DUCK_LLAMA_SERVER_BIN  Path to llama-server binary
+  DUCK_MAIN_MODEL_PATH   Path to GGUF model
+  DUCK_HOST              Bind host, default 127.0.0.1
+  DUCK_MAIN_PORT         Port, default 8081
+  DUCK_CTX_SIZE          Context size, default 65536
+  DUCK_N_GPU_LAYERS      GPU layers, default auto
+  DUCK_LLAMA_DEVICE      Device name, for example Vulkan0
+  DUCK_PARALLEL          Server slots, default 1
+  DUCK_LLAMA_PID_FILE    PID file path
+  DUCK_LLAMA_LOG_FILE    Log file path
+  DUCK_LLAMA_EXTRA_ARGS  Extra llama-server args
+EOF
+}
+
+is_running() {
+  # Succeeds only when PID_FILE holds a numeric pid that still accepts signal 0.
+  local recorded_pid
+  [[ -f "${PID_FILE}" ]] || return 1
+  recorded_pid="$(<"${PID_FILE}")"
+  [[ "${recorded_pid}" =~ ^[0-9]+$ ]] && kill -0 "${recorded_pid}" 2>/dev/null
+}
+
+pid_value() {
+  if [[ -f "${PID_FILE}" ]]; then
+    cat "${PID_FILE}"
+  fi
+}
+
+status() {
+  if is_running; then
+    local pid
+    pid="$(pid_value)"
+    echo "llama-server running: pid=${pid}"
+    if command -v curl >/dev/null 2>&1 && curl --noproxy "*" -fsS "${BASE_URL}/models" >/dev/null 2>&1; then
+      echo "HTTP health: ok (${BASE_URL})"
+    else
+      echo "HTTP health: not ready (${BASE_URL})"
+    fi
+    return 0
+  fi
+
+  if [[ -f "${PID_FILE}" ]]; then
+    echo "llama-server not running; removing stale pid file ${PID_FILE}"
+    rm -f "${PID_FILE}"
+  else
+    echo "llama-server not running"
+  fi
+  return 3
+}
+
+start() {
+  if is_running; then
+    echo "llama-server already running: pid=$(pid_value)"
+    return 0
+  fi
+
+  : "${DUCK_MAIN_MODEL_PATH:?DUCK_MAIN_MODEL_PATH is required}"
+
+  mkdir -p "$(dirname "${PID_FILE}")" "$(dirname "${LOG_FILE}")"
+  rm -f "${PID_FILE}"
+
+  local llama_bin model_path
+  llama_bin="${DUCK_LLAMA_SERVER_BIN:-llama-server}"
+  if [[ "${llama_bin}" == */* ]]; then
+    llama_bin="$(resolve_project_path "${llama_bin}")"
+  fi
+  model_path="$(resolve_project_path "${DUCK_MAIN_MODEL_PATH}")"
+  local command=(
+    "${llama_bin}"
+    -m "${model_path}"
+    --alias local-main
+    --host "${DUCK_HOST:-127.0.0.1}"
+    --port "${DUCK_MAIN_PORT:-8081}"
+    -c "${DUCK_CTX_SIZE:-65536}"
+    --parallel "${DUCK_PARALLEL:-1}"
+    -ngl "${DUCK_N_GPU_LAYERS:-auto}"
+    --flash-attn on
+    --cache-prompt
+    --metrics
+  )
+  if [[ -n "${DUCK_LLAMA_DEVICE:-}" ]]; then
+    command+=(--device "${DUCK_LLAMA_DEVICE}")
+  fi
+  if [[ -n "${DUCK_LLAMA_EXTRA_ARGS:-}" ]]; then
+    # shellcheck disable=SC2206
+    local extra_args=( ${DUCK_LLAMA_EXTRA_ARGS} )
+    command+=("${extra_args[@]}")
+  fi
+
+  echo "Starting llama-server..."
+  echo "Command: ${command[*]}" >> "${LOG_FILE}"
+  if command -v setsid >/dev/null 2>&1; then
+    nohup setsid "${command[@]}" >> "${LOG_FILE}" 2>&1 &
+  else
+    nohup "${command[@]}" >> "${LOG_FILE}" 2>&1 &
+  fi
+  local pid=$!
+  echo "${pid}" > "${PID_FILE}"
+  sleep 0.2
+
+  if is_running; then
+    echo "llama-server started: pid=${pid}"
+    echo "Log: ${LOG_FILE}"
+    return 0
+  fi
+
+  echo "llama-server failed to start. See ${LOG_FILE}" >&2
+  rm -f "${PID_FILE}"
+  return 1
+}
+
+stop() {
+  if ! is_running; then
+    rm -f "${PID_FILE}"
+    echo "llama-server not running"
+    return 0
+  fi
+
+  local pid
+  pid="$(pid_value)"
+  echo "Stopping llama-server: pid=${pid}"
+  kill "${pid}" 2>/dev/null || true
+
+  for _ in {1..30}; do
+    if ! kill -0 "${pid}" 2>/dev/null; then
+      rm -f "${PID_FILE}"
+      echo "llama-server stopped"
+      return 0
+    fi
+    sleep 0.2
+  done
+
+  echo "llama-server did not stop after SIGTERM; sending SIGKILL"
+  kill -9 "${pid}" 2>/dev/null || true
+  rm -f "${PID_FILE}"
+  echo "llama-server stopped"
+}
+
+restart() {
+  stop
+  start
+}
+
+logs() {  # Show the tail of LOG_FILE; options: -f/--follow, --lines N (default 100).
+  local follow=0
+  local lines=100
+  shift || true  # discard the leading "logs" word passed in via "$@"; tolerate an empty arg list
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      -f|--follow)
+        follow=1
+        shift
+        ;;
+      --lines)
+        lines="${2:?--lines requires a value}"  # errors out with this message when N is absent
+        shift 2
+        ;;
+      *)
+        echo "Unknown logs argument: $1" >&2
+        return 2
+        ;;
+    esac
+  done
+  # Make sure the log file exists so tail always has something to open.
+  mkdir -p "$(dirname "${LOG_FILE}")"
+  touch "${LOG_FILE}"
+  if [[ "${follow}" == "1" ]]; then
+    tail -n "${lines}" -f "${LOG_FILE}"
+  else
+    tail -n "${lines}" "${LOG_FILE}"
+  fi
+}
+
+case "${ACTION}" in
+  start)
+    start
+    ;;
+  stop)
+    stop
+    ;;
+  restart)
+    restart
+    ;;
+  status)
+    status
+    ;;
+  logs)
+    logs "$@"
+    ;;
+  help|-h|--help)
+    usage
+    ;;
+  *)
+    echo "Unknown command: ${ACTION}" >&2
+    usage >&2
+    exit 2
+    ;;
+esac
diff --git a/scripts/llama/start_thinker_mtp_experimental.sh b/scripts/llama/start_thinker_mtp_experimental.sh
new file mode 100755
index 0000000..e9b4dba
--- /dev/null
+++ b/scripts/llama/start_thinker_mtp_experimental.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+set -euo pipefail  # abort on errors, unset variables and failed pipeline stages
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"  # two directories above this script
+
+ENV_KEYS=(  # settings whose caller-supplied values are saved before .env is sourced
+  DUCK_LLAMA_SERVER_BIN
+  DUCK_MAIN_MODEL_PATH
+  DUCK_MTP_MODEL_PATH
+  DUCK_MAIN_MTP_PORT
+  DUCK_CTX_SIZE
+  DUCK_N_GPU_LAYERS
+  DUCK_LLAMA_DEVICE
+  DUCK_PARALLEL
+  DUCK_MTP_FLAGS
+  DUCK_HOST
+)
+declare -A ENV_OVERRIDES=()
+for key in "${ENV_KEYS[@]}"; do
+  if [[ -v "${key}" ]]; then  # true when the variable is already set, even to an empty string
+    ENV_OVERRIDES["${key}"]="${!key}"  # indirect expansion: the value of the variable named by key
+  fi
+done
+
+if [[ -f "${ROOT_DIR}/.env" ]]; then
+  set -a  # auto-export every variable that .env assigns
+  # shellcheck disable=SC1091
+  source "${ROOT_DIR}/.env"
+  set +a
+fi
+for key in "${!ENV_OVERRIDES[@]}"; do  # re-apply the saved values so they win over .env
+  export "${key}=${ENV_OVERRIDES[${key}]}"
+done
+
+ACTION="${1:-start}"
+
+# Print the command summary for this script.
+usage() {
+  cat <<'EOF'
+Usage: scripts/llama/start_thinker_mtp_experimental.sh <command>
+
+Commands:
+  start   Start experimental MTP/speculative llama-server in foreground
+  check   Check whether the current llama-server binary exposes draft-mtp flags
+  help    Show this help
+EOF
+}
+
+# Echo $1 unchanged when it is absolute; otherwise anchor it under ROOT_DIR.
+resolve_project_path() {
+  local value="$1"
+  if [[ "${value}" == /* ]]; then
+    printf '%s\n' "${value}"
+  else
+    printf '%s\n' "${ROOT_DIR}/${value#./}"
+  fi
+}
+
+# Answer help and reject unknown commands before any environment or binary
+# checks, so usage stays reachable on hosts without a configured llama-server.
+case "${ACTION}" in
+  help|-h|--help) usage; exit 0 ;;
+  start|check) ;;
+  *) echo "Unknown command: ${ACTION}" >&2; usage >&2; exit 2 ;;
+esac
+
+LLAMA_BIN="${DUCK_LLAMA_SERVER_BIN:-llama-server}"
+if [[ "${LLAMA_BIN}" == */* ]]; then  # a bare command name is left for PATH lookup
+  LLAMA_BIN="$(resolve_project_path "${LLAMA_BIN}")"
+fi
+
+# Probe the binary's help output; "|| true" keeps a failing probe from tripping set -e.
+HELP_TEXT="$("${LLAMA_BIN}" --help 2>&1 || true)"
+if ! grep -qi "draft-mtp" <<< "${HELP_TEXT}"; then
+  echo "This llama-server build does not expose draft-mtp speculative decoding." >&2
+  exit 1
+fi
+
+if [[ "${ACTION}" == "check" ]]; then
+  echo "OK: draft-mtp speculative decoding is exposed by ${LLAMA_BIN}"
+  exit 0
+fi
+
+# Only "start" loads a model, so the path is required here rather than up front.
+: "${DUCK_MAIN_MODEL_PATH:?DUCK_MAIN_MODEL_PATH is required}"
+MAIN_MODEL_PATH="$(resolve_project_path "${DUCK_MAIN_MODEL_PATH}")"
+
+command=(  # argv for llama-server; each DUCK_* setting falls back to the default written here
+  "${LLAMA_BIN}"
+  -m "${MAIN_MODEL_PATH}"
+  --alias local-main-mtp
+  --host "${DUCK_HOST:-127.0.0.1}"
+  --port "${DUCK_MAIN_MTP_PORT:-8085}"
+  -c "${DUCK_CTX_SIZE:-65536}"
+  --parallel "${DUCK_PARALLEL:-1}"
+  -ngl "${DUCK_N_GPU_LAYERS:-auto}"
+  --flash-attn on
+  --cache-prompt
+  --metrics
+  --spec-type draft-mtp
+)
+
+if [[ -n "${DUCK_LLAMA_DEVICE:-}" ]]; then  # optional: pass --device only when configured
+  command+=(--device "${DUCK_LLAMA_DEVICE}")
+fi
+
+if [[ -n "${DUCK_MTP_MODEL_PATH:-}" ]]; then  # optional: separate draft model, resolved like the main one
+  command+=(--model-draft "$(resolve_project_path "${DUCK_MTP_MODEL_PATH}")")
+fi
+
+if [[ -n "${DUCK_MTP_FLAGS:-}" ]]; then
+  # Split on whitespace only; unlike an unquoted expansion this never globs (* ? [ stay literal).
+  read -r -d '' -a extra_args <<< "${DUCK_MTP_FLAGS}" || true  # read returns 1 at EOF when -d '' is used
+  command+=("${extra_args[@]}")
+fi
+
+exec "${command[@]}"  # replace this shell with the server process
diff --git a/scripts/server.sh b/scripts/server.sh
deleted file mode 100755
index 21a03bb..0000000
--- a/scripts/server.sh
+++ /dev/null
@@ -1,81 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
-VENV_PYTHON="$ROOT_DIR/.venv/bin/python"
-PID_FILE="$ROOT_DIR/data/runtime/server.pid"
-LOG_FILE="$ROOT_DIR/data/runtime/server.log"
-HOST="${HOST:-127.0.0.1}"
-PORT="${PORT:-8000}"
-
-mkdir -p "$ROOT_DIR/data/runtime"
-
-export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}$ROOT_DIR/.venv/lib/python3.13/site-packages/llama_cpp/lib"
-export GGML_VULKAN=1
-
-is_running() {
-  if [[ -f "$PID_FILE" ]]; then
-    local pid
-    pid="$(cat "$PID_FILE")"
-    if kill -0 "$pid" >/dev/null 2>&1; then
-      return 0
-    fi
-  fi
-  return 1
-}
-
-start_server() {
-  if is_running; then
-    echo "Server already running with PID $(cat "$PID_FILE")"
-    exit 0
-  fi
-  nohup "$VENV_PYTHON" -m uvicorn main:app --host "$HOST" --port "$PORT" >"$LOG_FILE" 2>&1 &
-  echo $! >"$PID_FILE"
-  echo "Started server on http://$HOST:$PORT with PID $(cat "$PID_FILE")"
-  echo "Log: $LOG_FILE"
-}
-
-stop_server() {
-  if ! is_running; then
-    echo "Server is not running"
-    rm -f "$PID_FILE"
-    exit 0
-  fi
-  local pid
-  pid="$(cat "$PID_FILE")"
-  kill "$pid"
-  rm -f "$PID_FILE"
-  echo "Stopped server PID $pid"
-}
-
-status_server() {
-  if is_running; then
-    echo "Server is running with PID $(cat "$PID_FILE") on http://$HOST:$PORT"
-  else
-    echo "Server is not running"
-  fi
-}
-
-case "${1:-}" in
-  start)
-    start_server
-    ;;
-  stop)
-    stop_server
-    ;;
-  restart)
-    stop_server || true
-    start_server
-    ;;
-  status)
-    status_server
-    ;;
-  logs)
-    touch "$LOG_FILE"
-    tail -n 50 -f "$LOG_FILE"
-    ;;
-  *)
-    echo "Usage: $0 {start|stop|restart|status|logs}"
-    exit 1
-    ;;
-esac
diff --git a/scripts/verify/verify_basic_chat.sh b/scripts/verify/verify_basic_chat.sh
new file mode 100755
index 0000000..cfc8651
--- /dev/null
+++ b/scripts/verify/verify_basic_chat.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+set -euo pipefail  # any failing curl below aborts the script
+
+BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}"  # API base; override with DUCK_API_URL
+
+curl -fsS "${BASE_URL}/health"  # -f turns an HTTP error status into a non-zero exit
+
+curl -fsS -X POST "${BASE_URL}/v1/chat" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "message": "Скажи коротко, что ты DuckLM",
+    "debug": true
+  }'
diff --git a/scripts/verify/verify_experience.sh b/scripts/verify/verify_experience.sh
new file mode 100755
index 0000000..9529162
--- /dev/null
+++ b/scripts/verify/verify_experience.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}"  # API base; override with DUCK_API_URL
+curl -fsS "${BASE_URL}/v1/experience"  # prints the response body; -f makes an HTTP error fail the script
diff --git a/scripts/verify/verify_file_write_read.sh b/scripts/verify/verify_file_write_read.sh
new file mode 100755
index 0000000..8dfc4a9
--- /dev/null
+++ b/scripts/verify/verify_file_write_read.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+set -euo pipefail  # a failing curl inside the assignment below aborts the script
+
+BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}"  # API base; override with DUCK_API_URL
+
+RESPONSE="$(curl -fsS -X POST "${BASE_URL}/v1/chat" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "message": "Создай tmp/duck_test_note.md с текстом hello duck и прочитай его обратно",
+    "workspace": "./workspace",
+    "debug": true
+  }')"
+
+echo "${RESPONSE}"  # only prints the reply; its content is not inspected here
diff --git a/scripts/verify/verify_memory.sh b/scripts/verify/verify_memory.sh
new file mode 100755
index 0000000..1f0c637
--- /dev/null
+++ b/scripts/verify/verify_memory.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}"  # API base; override with DUCK_API_URL
+curl -fsS "${BASE_URL}/v1/memory/search?q=duck"  # prints the response body; -f makes an HTTP error fail the script
diff --git a/scripts/verify/verify_models_roles.sh b/scripts/verify/verify_models_roles.sh
new file mode 100755
index 0000000..0f0f654
--- /dev/null
+++ b/scripts/verify/verify_models_roles.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}"  # API base; override with DUCK_API_URL
+curl -fsS "${BASE_URL}/v1/models/roles"  # prints the response body; -f makes an HTTP error fail the script
diff --git a/scripts/verify/verify_skills.sh b/scripts/verify/verify_skills.sh
new file mode 100755
index 0000000..f849549
--- /dev/null
+++ b/scripts/verify/verify_skills.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}"  # API base; override with DUCK_API_URL
+curl -fsS "${BASE_URL}/v1/skills"  # prints the response body; -f makes an HTTP error fail the script
diff --git a/scripts/verify/verify_tool_blocking.sh b/scripts/verify/verify_tool_blocking.sh
new file mode 100755
index 0000000..bceaaa2
--- /dev/null
+++ b/scripts/verify/verify_tool_blocking.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+python3 - <<'PY'  # run the embedded check; a non-zero Python exit fails this script via set -e
+import asyncio
+from duck_core.tools.shell_exec_safe import ShellExecSafeTool
+
+async def main():
+    result = await ShellExecSafeTool(".").run({"command": "rm -rf ."})  # NOTE(review): if blocking regresses this may really run against "."; consider a scratch dir
+    if result.ok: raise SystemExit("FAIL: dangerous command was not blocked")  # explicit raise: a bare assert is stripped under -O / PYTHONOPTIMIZE
+    print("OK: dangerous command blocked")
+
+asyncio.run(main())
+PY
diff --git a/server.err b/server.err
deleted file mode 100644
index 58def86..0000000
--- a/server.err
+++ /dev/null
@@ -1,274 +0,0 @@
-Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]Loading weights: 100%|██████████| 103/103 [00:00<00:00, 5627.96it/s]
-INFO:     Started server process [221205]
-INFO:     Waiting for application startup.
-llama_context: n_ctx_seq (4096) < n_ctx_train (262144) -- the full capacity of the model will not be utilized
-llama_context: n_ctx_seq (4096) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
-llama_kv_cache_iswa: using full-size SWA cache (ref: https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
-llama_kv_cache: the V embeddings have different sizes across layers and FA is not enabled - padding V cache to 1024
-llama_kv_cache: the V embeddings have different sizes across layers and FA is not enabled - padding V cache to 1024
-llama_context: n_ctx_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
-llama_context: n_ctx_seq (4096) < n_ctx_train (40960) -- the full capacity of the model will not be utilized
-INFO:     Application startup complete.
-INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
-ERROR:    Exception in ASGI application
-Traceback (most recent call last):
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/uvicorn/protocols/http/h11_impl.py", line 415, in run_asgi
-    result = await app(  # type: ignore[func-returns-value]
-             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        self.scope, self.receive, self.send
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    )
-    ^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
-    return await self.app(scope, receive, send)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/applications.py", line 1159, in __call__
-    await super().__call__(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/applications.py", line 90, in __call__
-    await self.middleware_stack(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/errors.py", line 186, in __call__
-    raise exc
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/errors.py", line 164, in __call__
-    await self.app(scope, receive, _send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/exceptions.py", line 63, in __call__
-    await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
-    raise exc
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
-    await app(scope, receive, sender)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
-    await self.app(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 660, in __call__
-    await self.middleware_stack(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 680, in app
-    await route.handle(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 276, in handle
-    await self.app(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 134, in app
-    await wrap_app_handling_exceptions(app, request)(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
-    raise exc
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
-    await app(scope, receive, sender)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 120, in app
-    response = await f(request)
-               ^^^^^^^^^^^^^^^^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 674, in app
-    raw_response = await run_endpoint_function(
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    ...<3 lines>...
-    )
-    ^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 330, in run_endpoint_function
-    return await run_in_threadpool(dependant.call, **values)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/concurrency.py", line 32, in run_in_threadpool
-    return await anyio.to_thread.run_sync(func)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/to_thread.py", line 63, in run_sync
-    return await get_async_backend().run_sync_in_worker_thread(
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        func, args, abandon_on_cancel=abandon_on_cancel, limiter=limiter
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    )
-    ^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 2518, in run_sync_in_worker_thread
-    return await future
-           ^^^^^^^^^^^^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 1002, in run
-    result = context.run(func, *args)
-  File "/home/mirivlad/git/ducklm/app/api/server.py", line 103, in resolve_secret
-    return runtime.resolve_secret(task_id=request.task_id, secret=request.secret)
-           ~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/mirivlad/git/ducklm/app/runtime/runtime_controller.py", line 408, in resolve_secret
-    return self.runtime_loop.resolve_secret(
-           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
-        task_id=task_id, secret=secret
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    )
-    ^
-  File "/home/mirivlad/git/ducklm/app/runtime/runtime_loop.py", line 378, in resolve_secret
-    execution_result = self._execution_engine.execute(
-        task=task,
-    ...<2 lines>...
-        secret_override=secret,
-    )
-  File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 87, in execute
-    return self._execute_plan(
-           ~~~~~~~~~~~~~~~~~~^
-        task=task,
-        ^^^^^^^^^^
-    ...<3 lines>...
-        password_override=password_override,
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    )
-    ^
-  File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 211, in _execute_plan
-    result = self._execute_tool(
-        task=task,
-    ...<3 lines>...
-        password_override=password_override,
-    )
-  File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 824, in _execute_tool
-    tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args)
-  File "/home/mirivlad/git/ducklm/app/tools/plugins/shell_exec/__init__.py", line 21, in execute
-    completed = self._sandbox.run_shell(
-        command=command,
-        cwd=str(cwd) if cwd else None,
-        stdin_data=str(stdin_secret) if stdin_secret is not None else None,
-    )
-  File "/home/mirivlad/git/ducklm/app/tools/sandbox.py", line 29, in run_shell
-    return subprocess.run(
-           ~~~~~~~~~~~~~~^
-        command,
-        ^^^^^^^^
-    ...<7 lines>...
-        check=False,
-        ^^^^^^^^^^^^
-    )
-    ^
-  File "/usr/lib/python3.13/subprocess.py", line 556, in run
-    stdout, stderr = process.communicate(input, timeout=timeout)
-                     ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/usr/lib/python3.13/subprocess.py", line 1222, in communicate
-    stdout, stderr = self._communicate(input, endtime, timeout)
-                     ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/usr/lib/python3.13/subprocess.py", line 2129, in _communicate
-    self._check_timeout(endtime, orig_timeout, stdout, stderr)
-    ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/usr/lib/python3.13/subprocess.py", line 1269, in _check_timeout
-    raise TimeoutExpired(
-    ...<2 lines>...
-            stderr=b''.join(stderr_seq) if stderr_seq else None)
-subprocess.TimeoutExpired: Command 'sudo -S -p '' apt update && apt upgrade -y' timed out after 30.0 seconds
-ERROR:    Exception in ASGI application
-Traceback (most recent call last):
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/uvicorn/protocols/http/h11_impl.py", line 415, in run_asgi
-    result = await app(  # type: ignore[func-returns-value]
-             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        self.scope, self.receive, self.send
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    )
-    ^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
-    return await self.app(scope, receive, send)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/applications.py", line 1159, in __call__
-    await super().__call__(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/applications.py", line 90, in __call__
-    await self.middleware_stack(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/errors.py", line 186, in __call__
-    raise exc
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/errors.py", line 164, in __call__
-    await self.app(scope, receive, _send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/exceptions.py", line 63, in __call__
-    await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
-    raise exc
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
-    await app(scope, receive, sender)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
-    await self.app(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 660, in __call__
-    await self.middleware_stack(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 680, in app
-    await route.handle(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 276, in handle
-    await self.app(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 134, in app
-    await wrap_app_handling_exceptions(app, request)(scope, receive, send)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
-    raise exc
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
-    await app(scope, receive, sender)
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 120, in app
-    response = await f(request)
-               ^^^^^^^^^^^^^^^^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 674, in app
-    raw_response = await run_endpoint_function(
-                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    ...<3 lines>...
-    )
-    ^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 330, in run_endpoint_function
-    return await run_in_threadpool(dependant.call, **values)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/concurrency.py", line 32, in run_in_threadpool
-    return await anyio.to_thread.run_sync(func)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/to_thread.py", line 63, in run_sync
-    return await get_async_backend().run_sync_in_worker_thread(
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        func, args, abandon_on_cancel=abandon_on_cancel, limiter=limiter
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    )
-    ^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 2518, in run_sync_in_worker_thread
-    return await future
-           ^^^^^^^^^^^^
-  File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 1002, in run
-    result = context.run(func, *args)
-  File "/home/mirivlad/git/ducklm/app/api/server.py", line 103, in resolve_secret
-    return runtime.resolve_secret(task_id=request.task_id, secret=request.secret)
-           ~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/mirivlad/git/ducklm/app/runtime/runtime_controller.py", line 408, in resolve_secret
-    return self.runtime_loop.resolve_secret(
-           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
-        task_id=task_id, secret=secret
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    )
-    ^
-  File "/home/mirivlad/git/ducklm/app/runtime/runtime_loop.py", line 378, in resolve_secret
-    execution_result = self._execution_engine.execute(
-        task=task,
-    ...<2 lines>...
-        secret_override=secret,
-    )
-  File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 87, in execute
-    return self._execute_plan(
-           ~~~~~~~~~~~~~~~~~~^
-        task=task,
-        ^^^^^^^^^^
-    ...<3 lines>...
-        password_override=password_override,
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    )
-    ^
-  File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 211, in _execute_plan
-    result = self._execute_tool(
-        task=task,
-    ...<3 lines>...
-        password_override=password_override,
-    )
-  File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 824, in _execute_tool
-    tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args)
-  File "/home/mirivlad/git/ducklm/app/tools/plugins/shell_exec/__init__.py", line 21, in execute
-    completed = self._sandbox.run_shell(
-        command=command,
-        cwd=str(cwd) if cwd else None,
-        stdin_data=str(stdin_secret) if stdin_secret is not None else None,
-    )
-  File "/home/mirivlad/git/ducklm/app/tools/sandbox.py", line 29, in run_shell
-    return subprocess.run(
-           ~~~~~~~~~~~~~~^
-        command,
-        ^^^^^^^^
-    ...<7 lines>...
-        check=False,
-        ^^^^^^^^^^^^
-    )
-    ^
-  File "/usr/lib/python3.13/subprocess.py", line 556, in run
-    stdout, stderr = process.communicate(input, timeout=timeout)
-                     ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/usr/lib/python3.13/subprocess.py", line 1222, in communicate
-    stdout, stderr = self._communicate(input, endtime, timeout)
-                     ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/usr/lib/python3.13/subprocess.py", line 2129, in _communicate
-    self._check_timeout(endtime, orig_timeout, stdout, stderr)
-    ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/usr/lib/python3.13/subprocess.py", line 1269, in _check_timeout
-    raise TimeoutExpired(
-    ...<2 lines>...
-            stderr=b''.join(stderr_seq) if stderr_seq else None)
-subprocess.TimeoutExpired: Command 'sudo -S -p '' apt update && apt upgrade -y' timed out after 30.0 seconds
diff --git a/server.out b/server.out
deleted file mode 100644
index 10943e5..0000000
--- a/server.out
+++ /dev/null
@@ -1,254 +0,0 @@
-Models policy ready
-Registered tool: file_write
-Registered tool: shell_exec
-Registered tool: memory
-Registered tool: file_read
-Lifespan: Starting model loading...
-Lifespan: Loading models...
-Loading thinker model...
-Thinker loaded: <app.models.orchestrator.OrchestratorAdapter object at 0x7f1db5b6cc20> (model: Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf)
-Loading json_compiler model...
-JSON Compiler loaded: <app.models.orchestrator.OrchestratorAdapter object at 0x7f1db5b7bb10> (model: gemma-4-E4B-it-Q4_K_M.gguf)
-Loading coder model...
-Coder loaded: <app.models.coder.CoderAdapter object at 0x7f1db5b6d2b0> (model: X-Coder-SFT-Qwen3-8B.Q6_K.gguf)
-Loading critic model...
-Reusing model instance: gemma-4-E4B-it-Q4_K_M.gguf for critic
-Critic loaded: <app.models.critic.CriticAdapter object at 0x7f1db5b6d160> (model: gemma-4-E4B-it-Q4_K_M.gguf)
-Loading sys_util model...
-Sys_util loaded: <app.models.orchestrator.OrchestratorAdapter object at 0x7f1db30ec2d0> (model: Menlo_Lucy-Q4_K_M.gguf)
-All models loaded successfully
-MemoryRecallService initialized with model: json_compiler
-MemoryWritePolicy set: True
-Lifespan: Models loaded
-Lifespan: Rebuilding vector index (289 entries)...
-Lifespan: Vector index rebuilt
-INFO:     127.0.0.1:47236 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:47238 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:47240 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:45740 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:45754 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:41296 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:41304 - "GET / HTTP/1.1" 200 OK
-INFO:     127.0.0.1:41304 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:41304 - "GET /favicon.ico HTTP/1.1" 404 Not Found
-INFO:     127.0.0.1:41318 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:41310 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:40504 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:45288 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:45302 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:47488 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:47498 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:48888 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:48898 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44008 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44024 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44008 - "POST /chat HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50236 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50246 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:57020 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:57032 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:36982 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:36996 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:35350 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:35358 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:38442 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:38456 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:38442 - "POST /permissions/resolve HTTP/1.1" 200 OK
-INFO:     127.0.0.1:35664 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:35666 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:41680 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:41682 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:55484 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:55486 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:53136 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:53142 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50412 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50412 - "POST /secrets/resolve HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50416 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50384 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50396 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:35882 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:35890 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:34008 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:34012 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:38358 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:38366 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:39500 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:39516 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:52800 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:52812 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:60246 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:60256 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:55192 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:55208 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:55192 - "POST /secrets/resolve HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50170 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50184 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:60392 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:60404 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:42626 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:42630 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:37478 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:37480 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:59892 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:59902 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50284 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50290 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:59488 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:59492 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:53584 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:53590 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50978 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50990 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:43110 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:43118 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:39906 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:39908 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:39100 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:39110 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:43436 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:43448 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:60214 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:60228 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:56192 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:45580 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:59680 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:52038 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:34120 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:54374 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:41916 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:48474 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:58570 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:58284 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:47014 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:37884 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:56196 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:60026 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:48534 - "POST /secrets/resolve HTTP/1.1" 200 OK
-INFO:     127.0.0.1:48536 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:46114 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:49446 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:33518 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:40316 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:47326 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:36022 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:36806 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:54232 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:54248 - "GET / HTTP/1.1" 200 OK
-INFO:     127.0.0.1:54248 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:54248 - "GET /favicon.ico HTTP/1.1" 404 Not Found
-INFO:     127.0.0.1:38470 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:54264 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50474 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50490 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44644 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44652 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:41856 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:57392 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:45778 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:59094 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:39508 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:51214 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:54724 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:41204 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:33686 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:38154 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44658 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:56664 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:33906 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:36934 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:48746 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50876 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:38912 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:40786 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:51882 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:40002 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:43176 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:49824 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44316 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:58994 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:47794 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:37642 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:32882 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:53578 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:35804 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:47732 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:34050 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:55386 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:43992 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:43998 - "GET / HTTP/1.1" 200 OK
-INFO:     127.0.0.1:43998 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:43998 - "GET /favicon.ico HTTP/1.1" 404 Not Found
-INFO:     127.0.0.1:39194 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:33540 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:53022 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:41056 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44000 - "POST /chat HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44000 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44000 - "POST /permissions/resolve HTTP/1.1" 200 OK
-INFO:     127.0.0.1:57534 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:60834 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:59886 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:42774 - "POST /secrets/resolve HTTP/1.1" 500 Internal Server Error
-INFO:     127.0.0.1:50140 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:52360 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:57882 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44816 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:37956 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:37956 - "GET / HTTP/1.1" 200 OK
-INFO:     127.0.0.1:37956 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:37956 - "GET /favicon.ico HTTP/1.1" 404 Not Found
-INFO:     127.0.0.1:50254 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:46082 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:56836 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:35716 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:37656 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:45248 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:50242 - "POST /chat HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44868 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44882 - "POST /permissions/resolve HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44882 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:48796 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:60814 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:53286 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44882 - "POST /secrets/resolve HTTP/1.1" 500 Internal Server Error
-INFO:     127.0.0.1:53816 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:39450 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:53198 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:58340 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:58686 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:47278 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:46400 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:58580 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:35014 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:43342 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:34798 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:41652 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:36938 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:58066 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:45948 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:45656 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:33986 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:52016 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:55700 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:48468 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:33002 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:43004 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:43014 - "POST /secrets/resolve HTTP/1.1" 200 OK
-INFO:     127.0.0.1:43014 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:36870 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:45970 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:60292 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:53738 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:49414 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:56572 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:51224 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:53742 - "POST /secrets/resolve HTTP/1.1" 200 OK
-INFO:     127.0.0.1:42496 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:54868 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:57530 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:60898 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:54112 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:44548 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:37414 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:45064 - "GET /health HTTP/1.1" 200 OK
diff --git a/server.pid b/server.pid
deleted file mode 100644
index 15c7f9e..0000000
--- a/server.pid
+++ /dev/null
@@ -1 +0,0 @@
-844579
diff --git a/skills/analyze_project/examples.md b/skills/analyze_project/examples.md
new file mode 100644
index 0000000..f422d19
--- /dev/null
+++ b/skills/analyze_project/examples.md
@@ -0,0 +1,5 @@
+# Examples
+
+User: "Посмотри структуру проекта."
+
+Output: concise architecture summary, risks, and next actions.
diff --git a/skills/analyze_project/notes.md b/skills/analyze_project/notes.md
new file mode 100644
index 0000000..c04be65
--- /dev/null
+++ b/skills/analyze_project/notes.md
@@ -0,0 +1,3 @@
+# Notes
+
+Use read-only commands unless the user explicitly asks for changes.
diff --git a/skills/analyze_project/procedure.md b/skills/analyze_project/procedure.md
new file mode 100644
index 0000000..ee24768
--- /dev/null
+++ b/skills/analyze_project/procedure.md
@@ -0,0 +1,6 @@
+# Procedure
+
+1. List top-level files and directories.
+2. Read manifest and documentation files.
+3. Identify major modules and runtime entry points.
+4. Summarize risks and next steps using only observed files.
diff --git a/skills/analyze_project/skill.yaml b/skills/analyze_project/skill.yaml
new file mode 100644
index 0000000..9d7bc67
--- /dev/null
+++ b/skills/analyze_project/skill.yaml
@@ -0,0 +1,23 @@
+id: analyze_project
+title: Analyze project structure
+description: Inspect repository structure and summarize architecture.
+version: 1
+tags:
+  - code
+  - repository
+  - analysis
+required_tools:
+  - file_read
+  - shell_exec_safe
+risk_level: low
+inputs:
+  - workspace_path
+outputs:
+  - architecture_summary
+  - risks
+  - suggested_next_steps
+success_criteria:
+  - repository structure inspected
+  - major modules identified
+  - no destructive commands executed
+  - summary is grounded in actual files
diff --git a/test_ducklm.py b/test_ducklm.py
deleted file mode 100755
index a4b4e91..0000000
--- a/test_ducklm.py
+++ /dev/null
@@ -1,314 +0,0 @@
-#!/usr/bin/env python3
-"""
-Тестовый скрипт для проверки работы ducklm.
-Позволяет ИИ-кодеру тестировать систему через отправку запросов и проверку выполнения.
-"""
-
-import json
-import time
-import requests
-import sys
-from typing import Dict, Any, Optional
-
-
-class DuckLMTester:
-    def __init__(self, base_url: str = "http://127.0.0.1:8000"):
-        self.base_url = base_url
-        self.session = requests.Session()
-        self.test_results = []
-    
-    def log_test(self, test_name: str, passed: bool, details: str = ""):
-        """Записать результат теста"""
-        result = {
-            "test": test_name,
-            "passed": passed,
-            "details": details,
-            "timestamp": time.time()
-        }
-        self.test_results.append(result)
-        status = "✓ PASS" if passed else "✗ FAIL"
-        print(f"{status}: {test_name}")
-        if details:
-            print(f"  Details: {details}")
-    
-    def test_health(self) -> bool:
-        """Проверить эндпоинт здоровья"""
-        try:
-            response = self.session.get(f"{self.base_url}/health", timeout=5)
-            if response.status_code == 200:
-                data = response.json()
-                if data.get("status") == "ok":
-                    self.log_test("Health Check", True, "Server is healthy")
-                    return True
-                else:
-                    self.log_test("Health Check", False, f"Unexpected response: {data}")
-                    return False
-            else:
-                self.log_test("Health Check", False, f"HTTP {response.status_code}")
-                return False
-        except Exception as e:
-            self.log_test("Health Check", False, f"Connection error: {str(e)}")
-            return False
-    
-    def test_simple_chat(self) -> bool:
-        """Простой тест чата"""
-        try:
-            payload = {"input": "Привет, как дела?"}
-            response = self.session.post(
-                f"{self.base_url}/chat", 
-                json=payload, 
-                timeout=30
-            )
-            
-            if response.status_code == 200:
-                data = response.json()
-                status = data.get("status")
-                if status in ["completed", "awaiting_permission", "awaiting_input"]:
-                    self.log_test(
-                        "Simple Chat", 
-                        True, 
-                        f"Status: {status}, Response received"
-                    )
-                    return True
-                else:
-                    self.log_test(
-                        "Simple Chat", 
-                        False, 
-                        f"Unexpected status: {status}"
-                    )
-                    return False
-            else:
-                self.log_test(
-                    "Simple Chat", 
-                    False, 
-                    f"HTTP {response.status_code}: {response.text}"
-                )
-                return False
-        except Exception as e:
-            self.log_test("Simple Chat", False, f"Request error: {str(e)}")
-            return False
-    
-    def test_tool_execution(self) -> bool:
-        """Тест выполнения инструмента"""
-        try:
-            # Тест простой команды shell
-            payload = {
-                "input": "Выполни простую команду",
-                "context": {
-                    "requested_tool": "shell_exec",
-                    "tool_args": {"command": "echo 'test'"}
-                }
-            }
-            response = self.session.post(
-                f"{self.base_url}/chat", 
-                json=payload, 
-                timeout=30
-            )
-            
-            if response.status_code == 200:
-                data = response.json()
-                status = data.get("status")
-                if status == "completed":
-                    output = data.get("result", {}).get("output", "")
-                    if "test" in output:
-                        self.log_test(
-                            "Tool Execution", 
-                            True, 
-                            f"Command executed successfully: {output.strip()}"
-                        )
-                        return True
-                    else:
-                        self.log_test(
-                            "Tool Execution", 
-                            False, 
-                            f"Unexpected output: {output}"
-                        )
-                        return False
-                elif status == "awaiting_permission":
-                    self.log_test(
-                        "Tool Execution", 
-                        True, 
-                        "Permission required (expected for some commands)"
-                    )
-                    return True
-                else:
-                    self.log_test(
-                        "Tool Execution", 
-                        False, 
-                        f"Unexpected status: {status}"
-                    )
-                    return False
-            else:
-                self.log_test(
-                    "Tool Execution", 
-                    False, 
-                    f"HTTP {response.status_code}: {response.text}"
-                )
-                return False
-        except Exception as e:
-            self.log_test("Tool Execution", False, f"Request error: {str(e)}")
-            return False
-    
-    def test_permission_flow(self) -> bool:
-        """Тест потока разрешений"""
-        try:
-            # Сначала отправляем задачу, требующую разрешения
-            payload = {
-                "input": "Запусти команду, требующую разрешения",
-                "context": {
-                    "requested_tool": "shell_exec", 
-                    "tool_args": {"command": "whoami"}
-                }
-            }
-            response = self.session.post(
-                f"{self.base_url}/chat", 
-                json=payload, 
-                timeout=30
-            )
-            
-            if response.status_code != 200:
-                self.log_test(
-                    "Permission Flow", 
-                    False, 
-                    f"Initial request failed: HTTP {response.status_code}"
-                )
-                return False
-            
-            data = response.json()
-            if data.get("status") == "awaiting_permission":
-                task_id = data.get("task_id")
-                if not task_id:
-                    self.log_test(
-                        "Permission Flow", 
-                        False, 
-                        "No task_id in response"
-                    )
-                    return False
-                
-                # Теперь разрешаем разрешение
-                resolve_payload = {
-                    "task_id": task_id,
-                    "decision": "allow_once"
-                }
-                resolve_response = self.session.post(
-                    f"{self.base_url}/permissions/resolve",
-                    json=resolve_payload,
-                    timeout=10
-                )
-                
-                if resolve_response.status_code == 200:
-                    resolve_data = resolve_response.json()
-                    final_status = resolve_data.get("status")
-                    if final_status in ["completed", "failed"]:
-                        self.log_test(
-                            "Permission Flow", 
-                            True, 
-                            f"Permission resolved, final status: {final_status}"
-                        )
-                        return True
-                    else:
-                        self.log_test(
-                            "Permission Flow", 
-                            False, 
-                            f"Unexpected final status: {final_status}"
-                        )
-                        return False
-                else:
-                    self.log_test(
-                        "Permission Flow", 
-                        False, 
-                        f"Permission resolution failed: HTTP {resolve_response.status_code}"
-                    )
-                    return False
-            else:
-                # Если разрешение не потребовалось, это тоже нормально для некоторых систем
-                self.log_test(
-                    "Permission Flow", 
-                    True, 
-                    f"No permission required, status: {data.get('status')}"
-                )
-                return True
-                
-        except Exception as e:
-            self.log_test("Permission Flow", False, f"Request error: {str(e)}")
-            return False
-    
-    def run_all_tests(self) -> Dict[str, Any]:
-        """Запустить все тесты"""
-        print("Starting ducklm tests...")
-        print("=" * 50)
-        
-        # Ждем немного, чтобы сервер успел запуститься
-        time.sleep(2)
-        
-        tests = [
-            self.test_health,
-            self.test_simple_chat,
-            self.test_tool_execution,
-            self.test_permission_flow,
-        ]
-        
-        passed = 0
-        total = len(tests)
-        
-        for test in tests:
-            if test():
-                passed += 1
-            time.sleep(1)  # Небольшая пауза между тестами для слабого железа
-        
-        print("=" * 50)
-        print(f"Tests completed: {passed}/{total} passed")
-        
-        # Сводка результатов
-        summary = {
-            "total_tests": total,
-            "passed_tests": passed,
-            "failed_tests": total - passed,
-            "success_rate": passed / total if total > 0 else 0,
-            "test_results": self.test_results
-        }
-        
-        return summary
-
-
-def main():
-    """Основная функция"""
-    import argparse
-    
-    parser = argparse.ArgumentParser(description="Тест ducklm системы")
-    parser.add_argument("--url", default="http://127.0.0.1:8000", help="Base URL for ducklm server")
-    parser.add_argument("--test", choices=["health", "chat", "tool", "permission", "all"], 
-                       default="all", help="Specific test to run")
-    
-    args = parser.parse_args()
-    
-    tester = DuckLMTester(args.url)
-    
-    if args.test == "all":
-        results = tester.run_all_tests()
-        print("\nFINAL RESULTS:")
-        print(f"Passed: {results['passed_tests']}/{results['total_tests']}")
-        print(f"Success Rate: {results['success_rate']*100:.1f}%")
-        
-        # Возвращаем код выхода basado на результатах
-        sys.exit(0 if results['failed_tests'] == 0 else 1)
-    else:
-        # Запуск конкретного теста
-        test_map = {
-            "health": tester.test_health,
-            "chat": tester.test_simple_chat,
-            "tool": tester.test_tool_execution,
-            "permission": tester.test_permission_flow,
-        }
-        
-        test_func = test_map[args.test]
-        if test_func():
-            print(f"Test {args.test}: PASSED")
-            sys.exit(0)
-        else:
-            print(f"Test {args.test}: FAILED")
-            sys.exit(1)
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/test_ducklm_direct.py b/test_ducklm_direct.py
deleted file mode 100644
index e633bac..0000000
--- a/test_ducklm_direct.py
+++ /dev/null
@@ -1,409 +0,0 @@
-#!/usr/bin/env python3
-"""
-Прямой тест ducklm через RuntimeController (без HTTP сервера).
-Позволяет ИИ-кодеру тестировать систему через отправку запросов и проверку выполнения.
-"""
-
-import json
-import time
-import sys
-from pathlib import Path
-from typing import Dict, Any
-
-# Добавляем текущую директорию в путь для импорта app
-sys.path.insert(0, '.')
-
-from app.runtime.runtime_controller import RuntimeController
-from app.core.contracts import UserTask
-
-
-class DuckLMDirectTester:
-    def __init__(self, base_dir: str = "."):
-        self.base_dir = Path(base_dir)
-        self.test_results = []
-        self.controller = None
-        
-    def setup(self):
-        """Инициализировать контроллер"""
-        try:
-            print("Инициализация RuntimeController...")
-            self.controller = RuntimeController(base_dir=self.base_dir)
-            print("RuntimeController инициализирован успешно")
-            return True
-        except Exception as e:
-            print(f"Ошибка инициализации RuntimeController: {e}")
-            return False
-    
-    def log_test(self, test_name: str, passed: bool, details: str = ""):
-        """Записать результат теста"""
-        result = {
-            "test": test_name,
-            "passed": passed,
-            "details": details,
-            "timestamp": time.time()
-        }
-        self.test_results.append(result)
-        status = "✓ PASS" if passed else "✗ FAIL"
-        print(f"{status}: {test_name}")
-        if details:
-            print(f"  Details: {details}")
-    
-    def test_health(self) -> bool:
-        """Проверить что контроллер работает"""
-        try:
-            if self.controller is None:
-                self.log_test("Health Check", False, "Controller not initialized")
-                return False
-            
-            # Проверяем что основные компоненты присутствуют
-            components = [
-                ("event_bus", self.controller.event_bus),
-                ("permission_service", self.controller.permission_service),
-                ("task_state_store", self.controller.task_state_store),
-                ("checkpoint_store", self.controller.checkpoint_store),
-                ("context_builder", self.controller.context_builder),
-                ("router", self.controller.router),
-                ("execution_engine", self.controller.execution_engine),
-            ]
-            
-            missing = []
-            for name, component in components:
-                if component is None:
-                    missing.append(name)
-            
-            if missing:
-                self.log_test("Health Check", False, f"Missing components: {missing}")
-                return False
-            else:
-                self.log_test("Health Check", True, "Все компоненты инициализированы")
-                return True
-                
-        except Exception as e:
-            self.log_test("Health Check", False, f"Error: {str(e)}")
-            return False
-    
-    def test_simple_task(self) -> bool:
-        """Простой тест задачи"""
-        try:
-            if self.controller is None:
-                self.log_test("Simple Task", False, "Controller not initialized")
-                return False
-            
-            # Создаем простую задачу
-            task = UserTask(input="Привет, как дела?")
-            
-            # Выполняем задачу через контроллер
-            result = self.controller.handle_task(task)
-            
-            status = result.get("status")
-            if status in ["completed", "awaiting_permission", "awaiting_input"]:
-                self.log_test(
-                    "Simple Task", 
-                    True, 
-                    f"Status: {status}, Task ID: {result.get('task_id')}"
-                )
-                return True
-            else:
-                self.log_test(
-                    "Simple Task", 
-                    False, 
-                    f"Unexpected status: {status}"
-                )
-                return False
-                
-        except Exception as e:
-            self.log_test("Simple Task", False, f"Request error: {str(e)}")
-            return False
-    
-    def test_tool_task(self) -> bool:
-        """Тест задачи с инструментом"""
-        try:
-            if self.controller is None:
-                self.log_test("Tool Task", False, "Controller not initialized")
-                return False
-            
-            # Тест простой команды shell через контекст
-            task = UserTask(
-                input="Выполни простую команду",
-                context={
-                    "requested_tool": "shell_exec",
-                    "tool_args": {"command": "echo 'hello from test'"}
-                }
-            )
-            
-            result = self.controller.handle_task(task)
-            
-            status = result.get("status")
-            if status == "completed":
-                output = result.get("result", {}).get("output", "")
-                if "hello from test" in output:
-                    self.log_test(
-                        "Tool Task", 
-                        True, 
-                        f"Command executed successfully: {output.strip()}"
-                    )
-                    return True
-                else:
-                    self.log_test(
-                        "Tool Task", 
-                        False, 
-                        f"Unexpected output: {output}"
-                    )
-                    return False
-            elif status == "awaiting_permission":
-                self.log_test(
-                    "Tool Task", 
-                    True, 
-                    "Permission required (expected for some commands)"
-                )
-                return True
-            else:
-                self.log_test(
-                    "Tool Task", 
-                    False, 
-                    f"Unexpected status: {status}"
-                )
-                return False
-                
-        except Exception as e:
-            self.log_test("Tool Task", False, f"Request error: {str(e)}")
-            return False
-    
-    def test_memory_tools(self) -> bool:
-        """Тест инструментов памяти"""
-        try:
-            if self.controller is None:
-                self.log_test("Memory Tools", False, "Controller not initialized")
-                return False
-            
-            # Тест вставки в память
-            task_insert = UserTask(
-                input="Запомни эту информацию: тестовое значение 123",
-                context={
-                    "requested_tool": "memory",
-                    "tool_args": {
-                        "operation": "insert",
-                        "text": "тестовое значение 123",
-                        "kind": "fact",
-                        "weight": 0.8
-                    }
-                }
-            )
-            
-            result_insert = self.controller.handle_task(task_insert)
-            
-            if result_insert.get("status") != "completed":
-                self.log_test(
-                    "Memory Tools Insert", 
-                    False, 
-                    f"Insert failed: {result_insert.get('status')}"
-                )
-                return False
-            
-            # Тест поиска в памяти
-            task_search = UserTask(
-                input="Найди запомненную информацию",
-                context={
-                    "requested_tool": "memory",
-                    "tool_args": {
-                        "operation": "search",
-                        "query": "тестовое значение",
-                        "limit": 5
-                    }
-                }
-            )
-            
-            result_search = self.controller.handle_task(task_search)
-            
-            if result_search.get("status") == "completed":
-                output = result_search.get("result", {}).get("output", "")
-                self.log_test(
-                    "Memory Tools", 
-                    True, 
-                    f"Memory search successful: {output[:100]}..."
-                )
-                return True
-            else:
-                self.log_test(
-                    "Memory Tools Search", 
-                    False, 
-                    f"Search failed: {result_search.get('status')}"
-                )
-                return False
-                
-        except Exception as e:
-            self.log_test("Memory Tools", False, f"Request error: {str(e)}")
-            return False
-    
-    def test_file_operations(self) -> bool:
-        """Тест операций с файлами"""
-        try:
-            if self.controller is None:
-                self.log_test("File Operations", False, "Controller not initialized")
-                return False
-            
-            import tempfile
-            import os
-            
-            # Создаем временный файл для теста
-            with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
-                temp_path = f.name
-                f.write("initial content for testing")
-            
-            try:
-                # Тест чтения файла
-                task_read = UserTask(
-                    input="Прочитай файл",
-                    context={
-                        "requested_tool": "file_read",
-                        "tool_args": {"path": temp_path}
-                    }
-                )
-                
-                result_read = self.controller.handle_task(task_read)
-                
-                if result_read.get("status") != "completed":
-                    self.log_test(
-                        "File Read", 
-                        False, 
-                        f"Read failed: {result_read.get('status')}"
-                    )
-                    return False
-                
-                # Тест записи файла
-                new_content = "updated content from test"
-                task_write = UserTask(
-                    input="Запиши в файл",
-                    context={
-                        "requested_tool": "file_write",
-                        "tool_args": {
-                            "path": temp_path,
-                            "content": new_content
-                        }
-                    }
-                )
-                
-                result_write = self.controller.handle_task(task_write)
-                
-                if result_write.get("status") != "completed":
-                    self.log_test(
-                        "File Write", 
-                        False, 
-                        f"Write failed: {result_write.get('status')}"
-                    )
-                    return False
-                
-                # Проверяем что файл действительно обновился
-                with open(temp_path, 'r') as f:
-                    actual_content = f.read()
-                
-                if actual_content == new_content:
-                    self.log_test(
-                        "File Operations", 
-                        True, 
-                        f"File read/write successful: {actual_content}"
-                    )
-                    return True
-                else:
-                    self.log_test(
-                        "File Operations", 
-                        False, 
-                        f"File content mismatch. Expected: {new_content}, Got: {actual_content}"
-                    )
-                    return False
-                    
-            finally:
-                # Очищаем временный файл
-                if os.path.exists(temp_path):
-                    os.unlink(temp_path)
-                
-        except Exception as e:
-            self.log_test("File Operations", False, f"Request error: {str(e)}")
-            return False
-    
-    def run_all_tests(self) -> Dict[str, Any]:
-        """Запустить все тесты"""
-        print("Starting direct ducklm tests...")
-        print("=" * 50)
-        
-        if not self.setup():
-            print("Failed to setup controller")
-            return {"error": "Setup failed"}
-        
-        tests = [
-            self.test_health,
-            self.test_simple_task,
-            self.test_tool_task,
-            self.test_memory_tools,
-            self.test_file_operations,
-        ]
-        
-        passed = 0
-        total = len(tests)
-        
-        for test in tests:
-            if test():
-                passed += 1
-            time.sleep(0.5)  # Небольшая пауза между тестами
-        
-        print("=" * 50)
-        print(f"Tests completed: {passed}/{total} passed")
-        
-        # Сводка результатов
-        summary = {
-            "total_tests": total,
-            "passed_tests": passed,
-            "failed_tests": total - passed,
-            "success_rate": passed / total if total > 0 else 0,
-            "test_results": self.test_results
-        }
-        
-        return summary
-
-
-def main():
-    """Основная функция"""
-    import argparse
-    
-    parser = argparse.ArgumentParser(description="Тест ducklm системы (прямой доступ)")
-    parser.add_argument("--basedir", default=".", help="Base directory for ducklm")
-    parser.add_argument("--test", choices=["health", "simple", "tool", "memory", "file", "all"], 
-                       default="all", help="Specific test to run")
-    
-    args = parser.parse_args()
-    
-    tester = DuckLMDirectTester(args.basedir)
-    
-    if args.test == "all":
-        results = tester.run_all_tests()
-        print("\nFINAL RESULTS:")
-        print(f"Passed: {results['passed_tests']}/{results['total_tests']}")
-        print(f"Success Rate: {results['success_rate']*100:.1f}%")
-        
-        # Возвращаем код выхода basado на результатах
-        sys.exit(0 if results['failed_tests'] == 0 else 1)
-    else:
-        # Запуск конкретного теста
-        if not tester.setup():
-            print("Failed to setup controller")
-            sys.exit(1)
-            
-        test_map = {
-            "health": tester.test_health,
-            "simple": tester.test_simple_task,
-            "tool": tester.test_tool_task,
-            "memory": tester.test_memory_tools,
-            "file": tester.test_file_operations,
-        }
-        
-        test_func = test_map[args.test]
-        if test_func():
-            print(f"Test {args.test}: PASSED")
-            sys.exit(0)
-        else:
-            print(f"Test {args.test}: FAILED")
-            sys.exit(1)
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/tests/smoke/test_action_directive_schema.py b/tests/smoke/test_action_directive_schema.py
new file mode 100644
index 0000000..b3f9649
--- /dev/null
+++ b/tests/smoke/test_action_directive_schema.py
@@ -0,0 +1,16 @@
+import json
+from pathlib import Path
+
+from jsonschema import validate
+
+
+def test_action_directive_schema_accepts_minimal_directive():
+    """The schema file, decoded as UTF-8, must accept a directive with no actions."""
+    schema_path = Path("duck_core/schemas/action_directive.schema.json")
+    directive = {
+        "kind": "action_directive",
+        "intent": "No action needed",
+        "risk_level": "none",
+        "actions": [],
+    }
+    validate(directive, json.loads(schema_path.read_text(encoding="utf-8")))
diff --git a/tests/smoke/test_api_health.py b/tests/smoke/test_api_health.py
new file mode 100644
index 0000000..8558ef2
--- /dev/null
+++ b/tests/smoke/test_api_health.py
@@ -0,0 +1,25 @@
+from fastapi.testclient import TestClient
+
+from duck_core.api import create_app
+
+
+def test_health_and_status_endpoints(tmp_path, monkeypatch):
+    """GET /health reports ok and GET /v1/status exposes the name and API host."""
+    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
+    api = TestClient(create_app())
+    health_payload = api.get("/health").json()
+    assert health_payload["status"] == "ok"
+    status_payload = api.get("/v1/status").json()
+    assert status_payload["name"] == "DuckLM"
+    assert status_payload["api_host"] == "127.0.0.1"
+
+
+def test_webchat_index_renders(tmp_path, monkeypatch):
+    """GET / answers with status 200 and a body that mentions DuckLM."""
+    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
+    api = TestClient(create_app())
+
+    page = api.get("/")
+
+    assert page.status_code == 200
+    assert "DuckLM" in page.text
diff --git a/tests/smoke/test_api_stream_chat.py b/tests/smoke/test_api_stream_chat.py
new file mode 100644
index 0000000..45d3a11
--- /dev/null
+++ b/tests/smoke/test_api_stream_chat.py
@@ -0,0 +1,103 @@
+from fastapi.testclient import TestClient
+import json
+
+from duck_core.model_client import ModelResponse
+
+from duck_core.api import create_app
+
+
+def test_stream_chat_endpoint_emits_sse_reasoning_and_content(tmp_path, monkeypatch):
+    """With an empty directive the stream carries reasoning, content and done events."""
+    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
+    empty_directive = {
+        "kind": "action_directive",
+        "intent": "answer directly",
+        "risk_level": "none",
+        "actions": [],
+    }
+
+    async def fake_chat(self, role, messages):
+        return ModelResponse(
+            role=role,
+            model="local-main",
+            content=json.dumps(empty_directive),
+            reasoning_content=None,
+            raw={},
+            latency_ms=1.0,
+        )
+
+    async def fake_stream_chat(self, role, messages):
+        for kind, text in (("reasoning_delta", "thinking"), ("content_delta", "answer")):
+            yield {"type": kind, "delta": text}
+
+    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
+    monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat)
+    client = TestClient(create_app())
+    request_body = {"message": "hello", "workspace": "./workspace", "debug": True}
+
+    with client.stream("POST", "/v1/chat/stream", json=request_body) as response:
+        body = "".join(response.iter_text())
+
+    expected_fragments = (
+        "event: reasoning_delta",
+        "event: content_delta",
+        "event: done",
+        "thinking",
+        "answer",
+    )
+    assert response.status_code == 200
+    for fragment in expected_fragments:
+        assert fragment in body
+
+
+def test_stream_chat_endpoint_executes_tool_before_streaming_answer(tmp_path, monkeypatch):
+    """A file_read directive yields tool events and file content before the streamed answer."""
+    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
+    (tmp_path / "note.txt").write_text("stream tool content")
+    read_note = {
+        "tool": "file_read",
+        "args": {"path": "note.txt"},
+        "reason": "User asked for file contents",
+    }
+    directive = {
+        "kind": "action_directive",
+        "intent": "read requested file",
+        "risk_level": "low",
+        "actions": [read_note],
+    }
+
+    async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None):
+        assert role == "action"
+        return ModelResponse(
+            role=role,
+            model="local-main",
+            content=json.dumps(directive),
+            reasoning_content=None,
+            raw={},
+            latency_ms=1.0,
+        )
+
+    async def fake_stream_chat(self, role, messages):
+        assert role == "thinker"
+        saw_observations = any("tool_observations" in message["content"] for message in messages)
+        assert saw_observations
+        yield {"type": "content_delta", "delta": "answer from tool"}
+
+    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
+    monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat)
+    client = TestClient(create_app())
+    request_body = {"message": "read note.txt", "workspace": str(tmp_path), "debug": True}
+
+    with client.stream("POST", "/v1/chat/stream", json=request_body) as response:
+        body = "".join(response.iter_text())
+
+    assert response.status_code == 200
+    for fragment in (
+        "event: tool_call_started",
+        "event: tool_call_finished",
+        "stream tool content",
+        "event: content_delta",
+        "answer from tool",
+        "event: done",
+    ):
+        assert fragment in body
diff --git a/tests/smoke/test_approvals.py b/tests/smoke/test_approvals.py
new file mode 100644
index 0000000..19d8e9c
--- /dev/null
+++ b/tests/smoke/test_approvals.py
@@ -0,0 +1,18 @@
+import pytest
+
+from duck_core.approvals.service import ApprovalService
+
+
+@pytest.mark.asyncio
+async def test_approval_service_allow_forever_is_exact_hash(tmp_path):
+    """An allow-forever grant covers the approved action but not a different command."""
+    approvals = ApprovalService(str(tmp_path / "duck.sqlite3"))
+    await approvals.init()
+    approved = {"tool": "shell_exec_safe", "args": {"command": "pytest tests/smoke -v"}}
+    other = {"tool": "shell_exec_safe", "args": {"command": "pytest -v"}}
+
+    pending = await approvals.create_pending("task_1", approved)
+    await approvals.allow_forever(pending.approval_id)
+
+    assert await approvals.is_allowed_forever(approved) is True
+    assert await approvals.is_allowed_forever(other) is False
diff --git a/tests/smoke/test_chat_api.py b/tests/smoke/test_chat_api.py
new file mode 100644
index 0000000..edc0b85
--- /dev/null
+++ b/tests/smoke/test_chat_api.py
@@ -0,0 +1,96 @@
+from dataclasses import dataclass
+import json
+
+from fastapi.testclient import TestClient
+
+from duck_core.api import create_app
+from duck_core.model_client import ModelResponse
+
+
+@dataclass
+class FakeResponse:  # NOTE(review): no test in this file references this stub.
+    role: str = "thinker"
+    model: str = "local-main"
+    content: str = "Я DuckLM, локальная агентная система."
+    raw: dict | None = None  # optional: the default is None, not a dict
+    latency_ms: float = 1.0
+    prompt_tokens: int | None = 1
+    completion_tokens: int | None = 1
+    total_tokens: int | None = 2
+
+
+def test_chat_api_uses_runtime_and_records_events(tmp_path, monkeypatch):
+    """A plain-text model reply completes the task and yields the expected event timeline."""
+    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
+    monkeypatch.setenv("DUCK_SKIP_LIVE_LLM_TESTS", "1")
+    expected_timeline = [
+        "task_created",
+        "model_call_started",
+        "action_directive_failed",
+        "model_call_started",
+        "cognition_response",
+        "model_call_finished",
+        "task_completed",
+    ]
+
+    async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None):
+        return ModelResponse(
+            role="thinker",
+            model="local-main",
+            content="Я DuckLM, локальная агентная система.",
+            reasoning_content=None,
+            raw={},
+            latency_ms=1.0,
+            prompt_tokens=1,
+            completion_tokens=1,
+            total_tokens=2,
+        )
+
+    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
+    client = TestClient(create_app())
+
+    payload = client.post("/v1/chat", json={"message": "Кто ты?", "debug": True}).json()
+    events = client.get(f"/v1/tasks/{payload['task_id']}/events").json()
+
+    assert payload["status"] == "completed"
+    assert "DuckLM" in payload["final_response"]
+    assert [event["event_type"] for event in events] == expected_timeline
+
+
+def test_chat_api_exposes_pending_approval_from_runtime_tool_gate(tmp_path, monkeypatch):
+    """A shell directive leaves the chat waiting for approval and lists it as pending."""
+    monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3"))
+    shell_action = {
+        "tool": "shell_exec_safe",
+        "args": {"command": "uname -a"},
+        "reason": "needs shell command",
+    }
+    directive = {
+        "kind": "action_directive",
+        "intent": "run command",
+        "risk_level": "medium",
+        "actions": [shell_action],
+    }
+
+    async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None):
+        # Only the action role is expected; any other call fails the test.
+        if role != "action":
+            raise AssertionError("thinker should not run while approval is pending")
+        return ModelResponse(
+            role=role,
+            model="local-main",
+            content=json.dumps(directive),
+            reasoning_content=None,
+            raw={},
+            latency_ms=1.0,
+        )
+
+    monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat)
+    client = TestClient(create_app())
+
+    response = client.post("/v1/chat", json={"message": "run uname", "debug": True})
+    approvals = client.get("/v1/approvals/pending").json()
+
+    assert response.status_code == 200
+    assert response.json()["status"] == "waiting_for_approval"
+    assert approvals[0]["normalized_action"]["tool"] == "shell_exec_safe"
diff --git a/tests/smoke/test_event_log.py b/tests/smoke/test_event_log.py
new file mode 100644
index 0000000..8e4c073
--- /dev/null
+++ b/tests/smoke/test_event_log.py
@@ -0,0 +1,25 @@
+import pytest
+
+from duck_core.events.store import EventStore
+from duck_core.tasks.store import TaskStore
+
+
+@pytest.mark.asyncio
+async def test_task_and_event_store_round_trip(tmp_path):
+    """A task and its single event can be written and read back from one database file."""
+    database = str(tmp_path / "duck.sqlite3")
+    task_store, event_store = TaskStore(database), EventStore(database)
+    await task_store.init()
+    await event_store.init()
+
+    created = await task_store.create_task("hello", "./workspace", True)
+    await event_store.append(created.task_id, "task_created", {"message": "hello"})
+    await task_store.complete_task(created.task_id, "done")
+
+    reloaded = await task_store.get_task(created.task_id)
+    recorded_types = [e.event_type for e in await event_store.list_events(created.task_id)]
+
+    assert reloaded is not None
+    assert reloaded.status == "completed"
+    assert reloaded.final_response == "done"
+    assert recorded_types == ["task_created"]
diff --git a/tests/smoke/test_experience_recorder.py b/tests/smoke/test_experience_recorder.py
new file mode 100644
index 0000000..bbf9ae2
--- /dev/null
+++ b/tests/smoke/test_experience_recorder.py
@@ -0,0 +1,24 @@
+import pytest
+
+from duck_core.experience.recorder import ExperienceRecorder
+
+
+@pytest.mark.asyncio
+async def test_experience_recorder_round_trip(tmp_path):
+    """A recorded experience receives an id and is returned by list_records()."""
+    recorder = ExperienceRecorder(str(tmp_path / "duck.sqlite3"))
+    await recorder.init()
+    stored = await recorder.record(
+        task_id="task_1",
+        skill_id="analyze_project",
+        result="success",
+        summary="Checked project",
+        reusable_lesson="Keep context grounded in files.",
+        what_worked=["events"],
+        what_failed=[],
+        confidence=0.8,
+    )
+    listed = await recorder.list_records()
+
+    assert stored.id is not None
+    assert listed[0].summary == "Checked project"
diff --git a/tests/smoke/test_llama_server_connection.py b/tests/smoke/test_llama_server_connection.py
new file mode 100644
index 0000000..6fc9a49
--- /dev/null
+++ b/tests/smoke/test_llama_server_connection.py
@@ -0,0 +1,13 @@
+import os
+
+import pytest
+
+from duck_core.model_client import ModelClient
+
+
+@pytest.mark.asyncio
+async def test_llama_server_connection_live_skip_by_env():
+    """Require one ok ping result; skipped while DUCK_SKIP_LIVE_LLM_TESTS is "1" (default)."""
+    if os.getenv("DUCK_SKIP_LIVE_LLM_TESTS", "1") == "1":
+        pytest.skip("Live LLM tests skipped")
+    assert any(item["ok"] for item in (await ModelClient().ping()).values())
diff --git a/tests/smoke/test_llama_service_script.py b/tests/smoke/test_llama_service_script.py
new file mode 100644
index 0000000..b0472c8
--- /dev/null
+++ b/tests/smoke/test_llama_service_script.py
@@ -0,0 +1,57 @@
+import os
+import subprocess
+import textwrap
+import time
+from pathlib import Path
+
+
+def test_start_main_script_manages_pid_status_stop_and_logs(tmp_path):
+    """Drive start_main.sh through status, start, logs and stop against a fake llama-server."""
+    launcher_source = textwrap.dedent(
+        """\
+        #!/usr/bin/env bash
+        echo "fake llama-server $*" >&2
+        trap 'exit 0' TERM INT
+        while true; do sleep 1; done
+        """
+    )
+    fake_bin = tmp_path / "llama-server"
+    fake_bin.write_text(launcher_source)
+    fake_bin.chmod(0o755)
+    pid_file = tmp_path / "llama.pid"
+    log_file = tmp_path / "llama.log"
+
+    env = {
+        **os.environ,
+        "DUCK_LLAMA_SERVER_BIN": str(fake_bin),
+        "DUCK_MAIN_MODEL_PATH": str(tmp_path / "model.gguf"),
+        "DUCK_LLAMA_PID_FILE": str(pid_file),
+        "DUCK_LLAMA_LOG_FILE": str(log_file),
+        "DUCK_MAIN_PORT": "18081",
+    }
+    Path(env["DUCK_MAIN_MODEL_PATH"]).write_text("fake")
+
+    def service(*args):
+        command = ["scripts/llama/start_main.sh", *args]
+        return subprocess.run(command, env=env, text=True, capture_output=True)
+
+    before_start = service("status")
+    assert before_start.returncode == 3
+    assert "not running" in before_start.stdout
+
+    assert service("start").returncode == 0
+    assert pid_file.exists()
+
+    # Always stop the fake server, even when an assertion below fails.
+    try:
+        running = service("status")
+        assert running.returncode == 0
+        assert "running" in running.stdout
+
+        time.sleep(0.2)
+        logs = service("logs", "--lines", "20")
+        assert logs.returncode == 0
+        assert "--alias local-main" in logs.stdout
+    finally:
+        assert service("stop").returncode == 0
+        assert not pid_file.exists()
diff --git a/tests/smoke/test_model_client.py b/tests/smoke/test_model_client.py
new file mode 100644
index 0000000..5901b29
--- /dev/null
+++ b/tests/smoke/test_model_client.py
@@ -0,0 +1,92 @@
+import pytest
+import httpx
+
+from duck_core.model_client import ModelClient
+
+
+def test_model_client_loads_role_settings():
+    """Role configs loaded from config/models.yaml expose the expected settings."""
+    client = ModelClient("config/models.yaml")
+    thinker_cfg = client.get_role_config("thinker")
+    action_cfg = client.get_role_config("action")
+
+    assert thinker_cfg.model == "local-main"
+    assert thinker_cfg.temperature == 0.4
+    assert action_cfg.structured_output is True
+    assert action_cfg.response_schema == "duck_core/schemas/action_directive.schema.json"
+
+
+@pytest.mark.asyncio
+async def test_model_client_missing_role_is_clear_error():
+    """Chatting with an unconfigured role raises KeyError matching "Unknown model role"."""
+    models = ModelClient("config/models.yaml")
+    with pytest.raises(KeyError, match="Unknown model role"):
+        await models.chat("missing", [{"role": "user", "content": "hello"}])
+
+
+@pytest.mark.asyncio
+async def test_model_client_preserves_reasoning_content(monkeypatch):
+    """chat() returns both the content and the reasoning_content of the HTTP reply."""
+    assistant_message = {
+        "role": "assistant",
+        "content": "final answer",
+        "reasoning_content": "private reasoning",
+    }
+    completion = {
+        "choices": [{"message": assistant_message}],
+        "usage": {
+            "prompt_tokens": 3,
+            "completion_tokens": 2,
+            "total_tokens": 5,
+        },
+    }
+
+    async def fake_post(self, url, json):
+        return httpx.Response(
+            200,
+            json=completion,
+            request=httpx.Request("POST", url),
+        )
+
+    monkeypatch.setattr(httpx.AsyncClient, "post", fake_post)
+    client = ModelClient("config/models.yaml")
+
+    reply = await client.chat("thinker", [{"role": "user", "content": "hello"}])
+
+    assert reply.content == "final answer"
+    assert reply.reasoning_content == "private reasoning"
+
+
+@pytest.mark.asyncio
+async def test_model_client_stream_chat_yields_reasoning_then_content(monkeypatch):
+    """stream_chat() turns SSE data lines into reasoning and content delta chunks."""
+    class StubResponse:
+        def raise_for_status(self):
+            return None
+
+        async def aiter_lines(self):
+            yield 'data: {"choices":[{"delta":{"reasoning_content":"thinking "}}]}'
+            yield 'data: {"choices":[{"delta":{"content":"answer"}}]}'
+            yield "data: [DONE]"
+
+    class StubStream:
+        async def __aenter__(self):
+            return StubResponse()
+
+        async def __aexit__(self, exc_type, exc, tb):
+            return False
+
+    def open_stream(self, method, url, json):
+        return StubStream()
+
+    monkeypatch.setattr(httpx.AsyncClient, "stream", open_stream)
+    client = ModelClient("config/models.yaml")
+
+    chunks = []
+    async for chunk in client.stream_chat("thinker", [{"role": "user", "content": "hello"}]):
+        chunks.append(chunk)
+
+    assert chunks == [
+        {"type": "reasoning_delta", "delta": "thinking "},
+        {"type": "content_delta", "delta": "answer"},
+    ]
diff --git a/tests/smoke/test_models_config.py b/tests/smoke/test_models_config.py
new file mode 100644
index 0000000..36f3309
--- /dev/null
+++ b/tests/smoke/test_models_config.py
@@ -0,0 +1,16 @@
+from pathlib import Path
+
+import yaml
+
+
+def test_models_config_maps_roles_to_same_qwen_non_mtp_model():
+    """Every listed role must use the same llama-server endpoint and the local-main model."""
+    config = yaml.safe_load(Path("config/models.yaml").read_text(encoding="utf-8"))
+    assert config["default_provider"] == "llama_server"
+    roles = config["models"]
+    for role in ("thinker", "critic", "coder", "action", "summary"):
+        assert roles[role]["base_url"] == "http://127.0.0.1:8081/v1"
+        assert roles[role]["model"] == "local-main"
+
+    assert roles["action"]["structured_output"] is True
+    assert roles["thinker"]["max_output_tokens"] == 8192
diff --git a/tests/smoke/test_runtime_reasoning.py b/tests/smoke/test_runtime_reasoning.py
new file mode 100644
index 0000000..962a6bd
--- /dev/null
+++ b/tests/smoke/test_runtime_reasoning.py
@@ -0,0 +1,37 @@
+import pytest
+
+from duck_core.events.store import EventStore
+from duck_core.model_client import ModelResponse
+from duck_core.runtime_loop import RuntimeLoop
+from duck_core.tasks.store import TaskStore
+
+
+class FakeModelClient:
+    """Model stub whose reply carries both visible content and a reasoning trace."""
+    async def chat(self, role, messages):
+        usage = {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3}
+        return ModelResponse(
+            role=role,
+            model="local-main",
+            content="visible answer",
+            reasoning_content="reasoning trace",
+            raw={},
+            latency_ms=12.0,
+            **usage,
+        )
+
+
+@pytest.mark.asyncio
+async def test_runtime_returns_and_logs_reasoning_content(tmp_path):
+    """The reasoning trace appears in the chat result and in the cognition_response event."""
+    database = str(tmp_path / "duck.sqlite3")
+    task_db, events_db = TaskStore(database), EventStore(database)
+    runtime = RuntimeLoop(task_db, events_db, FakeModelClient())
+
+    result = await runtime.run_chat("hello", "./workspace", debug=True)
+    timeline = await events_db.list_events(result.task_id)
+    cognition = next(e for e in timeline if e.event_type == "cognition_response")
+
+    assert result.final_response == "visible answer"
+    assert result.reasoning_content == "reasoning trace"
+    assert cognition.payload["reasoning_content"] == "reasoning trace"
diff --git a/tests/smoke/test_runtime_tools.py b/tests/smoke/test_runtime_tools.py
new file mode 100644
index 0000000..9eca0c2
--- /dev/null
+++ b/tests/smoke/test_runtime_tools.py
@@ -0,0 +1,112 @@
+import json
+
+import pytest
+
+from duck_core.events.store import EventStore
+from duck_core.model_client import ModelResponse
+from duck_core.approvals.service import ApprovalService
+from duck_core.runtime_loop import RuntimeLoop
+from duck_core.tasks.store import TaskStore
+
+
+class FakeToolModelClient:
+    """Stub: the action role asks for file_read; the thinker must see tool observations."""
+
+    async def chat(self, role, messages):
+        if role != "action":
+            assert role == "thinker"
+            assert any("tool_observations" in message["content"] for message in messages)
+            return ModelResponse(
+                role=role,
+                model="local-main",
+                content="The file says: hello from tool",
+                reasoning_content="used file_read",
+                raw={},
+                latency_ms=12.0,
+            )
+        read_note = {
+            "tool": "file_read",
+            "args": {"path": "note.txt"},
+            "reason": "User asked for file contents",
+        }
+        directive = {
+            "kind": "action_directive",
+            "intent": "read requested file",
+            "risk_level": "low",
+            "actions": [read_note],
+        }
+        return ModelResponse(
+            role=role,
+            model="local-main",
+            content=json.dumps(directive),
+            reasoning_content=None,
+            raw={},
+            latency_ms=5.0,
+        )
+
+
+@pytest.mark.asyncio
+async def test_runtime_executes_action_directive_tool_and_finishes_with_observation(tmp_path):
+    """The runtime runs the file_read action, logs its result and completes the task."""
+    (tmp_path / "note.txt").write_text("hello from tool")
+    database = str(tmp_path / "duck.sqlite3")
+    task_db, events_db = TaskStore(database), EventStore(database)
+    runtime = RuntimeLoop(task_db, events_db, FakeToolModelClient())
+
+    result = await runtime.run_chat("read note.txt", str(tmp_path), debug=True)
+    timeline = await events_db.list_events(result.task_id)
+    seen_types = {event.event_type for event in timeline}
+    finished = next(e.payload for e in timeline if e.event_type == "tool_call_finished")
+
+    assert result.status == "completed"
+    assert result.final_response == "The file says: hello from tool"
+    assert "action_directive" in seen_types
+    assert "tool_call_started" in seen_types
+    assert finished["tool"] == "file_read"
+    assert finished["result"]["ok"] is True
+    assert finished["result"]["output"] == "hello from tool"
+
+
+class FakeApprovalModelClient:
+    """Stub: the action role asks for shell_exec_safe; any other role fails the test."""
+
+    async def chat(self, role, messages):
+        if role != "action":
+            raise AssertionError("thinker must not be called while approval is pending")
+        uname_action = {
+            "tool": "shell_exec_safe",
+            "args": {"command": "uname -a"},
+            "reason": "User requested system information",
+        }
+        directive = {
+            "kind": "action_directive",
+            "intent": "run command",
+            "risk_level": "medium",
+            "actions": [uname_action],
+        }
+        return ModelResponse(
+            role=role,
+            model="local-main",
+            content=json.dumps(directive),
+            reasoning_content=None,
+            raw={},
+            latency_ms=5.0,
+        )
+
+
+@pytest.mark.asyncio
+async def test_runtime_creates_pending_approval_when_tool_requires_it(tmp_path):
+    """A shell_exec_safe directive stops the run with a pending approval and an event."""
+    database = str(tmp_path / "duck.sqlite3")
+    task_db, events_db = TaskStore(database), EventStore(database)
+    approvals = ApprovalService(database)
+    runtime = RuntimeLoop(task_db, events_db, FakeApprovalModelClient(), approval_service=approvals)
+
+    result = await runtime.run_chat("run uname", str(tmp_path), debug=True)
+    pending = await approvals.pending()
+    timeline = await events_db.list_events(result.task_id)
+
+    assert result.status == "waiting_for_approval"
+    assert pending[0].task_id == result.task_id
+    assert pending[0].normalized_action["tool"] == "shell_exec_safe"
+    assert "tool_approval_requested" in [event.event_type for event in timeline]
diff --git a/tests/smoke/test_skill_registry.py b/tests/smoke/test_skill_registry.py
new file mode 100644
index 0000000..32b3d3c
--- /dev/null
+++ b/tests/smoke/test_skill_registry.py
@@ -0,0 +1,9 @@
+from duck_core.skills.registry import SkillRegistry
+
+
+def test_skill_registry_loads_analyze_project_skill():
+    """The registry loads analyze_project and reports its risk level as low."""
+    registry = SkillRegistry("skills")
+    loaded_ids = [skill.id for skill in registry.load_skills()]
+    assert "analyze_project" in loaded_ids
+    assert registry.get_skill("analyze_project").risk_level == "low"
diff --git a/tests/smoke/test_tool_gateway.py b/tests/smoke/test_tool_gateway.py
new file mode 100644
index 0000000..fb3a596
--- /dev/null
+++ b/tests/smoke/test_tool_gateway.py
@@ -0,0 +1,42 @@
+import pytest
+
+from duck_core.tools.file_read import FileReadTool
+from duck_core.tools.file_write import FileWriteTool
+from duck_core.tools.gateway import ToolGateway
+from duck_core.tools.shell_exec_safe import ShellExecSafeTool
+
+
+@pytest.mark.asyncio
+async def test_file_tools_stay_inside_workspace(tmp_path):
+    """Reads and writes work inside the workspace; an existing outside file is refused."""
+    workspace = tmp_path / "workspace"
+    workspace.mkdir()
+    # The outside file must exist, otherwise "file not found" alone would pass the check.
+    (tmp_path / "outside.txt").write_text("outside the workspace")
+    write, read = FileWriteTool(str(workspace)), FileReadTool(str(workspace))
+
+    assert (await write.run({"path": "tmp/note.txt", "content": "hello duck"})).ok is True
+    assert (await read.run({"path": "tmp/note.txt"})).output == "hello duck"
+    assert (await read.run({"path": "../outside.txt"})).ok is False
+
+
+@pytest.mark.asyncio
+async def test_shell_tool_blocks_dangerous_commands(tmp_path):
+    """The shell tool runs pwd but rejects rm -rf on the working directory."""
+    sandboxed_shell = ShellExecSafeTool(str(tmp_path))
+    harmless = await sandboxed_shell.run({"command": "pwd"})
+    destructive = await sandboxed_shell.run({"command": "rm -rf ."})
+
+    assert harmless.ok is True
+    assert destructive.ok is False
+
+
+@pytest.mark.asyncio
+async def test_tool_gateway_runs_allowed_directive(tmp_path):
+    """The default gateway executes a file_write action and reports the written path."""
+    write_action = {"tool": "file_write", "args": {"path": "a.txt", "content": "x"}}
+    gateway = ToolGateway.default(str(tmp_path))
+    outcome = await gateway.run_action(write_action)
+
+    assert outcome.ok is True
+    assert outcome.metadata["path"].endswith("a.txt")
diff --git a/tests/smoke/test_vector_memory.py b/tests/smoke/test_vector_memory.py
new file mode 100644
index 0000000..ac148ec
--- /dev/null
+++ b/tests/smoke/test_vector_memory.py
@@ -0,0 +1,11 @@
+import pytest
+
+from duck_core.memory.vector_memory import EmbeddingsUnavailableError, VectorMemory
+
+
+@pytest.mark.asyncio
+async def test_vector_memory_stub_is_explicit_when_embeddings_unavailable():
+    """add_memory() raises EmbeddingsUnavailableError when no embeddings URL is set."""
+    store = VectorMemory(qdrant_url="http://127.0.0.1:6333", embeddings_base_url=None)
+    with pytest.raises(EmbeddingsUnavailableError):
+        await store.add_memory("remember this")
diff --git a/tests/test_api_handlers.py b/tests/test_api_handlers.py
deleted file mode 100644
index dd98429..0000000
--- a/tests/test_api_handlers.py
+++ /dev/null
@@ -1,122 +0,0 @@
-import asyncio
-import time
-
-import app.api.server as server
-from app.api.server import chat, critic_feedback, health, list_events, resolve_permission, resolve_review, resolve_secret
-from app.core.permission_resolution import PermissionResolutionRequest, ReviewResolutionRequest, SecretResolutionRequest
-from app.api.server import CriticFeedbackRequest
-from app.core.contracts import UserTask
-
-
-def test_health_handler() -> None:
-    assert health() == {"status": "ok"}
-
-
-def test_events_handler_returns_event_list() -> None:
-    body = list_events(limit=10)
-    assert "events" in body
-    assert isinstance(body["events"], list)
-
-
-def test_chat_handler_returns_runtime_events() -> None:
-    body = chat(UserTask(input="hello from handler test"))
-    assert body["status"] in {"accepted", "completed"}
-    if body["status"] == "completed":
-        assert body["events"][0]["type"] == "task_received"
-
-
-def test_chat_handler_submits_task_without_waiting_for_completion(monkeypatch) -> None:
-    class SlowRuntime:
-        def submit_task(self, task):
-            return {"task_id": task.task_id, "status": "accepted"}
-
-        def handle_task(self, task):
-            time.sleep(0.25)
-            return {"task_id": task.task_id, "status": "completed", "events": []}
-
-    monkeypatch.setattr("app.api.server.runtime", SlowRuntime())
-    started = time.monotonic()
-    body = chat(UserTask(input="long task"))
-
-    assert time.monotonic() - started < 0.1
-    assert body["status"] == "accepted"
-
-
-def test_lifespan_loads_models_without_threadpool_executor(monkeypatch) -> None:
-    class FakeRuntime:
-        _memory_interface = None
-
-        def __init__(self) -> None:
-            self.loaded = False
-
-        def load_models_at_startup(self) -> None:
-            self.loaded = True
-
-    class FailingLoop:
-        def run_in_executor(self, *args, **kwargs):
-            raise AssertionError("lifespan must not load llama models via run_in_executor")
-
-    fake_runtime = FakeRuntime()
-    monkeypatch.setattr(server, "runtime", fake_runtime)
-    monkeypatch.setattr(server.asyncio, "get_event_loop", lambda: FailingLoop())
-
-    async def run_lifespan() -> None:
-        async with server.lifespan(None):
-            pass
-
-    asyncio.run(run_lifespan())
-    assert fake_runtime.loaded is True
-
-
-def test_resolve_permission_handler_allows_completion() -> None:
-    initial = chat(UserTask(input="запусти pwd"))
-    if initial["status"] == "awaiting_permission":
-        body = resolve_permission(
-            PermissionResolutionRequest(task_id=initial["task_id"], decision="allow_once")
-        )
-        assert body["status"] in {"completed", "failed"}
-
-
-def test_resolve_secret_handler_requires_pending_request() -> None:
-    body = resolve_secret(SecretResolutionRequest(task_id="missing", secret="x"))
-    assert body["status"] == "failed"
-
-
-def test_resolve_review_handler_submits_review_resolution(monkeypatch) -> None:
-    class ReviewRuntime:
-        def submit_review_resolution(self, task_id, decision, correction=None):
-            return {
-                "task_id": task_id,
-                "status": "accepted",
-                "decision": decision,
-                "correction": correction,
-            }
-
-    monkeypatch.setattr("app.api.server.runtime", ReviewRuntime())
-    body = resolve_review(
-        ReviewResolutionRequest(
-            task_id="task-1",
-            decision="wrong_action",
-            correction="replan",
-        )
-    )
-
-    assert body["status"] == "accepted"
-    assert body["decision"] == "wrong_action"
-
-
-def test_structured_feedback_can_be_accepted_without_memory_write() -> None:
-    initial = chat(UserTask(input="feedback target"))
-    body = critic_feedback(
-        CriticFeedbackRequest(
-            task_id=initial["task_id"],
-            feedback="wrong answer",
-            feedback_type="hallucination",
-            severity="major",
-            correction="check first",
-            remember=False,
-        )
-    )
-    assert body["status"] == "ok"
-    assert body["stored"] is False
-    assert "hallucination" in body["lesson"]
diff --git a/tests/test_command_analyzer.py b/tests/test_command_analyzer.py
deleted file mode 100644
index f45e615..0000000
--- a/tests/test_command_analyzer.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from app.core.command_analyzer import CommandAnalyzer
-from app.core.permission_service import PermissionService
-
-
-def _permission_service() -> PermissionService:
-    return PermissionService(
-        config={
-            "settings": {},
-            "command_categories": {
-                "no_always": {
-                    "allow_once": True,
-                    "allow_always": False,
-                    "commands": ["apt", "apt-get", "dpkg", "systemctl"],
-                }
-            },
-            "path_settings": {},
-        }
-    )
-
-
-def test_detects_unelevated_root_required_segment_after_sudo_chain() -> None:
-    analyzer = CommandAnalyzer(_permission_service())
-
-    diagnosis = analyzer.analyze(
-        command="sudo apt update && apt upgrade -y",
-        task_id="task-1",
-        session_id="session-1",
-    )
-
-    assert diagnosis["type"] == "privilege_scope_error"
-    assert diagnosis["root_required_segments"] == ["apt update", "apt upgrade -y"]
-    assert diagnosis["elevated_segments"] == ["apt update"]
-    assert diagnosis["unelevated_root_segments"] == ["apt upgrade -y"]
-
-
-def test_accepts_each_root_required_segment_when_each_is_elevated() -> None:
-    analyzer = CommandAnalyzer(_permission_service())
-
-    diagnosis = analyzer.analyze(
-        command="sudo apt update && sudo apt upgrade -y",
-        task_id="task-1",
-        session_id="session-1",
-    )
-
-    assert diagnosis["type"] == "ok"
-    assert diagnosis["unelevated_root_segments"] == []
diff --git a/tests/test_contracts.py b/tests/test_contracts.py
deleted file mode 100644
index 7dee997..0000000
--- a/tests/test_contracts.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import asyncio
-
-from app.core.async_router import AsyncRouter
-from app.core.contracts import CriticScore, ExecutionDirective, PlanStep, UserTask
-
-
-class _FakeAdapter:
-    def __init__(self, responses: list[str]) -> None:
-        self._responses = responses
-
-    async def generate(self, prompt: str, max_tokens: int | None = None) -> str:
-        return self._responses.pop(0)
-
-
-def test_user_task_defaults() -> None:
-    task = UserTask(input="hello")
-    assert task.task_id
-    assert task.session_id
-
-
-def test_plan_step_supports_dependencies() -> None:
-    step = PlanStep(
-        id="step-1",
-        kind="tool",
-        tool="shell_exec",
-        description="run command",
-        depends_on=[],
-    )
-    assert step.tool == "shell_exec"
-
-
-def test_critic_score_bounds() -> None:
-    score = CriticScore(
-        correctness=1.0,
-        usefulness=0.5,
-        safety=0.0,
-        memory_store=False,
-        weight=0.2,
-        explanation="ok",
-    )
-    assert score.weight == 0.2
-
-
-def test_execution_directive_defaults() -> None:
-    directive = ExecutionDirective(type="noop")
-    assert directive.payload == {}
-    assert directive.confidence == 0.0
-
-
-def test_router_compiles_tool_plan_even_when_classifier_says_conversation() -> None:
-    router = AsyncRouter(
-        thinker=_FakeAdapter([
-            "conversation",
-            "ПЛАН:\nШаг 1: [shell_exec] выполнить `uptime`",
-        ]),
-        json_compiler=_FakeAdapter([
-            '{"type":"plan","payload":{"steps":[{"id":"1","tool":"shell_exec","args":{"command":"uptime"},"depends_on":[]}]}}'
-        ]),
-    )
-    directive = asyncio.run(
-        router.decide(
-            state={},
-            context={"task_summary": "Проверь аптайм ПК", "task_context": {}},
-        )
-    )
-    assert directive.type == "plan"
-    assert directive.payload["steps"][0]["tool"] == "shell_exec"
diff --git a/tests/test_runtime_loop.py b/tests/test_runtime_loop.py
deleted file mode 100644
index dfb3dc2..0000000
--- a/tests/test_runtime_loop.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from app.core.contracts import UserTask
-from app.runtime.runtime_controller import RuntimeController
-
-
-def test_runtime_loop_emits_basic_events() -> None:
-    controller = RuntimeController()
-    result = controller.handle_task(UserTask(input="hello runtime"))
-    event_types = [event["type"] for event in result["events"]]
-    assert result["status"] == "completed"
-    assert "message" in result["result"]
-    assert "task_received" in event_types
-    assert "context_built" in event_types
-    assert "task_completed" in event_types
-
-
-def test_runtime_loop_routes_natural_language_shell_request_to_permission_flow() -> None:
-    import os, shutil
-    # Clear permission cache to ensure clean state
-    cache_file = os.path.join(os.path.dirname(__file__), '..', 'data', 'runtime', 'allowed_commands.json')
-    if os.path.exists(cache_file):
-        os.remove(cache_file)
-    
-    controller = RuntimeController()
-    result = controller.handle_task(UserTask(input="запусти sudo apt update"))
-    event_types = [event["type"] for event in result["events"]]
-    # sudo commands require both permission and password
-    # First step: permission request
-    assert result["status"] == "awaiting_permission"
-    assert result["directive"]["type"] == "tool"
-    assert result["directive"]["payload"]["tool"] == "shell_exec"
-    assert "permission_requested" in event_types
-    assert "task_awaiting_permission" in event_types
-    assert result["result"]["error"] == "Permission required before execution."
-
-    # After granting permission, should request sudo password
-    resumed = controller.resolve_permission(task_id=result["task_id"], decision="allow_once")
-    assert resumed["status"] == "awaiting_input"
-    assert resumed["result"]["secret_request"]["kind"] == "sudo_password"
diff --git a/tests/test_tools_flow.py b/tests/test_tools_flow.py
deleted file mode 100644
index 203a343..0000000
--- a/tests/test_tools_flow.py
+++ /dev/null
@@ -1,489 +0,0 @@
-import json
-from pathlib import Path
-
-from app.core.contracts import ExecutionDirective, UserTask
-from app.core.contracts import PermissionDecision
-from app.core.contracts import ToolResult
-from app.events.event_types import TOOL_OUTPUT_CHUNK
-from app.runtime.runtime_controller import RuntimeController
-from app.tools.sandbox import ToolSandbox
-
-
-def _write_config_tree(base_dir: Path) -> None:
-    (base_dir / "config").mkdir()
-    (base_dir / "data" / "events").mkdir(parents=True, exist_ok=True)
-    (base_dir / "data" / "state").mkdir(parents=True, exist_ok=True)
-    (base_dir / "data" / "permissions").mkdir(parents=True, exist_ok=True)
-    (base_dir / "models").mkdir(exist_ok=True)
-
-    configs = {
-        "models.json": {
-            "orchestrator_path": "models/llama.gguf",
-            "coder_path": "models/xcoder.gguf",
-            "critic_path": "models/gemma.gguf",
-            "embeddings_path": "models/all-MiniLM-L6-v2",
-            "inference": {},
-        },
-        "prompts.json": {
-            "orchestration_prompt": "",
-            "planning_prompt": "",
-            "coder_prompt": "",
-            "critic_prompt": "",
-        },
-        "permissions.json": {
-            "settings": {
-                "allow_caching": True,
-                "cache_file": str(base_dir / "data/runtime/allowed_commands.json"),
-                "normalize_commands": True,
-                "split_chained": True
-            },
-            "command_categories": {
-                "hard_stop": {
-                    "commands": ["rm -rf /", "rm -rf /*", "dd if=/dev/zero of=/dev/sd*"]
-                },
-                "no_always": {
-                    "allow_once": True,
-                    "allow_always": False,
-                    "commands": [
-                        "rm -rf *", "rm -rf .*", "shutdown", "reboot", "halt",
-                        "apt", "apt-get", "dpkg", "yum", "dnf", "pacman",
-                        "systemctl stop", "systemctl start", "systemctl restart",
-                        "service stop", "service start", "killall", "pkill -9"
-                    ]
-                },
-                "normal": {
-                    "allow_once": True,
-                    "allow_always": True,
-                    "commands": ["shell_exec", "file_write"]
-                }
-            },
-            "path_settings": {
-                "allow_read_outside": True,
-                "allow_write_paths": [str(base_dir), "/tmp"],
-                "require_confirmation_for_write": True,
-                "require_confirmation_for_shell": True
-            }
-        },
-        "runtime.json": {
-            "step_timeout_ms": 5000,
-            "task_timeout_ms": 30000,
-            "planner_retry_limit": 1,
-            "tool_retry_limit": 0,
-            "replan_limit": 0,
-            "max_execution_steps": 5,
-            "retrieval_top_k": 3,
-            "memory_thresholds": {},
-            "critic_fallback_policy": "continue_without_critic",
-            "checkpoint_policy": {"save_on_transition": True},
-            "event_retention_policy": {"keep_all": True},
-            "streaming_settings": {"enabled": True},
-        },
-    }
-    for name, payload in configs.items():
-        (base_dir / "config" / name).write_text(json.dumps(payload), encoding="utf-8")
-
-
-def test_file_write_and_read_tool_flow(tmp_path: Path) -> None:
-    _write_config_tree(tmp_path)
-    controller = RuntimeController(base_dir=tmp_path)
-    target = tmp_path / "notes" / "test.txt"
-
-    write_result = controller.handle_task(
-        UserTask(
-            input="write a file",
-            context={
-                "requested_tool": "file_write",
-                "tool_args": {"path": str(target), "content": "hello from ducklm"},
-            },
-        )
-    )
-    assert write_result["status"] == "completed"
-    assert target.read_text(encoding="utf-8") == "hello from ducklm"
-
-    read_result = controller.handle_task(
-        UserTask(
-            input="read the file",
-            context={
-                "requested_tool": "file_read",
-                "tool_args": {"path": str(target)},
-            },
-        )
-    )
-    assert read_result["status"] == "completed"
-    assert read_result["result"]["output"] == "hello from ducklm"
-
-
-def test_shell_exec_requires_permission_for_dangerous_command(tmp_path: Path) -> None:
-    _write_config_tree(tmp_path)
-    controller = RuntimeController(base_dir=tmp_path)
-    result = controller.handle_task(
-        UserTask(
-            input="run dangerous shell command",
-            context={
-                "requested_tool": "shell_exec",
-                "tool_args": {"command": "rm -rf /tmp/nonexistent"},
-            },
-        )
-    )
-    # rm -rf /tmp/nonexistent is not hard_stop (only exact "rm -rf /" is)
-    # but it matches "rm -rf *" in no_always category
-    assert result["status"] == "awaiting_permission"
-    assert "permission_request" in result["result"]
-
-
-def test_shell_exec_allows_safe_command(tmp_path: Path) -> None:
-    _write_config_tree(tmp_path)
-    controller = RuntimeController(base_dir=tmp_path)
-    result = controller.handle_task(
-        UserTask(
-            input="run safe shell command",
-            context={
-                "requested_tool": "shell_exec",
-                "tool_args": {"command": "pwd"},
-            },
-        )
-    )
-    # Even safe commands require permission in the new permission model
-    assert result["status"] == "awaiting_permission"
-    assert "permission_request" in result["result"]
-    # Grant permission and verify execution
-    resumed = controller.resolve_permission(task_id=result["task_id"], decision="allow_once")
-    assert resumed["status"] == "completed"
-    assert str(tmp_path) in resumed["result"]["output"]
-
-
-def test_shell_exec_publishes_output_chunks_before_completion(tmp_path: Path) -> None:
-    _write_config_tree(tmp_path)
-    controller = RuntimeController(base_dir=tmp_path)
-    perm_override = PermissionDecision(
-        action_type="shell_command",
-        pattern="printf",
-        decision="allow_always",
-    )
-
-    task = UserTask(
-        input="stream shell output",
-        context={
-            "requested_tool": "shell_exec",
-            "tool_args": {"command": "printf 'first\\n'; sleep 0.1; printf 'second\\n'"},
-        },
-    )
-    result = controller.execution_engine.execute(
-        task,
-        ExecutionDirective(
-            type="tool",
-            payload={
-                "tool": "shell_exec",
-                "args": {"command": "printf 'first\\n'; sleep 0.1; printf 'second\\n'"},
-            },
-        ),
-        permission_override=perm_override,
-    )
-
-    events = controller.event_bus.list_for_task(task.task_id)
-    chunk_events = [event for event in events if event.type == TOOL_OUTPUT_CHUNK]
-    completed_index = next(index for index, event in enumerate(events) if event.type == "tool_completed")
-    first_chunk_index = next(index for index, event in enumerate(events) if event.type == TOOL_OUTPUT_CHUNK)
-    assert result["status"] == "completed"
-    assert [event.payload["chunk"] for event in chunk_events] == ["first\n", "second\n"]
-    assert first_chunk_index < completed_index
-
-
-def test_streaming_shell_uses_idle_timeout_not_step_timeout(tmp_path: Path) -> None:
-    sandbox = ToolSandbox(
-        allowed_root=tmp_path,
-        timeout_ms=100,
-        command_timeout_ms=2000,
-        idle_timeout_ms=500,
-    )
-    chunks: list[str] = []
-
-    result = sandbox.run_shell(
-        command="printf 'first\\n'; sleep 0.2; printf 'second\\n'",
-        output_callback=lambda _stream, chunk: chunks.append(chunk),
-    )
-
-    assert result.returncode == 0
-    assert result.stdout == "first\nsecond\n"
-    assert chunks == ["first\n", "second\n"]
-
-
-def test_streaming_shell_timeout_kills_child_process_group(tmp_path: Path) -> None:
-    marker = tmp_path / "child-survived"
-    sandbox = ToolSandbox(
-        allowed_root=tmp_path,
-        timeout_ms=100,
-        command_timeout_ms=100,
-        idle_timeout_ms=1000,
-    )
-
-    result = sandbox.run_shell(
-        command=f"sh -c 'sleep 1; touch {marker}'",
-        output_callback=lambda _stream, _chunk: None,
-    )
-
-    assert result.returncode == -9
-    assert not marker.exists()
-
-
-class _RecoveryCritic:
-    async def generate(self, prompt: str, max_tokens: int | None = None) -> str:
-        return '{"action":"continue","reason":"No matches is acceptable information for this exploratory check."}'
-
-
-def test_failed_shell_step_can_recover_and_continue(tmp_path: Path) -> None:
-    _write_config_tree(tmp_path)
-    controller = RuntimeController(base_dir=tmp_path)
-    controller.execution_engine.set_critic(_RecoveryCritic())
-    controller.execution_engine._recovery_limit = 1
-    # Bypass permission check for this test — we're testing recovery, not permissions
-    from app.core.contracts import PermissionDecision
-    perm_override = PermissionDecision(
-        action_type="shell_command",
-        pattern="grep",
-        decision="allow_always",
-    )
-    result = controller.execution_engine.execute(
-        UserTask(
-            input="run grep with no matches and recover",
-        ),
-        ExecutionDirective(
-            type="plan",
-            payload={
-                "steps": [
-                    {
-                        "id": "1",
-                        "tool": "shell_exec",
-                        "args": {"command": "printf 'abc\\n' | grep definitely_missing"},
-                        "depends_on": [],
-                    }
-                ]
-            },
-        ),
-        permission_override=perm_override,
-    )
-    assert result["status"] == "completed"
-    failed_result = result["result"]["step_results"][0]["result"]["result"]
-    assert failed_result["metadata"]["exit_code"] == 1
-
-
-def test_privilege_scope_failure_awaits_user_review_before_replan(tmp_path: Path) -> None:
-    _write_config_tree(tmp_path)
-    controller = RuntimeController(base_dir=tmp_path)
-    task = UserTask(
-        input="обнови систему",
-        context={
-            "requested_tool": "shell_exec",
-            "tool_args": {"command": "sudo apt update && apt upgrade -y"},
-        },
-    )
-    class FailingShellTool:
-        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
-            return ToolResult(
-                tool="shell_exec",
-                ok=False,
-                output="Error: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?",
-                error="Command failed with exit code 100",
-                metadata={"exit_code": 100},
-            )
-
-    controller.tool_registry._tools["shell_exec"] = FailingShellTool()
-
-    initial = controller.handle_task(task)
-    assert initial["status"] == "awaiting_permission"
-    controller.resolve_permission(task_id=task.task_id, decision="allow_once")
-    result = controller.resolve_secret(task_id=task.task_id, secret="secret")
-
-    assert result["status"] == "awaiting_review"
-    assert result["result"]["review"]["diagnosis"]["type"] == "privilege_scope_error"
-    assert result["result"]["review"]["critic_assessment"]["classification"] == "model_planning_error"
-
-
-def test_plan_pauses_on_privilege_scope_review_instead_of_completing(tmp_path: Path) -> None:
-    _write_config_tree(tmp_path)
-    controller = RuntimeController(base_dir=tmp_path)
-
-    class FailingShellTool:
-        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
-            return ToolResult(
-                tool="shell_exec",
-                ok=False,
-                output="Error: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?",
-                error="Command failed with exit code 100",
-                metadata={"exit_code": 100},
-            )
-
-    controller.tool_registry._tools["shell_exec"] = FailingShellTool()
-    result = controller.execution_engine.execute(
-        UserTask(input="обнови систему"),
-        ExecutionDirective(
-            type="plan",
-            payload={
-                "steps": [
-                    {
-                        "id": "1",
-                        "tool": "shell_exec",
-                        "args": {"command": "sudo apt update && apt upgrade -y"},
-                        "depends_on": [],
-                    }
-                ]
-            },
-        ),
-        permission_override=PermissionDecision(
-            action_type="shell_command",
-            pattern="apt",
-            decision="allow_once",
-        ),
-        secret_override="secret",
-    )
-
-    assert result["status"] == "awaiting_review"
-    assert result["result"]["review"]["diagnosis"]["type"] == "privilege_scope_error"
-
-
-def test_sudo_auth_failure_requests_secret_retry_not_review(tmp_path: Path) -> None:
-    _write_config_tree(tmp_path)
-    controller = RuntimeController(base_dir=tmp_path)
-
-    class BadPasswordShellTool:
-        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
-            return ToolResult(
-                tool="shell_exec",
-                ok=False,
-                output="Sorry, try again.\nsudo: no password was provided\nsudo: 1 incorrect password attempt\n",
-                error="Command failed with exit code 1",
-                metadata={"exit_code": 1, "sudo_auth_failed": True},
-            )
-
-    controller.tool_registry._tools["shell_exec"] = BadPasswordShellTool()
-    result = controller.execution_engine.execute(
-        UserTask(input="обнови систему"),
-        ExecutionDirective(
-            type="plan",
-            payload={
-                "steps": [
-                    {
-                        "id": "1",
-                        "tool": "shell_exec",
-                        "args": {"command": "sudo apt update && apt upgrade -y"},
-                        "depends_on": [],
-                    }
-                ]
-            },
-        ),
-        permission_override=PermissionDecision(
-            action_type="shell_command",
-            pattern="apt",
-            decision="allow_once",
-        ),
-        secret_override="wrong",
-    )
-
-    assert result["status"] == "awaiting_input"
-    assert result["result"]["secret_request"]["kind"] == "sudo_password"
-    assert result["result"]["secret_request"]["prompt"] == "Sudo password incorrect. Try again"
-    assert result["result"]["attempt_failed"] is True
-
-
-def test_runtime_keeps_secret_state_after_bad_sudo_password(tmp_path: Path) -> None:
-    _write_config_tree(tmp_path)
-    controller = RuntimeController(base_dir=tmp_path)
-
-    class RetryPasswordShellTool:
-        calls = 0
-
-        def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
-            self.calls += 1
-            if self.calls == 1:
-                return ToolResult(
-                    tool="shell_exec",
-                    ok=False,
-                    output="Sorry, try again.\nsudo: no password was provided\nsudo: 1 incorrect password attempt\n",
-                    error="Command failed with exit code 1",
-                    metadata={"exit_code": 1, "sudo_auth_failed": True},
-                )
-            return ToolResult(
-                tool="shell_exec",
-                ok=True,
-                output="root\n",
-                metadata={"exit_code": 0},
-            )
-
-    controller.tool_registry._tools["shell_exec"] = RetryPasswordShellTool()
-    task = UserTask(
-        input="кто root",
-        context={
-            "requested_tool": "shell_exec",
-            "tool_args": {"command": "sudo whoami"},
-        },
-    )
-    initial = controller.handle_task(task)
-    assert initial["status"] == "awaiting_permission"
-    allowed = controller.resolve_permission(task_id=task.task_id, decision="allow_once")
-    assert allowed["status"] == "awaiting_input"
-
-    retry = controller.resolve_secret(task_id=task.task_id, secret="wrong")
-    assert retry["status"] == "awaiting_input"
-    assert retry["result"]["attempt_failed"] is True
-
-    final = controller.resolve_secret(task_id=task.task_id, secret="correct")
-    assert final["status"] == "completed"
-    assert final["result"]["output"] == "root\n"
-
-
-def test_permission_resolution_can_resume_task(tmp_path: Path) -> None:
-    _write_config_tree(tmp_path)
-    controller = RuntimeController(base_dir=tmp_path)
-    initial = controller.handle_task(
-        UserTask(
-            input="запусти sudo apt update",
-        )
-    )
-    assert initial["status"] == "awaiting_permission"
-    resumed = controller.resolve_permission(task_id=initial["task_id"], decision="deny")
-    assert resumed["status"] == "failed"
-    assert resumed["result"]["error"] == "Permission denied by user."
-
-
-def test_sudo_permission_resolution_requests_secret_input(tmp_path: Path) -> None:
-    _write_config_tree(tmp_path)
-    controller = RuntimeController(base_dir=tmp_path)
-    initial = controller.handle_task(UserTask(input="запусти sudo apt update"))
-    assert initial["status"] == "awaiting_permission"
-    resumed = controller.resolve_permission(task_id=initial["task_id"], decision="allow_once")
-    assert resumed["status"] == "awaiting_input"
-    assert resumed["result"]["secret_request"]["kind"] == "sudo_password"
-
-
-def test_implicit_sudo_command_requests_password(tmp_path: Path) -> None:
-    """Commands like 'apt list --upgradable' that require sudo but don't start with 'sudo'
-    should also trigger password request after permission is granted."""
-    _write_config_tree(tmp_path)
-    controller = RuntimeController(base_dir=tmp_path)
-    # apt list --upgradable requires root but doesn't start with 'sudo'
-    initial = controller.handle_task(
-        UserTask(
-            input="проверь обновления",
-            context={
-                "requested_tool": "shell_exec",
-                "tool_args": {"command": "apt list --upgradable"},
-            },
-        )
-    )
-    assert initial["status"] == "awaiting_permission"
-    # Grant permission — should request sudo password since apt requires root
-    resumed = controller.resolve_permission(task_id=initial["task_id"], decision="allow_once")
-    assert resumed["status"] == "awaiting_input"
-    assert resumed["result"]["secret_request"]["kind"] == "sudo_password"
-
-
-def test_secret_resolution_continues_after_pending_secret_saved(tmp_path: Path) -> None:
-    _write_config_tree(tmp_path)
-    controller = RuntimeController(base_dir=tmp_path)
-    initial = controller.handle_task(UserTask(input="запусти sudo apt update"))
-    assert initial["status"] == "awaiting_permission"
-    resumed = controller.resolve_permission(task_id=initial["task_id"], decision="allow_once")
-    assert resumed["status"] == "awaiting_input"
-    final = controller.resolve_secret(task_id=initial["task_id"], secret="wrongpass")
-    assert final["status"] in {"completed", "failed", "awaiting_input"}
-    assert "error" in final["result"] or "output" in final["result"]