From 4a84ada7703fc6b9b22938b1341bf4af589ddde4 Mon Sep 17 00:00:00 2001 From: mirivlad Date: Wed, 20 May 2026 01:00:28 +0800 Subject: [PATCH] Replace repository with DuckLM runtime --- .gitignore | 49 +- ARCHITECTURE.md | 347 --- CURRENT_STATE.md | 252 -- Ducklm.md | 2184 +++++++++++++++++ EXPERIMENT.md | 341 --- FOR_AI_REVIEW.md | 249 -- IMPLEMENTATION_PLAN.md | 534 ---- MVP_CHECKLIST.md | 83 - Makefile | 51 + README.md | 22 + TASK_3.md | 1255 ---------- app/__init__.py | 2 - app/api/__init__.py | 2 - app/api/server.py | 170 -- app/api/static/favicon.ico | Bin 16958 -> 0 bytes app/api/static/index.html | 1089 -------- app/cli/__init__.py | 2 - app/core/__init__.py | 2 - app/core/async_router.py | 542 ---- app/core/command_analyzer.py | 60 - app/core/config.py | 93 - app/core/context_builder.py | 172 -- app/core/contracts.py | 148 -- app/core/execution_engine.py | 975 -------- app/core/execution_scheduler.py | 212 -- app/core/intent_parser.py | 104 - app/core/permission_resolution.py | 24 - app/core/permission_service.py | 370 --- app/events/__init__.py | 2 - app/events/event_bus.py | 35 - app/events/event_store.py | 122 - app/events/event_types.py | 35 - app/memory/__init__.py | 24 - app/memory/interface.py | 155 -- app/memory/recall.py | 205 -- app/memory/store.py | 185 -- app/memory/vector_index.py | 149 -- app/memory/write_policy.py | 98 - app/models/__init__.py | 32 - app/models/adapters.py | 72 - app/models/async_adapters.py | 58 - app/models/coder.py | 44 - app/models/critic.py | 44 - app/models/embeddings.py | 37 - app/models/orchestrator.py | 45 - app/permissions/__init__.py | 2 - app/permissions/approval_store.py | 67 - app/runtime/__init__.py | 2 - app/runtime/async_runtime_loop.py | 148 -- app/runtime/runtime_controller.py | 643 ----- app/runtime/runtime_loop.py | 688 ------ app/services/__init__.py | 2 - app/state/__init__.py | 2 - app/state/checkpoint_store.py | 75 - app/state/task_state_store.py | 77 - app/streaming/__init__.py | 2 - 
app/streaming/manager.py | 46 - app/tools/__init__.py | 2 - app/tools/base.py | 24 - app/tools/discover.py | 83 - app/tools/file_read.py | 26 - app/tools/file_write.py | 27 - app/tools/memory_tools.py | 123 - app/tools/plugins/file_read/__init__.py | 35 - app/tools/plugins/file_read/manifest.json | 10 - app/tools/plugins/file_write/__init__.py | 33 - app/tools/plugins/file_write/manifest.json | 11 - app/tools/plugins/memory_tools/__init__.py | 112 - app/tools/plugins/memory_tools/manifest.json | 22 - app/tools/plugins/shell_exec/__init__.py | 55 - app/tools/plugins/shell_exec/manifest.json | 12 - app/tools/registry.py | 61 - app/tools/sandbox.py | 139 -- app/tools/shell_exec.py | 65 - config/models.json | 42 - config/models.json.backup | 42 - config/models.json.test | 42 - config/models.yaml | 53 + config/permissions.json | 94 - config/prompts.json | 15 - config/prompts/coder.md | 9 - config/prompts/critic.md | 14 - config/prompts/json_compiler.md | 25 - config/prompts/orchestrator.md | 34 - config/prompts/planning.md | 10 - config/prompts/sys_util.md | 41 - config/prompts/system.md | 14 - config/prompts/thinker.md | 36 - config/runtime.json | 42 - docker-compose.memory.yml | 11 + docs/architecture.md | 5 + docs/experience_learning.md | 9 + docs/how_to_run.md | 71 + docs/how_to_test.md | 15 + docs/local_llama_server.md | 44 + docs/memory_architecture.md | 5 + docs/model_roles.md | 7 + docs/performance_mtp.md | 5 + docs/plans/ui-bootstrap-review-plan.md | 24 - docs/skills.md | 9 + .../plans/2026-05-19-ducklm-runtime.md | 83 + docs/tool_gateway.md | 9 + docs/web_api.md | 25 + duck_core/__init__.py | 3 + duck_core/api.py | 348 +++ .../approvals/__init__.py | 0 duck_core/approvals/service.py | 143 ++ duck_core/config.py | 56 + duck_core/context_builder.py | 11 + .../.gitkeep => duck_core/events/__init__.py | 0 duck_core/events/store.py | 92 + .../experience/__init__.py | 0 duck_core/experience/recorder.py | 172 ++ .../.gitkeep => duck_core/memory/__init__.py | 0 
duck_core/memory/policy.py | 20 + duck_core/memory/vector_memory.py | 70 + duck_core/model_client.py | 217 ++ duck_core/reflection.py | 29 + duck_core/runtime_loop.py | 197 ++ .../schemas/action_directive.schema.json | 55 + .../.gitkeep => duck_core/skills/__init__.py | 0 duck_core/skills/registry.py | 68 + duck_core/tasks/__init__.py | 1 + duck_core/tasks/state.py | 12 + duck_core/tasks/store.py | 115 + duck_core/tools/__init__.py | 1 + duck_core/tools/base.py | 18 + duck_core/tools/file_read.py | 36 + duck_core/tools/file_write.py | 40 + duck_core/tools/gateway.py | 31 + duck_core/tools/paths.py | 13 + duck_core/tools/shell_exec_safe.py | 95 + duck_core/web/static/app.js | 510 ++++ duck_core/web/static/style.css | 673 +++++ duck_core/web/templates/approvals.html | 22 + duck_core/web/templates/experience.html | 2 + duck_core/web/templates/index.html | 99 + duck_core/web/templates/memory.html | 2 + duck_core/web/templates/skills.html | 2 + duck_core/web/templates/task.html | 2 + main.py | 5 - prompts/roles/action.md | 16 + prompts/roles/coder.md | 2 + prompts/roles/critic.md | 2 + prompts/roles/summary.md | 1 + prompts/roles/thinker.md | 7 + pyproject.toml | 42 +- scripts/bench/bench_runtime.py | 34 + scripts/llama/build_vulkan.sh | 19 + scripts/llama/healthcheck.sh | 8 + scripts/llama/start_main.sh | 260 ++ .../llama/start_thinker_mtp_experimental.sh | 117 + scripts/server.sh | 81 - scripts/verify/verify_basic_chat.sh | 13 + scripts/verify/verify_experience.sh | 5 + scripts/verify/verify_file_write_read.sh | 14 + scripts/verify/verify_memory.sh | 5 + scripts/verify/verify_models_roles.sh | 5 + scripts/verify/verify_skills.sh | 5 + scripts/verify/verify_tool_blocking.sh | 14 + server.err | 274 --- server.out | 254 -- server.pid | 1 - skills/analyze_project/examples.md | 5 + skills/analyze_project/notes.md | 3 + skills/analyze_project/procedure.md | 6 + skills/analyze_project/skill.yaml | 23 + test_ducklm.py | 314 --- test_ducklm_direct.py | 409 --- 
tests/smoke/test_action_directive_schema.py | 16 + tests/smoke/test_api_health.py | 25 + tests/smoke/test_api_stream_chat.py | 103 + tests/smoke/test_approvals.py | 18 + tests/smoke/test_chat_api.py | 96 + tests/smoke/test_event_log.py | 25 + tests/smoke/test_experience_recorder.py | 24 + tests/smoke/test_llama_server_connection.py | 13 + tests/smoke/test_llama_service_script.py | 57 + tests/smoke/test_model_client.py | 92 + tests/smoke/test_models_config.py | 16 + tests/smoke/test_runtime_reasoning.py | 37 + tests/smoke/test_runtime_tools.py | 112 + tests/smoke/test_skill_registry.py | 9 + tests/smoke/test_tool_gateway.py | 42 + tests/smoke/test_vector_memory.py | 11 + tests/test_api_handlers.py | 122 - tests/test_command_analyzer.py | 46 - tests/test_contracts.py | 67 - tests/test_runtime_loop.py | 38 - tests/test_tools_flow.py | 489 ---- 190 files changed, 7060 insertions(+), 13602 deletions(-) delete mode 100644 ARCHITECTURE.md delete mode 100644 CURRENT_STATE.md create mode 100644 Ducklm.md delete mode 100644 EXPERIMENT.md delete mode 100644 FOR_AI_REVIEW.md delete mode 100644 IMPLEMENTATION_PLAN.md delete mode 100644 MVP_CHECKLIST.md create mode 100644 Makefile create mode 100644 README.md delete mode 100644 TASK_3.md delete mode 100644 app/__init__.py delete mode 100644 app/api/__init__.py delete mode 100644 app/api/server.py delete mode 100644 app/api/static/favicon.ico delete mode 100644 app/api/static/index.html delete mode 100644 app/cli/__init__.py delete mode 100644 app/core/__init__.py delete mode 100644 app/core/async_router.py delete mode 100644 app/core/command_analyzer.py delete mode 100644 app/core/config.py delete mode 100644 app/core/context_builder.py delete mode 100644 app/core/contracts.py delete mode 100644 app/core/execution_engine.py delete mode 100644 app/core/execution_scheduler.py delete mode 100644 app/core/intent_parser.py delete mode 100644 app/core/permission_resolution.py delete mode 100644 app/core/permission_service.py delete 
mode 100644 app/events/__init__.py delete mode 100644 app/events/event_bus.py delete mode 100644 app/events/event_store.py delete mode 100644 app/events/event_types.py delete mode 100644 app/memory/__init__.py delete mode 100644 app/memory/interface.py delete mode 100644 app/memory/recall.py delete mode 100644 app/memory/store.py delete mode 100644 app/memory/vector_index.py delete mode 100644 app/memory/write_policy.py delete mode 100644 app/models/__init__.py delete mode 100644 app/models/adapters.py delete mode 100644 app/models/async_adapters.py delete mode 100644 app/models/coder.py delete mode 100644 app/models/critic.py delete mode 100644 app/models/embeddings.py delete mode 100644 app/models/orchestrator.py delete mode 100644 app/permissions/__init__.py delete mode 100644 app/permissions/approval_store.py delete mode 100644 app/runtime/__init__.py delete mode 100644 app/runtime/async_runtime_loop.py delete mode 100644 app/runtime/runtime_controller.py delete mode 100644 app/runtime/runtime_loop.py delete mode 100644 app/services/__init__.py delete mode 100644 app/state/__init__.py delete mode 100644 app/state/checkpoint_store.py delete mode 100644 app/state/task_state_store.py delete mode 100644 app/streaming/__init__.py delete mode 100644 app/streaming/manager.py delete mode 100644 app/tools/__init__.py delete mode 100644 app/tools/base.py delete mode 100644 app/tools/discover.py delete mode 100644 app/tools/file_read.py delete mode 100644 app/tools/file_write.py delete mode 100644 app/tools/memory_tools.py delete mode 100644 app/tools/plugins/file_read/__init__.py delete mode 100644 app/tools/plugins/file_read/manifest.json delete mode 100644 app/tools/plugins/file_write/__init__.py delete mode 100644 app/tools/plugins/file_write/manifest.json delete mode 100644 app/tools/plugins/memory_tools/__init__.py delete mode 100644 app/tools/plugins/memory_tools/manifest.json delete mode 100644 app/tools/plugins/shell_exec/__init__.py delete mode 100644 
app/tools/plugins/shell_exec/manifest.json delete mode 100644 app/tools/registry.py delete mode 100644 app/tools/sandbox.py delete mode 100644 app/tools/shell_exec.py delete mode 100644 config/models.json delete mode 100644 config/models.json.backup delete mode 100644 config/models.json.test create mode 100644 config/models.yaml delete mode 100644 config/permissions.json delete mode 100644 config/prompts.json delete mode 100644 config/prompts/coder.md delete mode 100644 config/prompts/critic.md delete mode 100644 config/prompts/json_compiler.md delete mode 100644 config/prompts/orchestrator.md delete mode 100644 config/prompts/planning.md delete mode 100644 config/prompts/sys_util.md delete mode 100644 config/prompts/system.md delete mode 100644 config/prompts/thinker.md delete mode 100644 config/runtime.json create mode 100644 docker-compose.memory.yml create mode 100644 docs/architecture.md create mode 100644 docs/experience_learning.md create mode 100644 docs/how_to_run.md create mode 100644 docs/how_to_test.md create mode 100644 docs/local_llama_server.md create mode 100644 docs/memory_architecture.md create mode 100644 docs/model_roles.md create mode 100644 docs/performance_mtp.md delete mode 100644 docs/plans/ui-bootstrap-review-plan.md create mode 100644 docs/skills.md create mode 100644 docs/superpowers/plans/2026-05-19-ducklm-runtime.md create mode 100644 docs/tool_gateway.md create mode 100644 docs/web_api.md create mode 100644 duck_core/__init__.py create mode 100644 duck_core/api.py rename data/.gitkeep => duck_core/approvals/__init__.py (100%) create mode 100644 duck_core/approvals/service.py create mode 100644 duck_core/config.py create mode 100644 duck_core/context_builder.py rename data/events/.gitkeep => duck_core/events/__init__.py (100%) create mode 100644 duck_core/events/store.py rename data/memory/.gitkeep => duck_core/experience/__init__.py (100%) create mode 100644 duck_core/experience/recorder.py rename data/permissions/.gitkeep => 
duck_core/memory/__init__.py (100%) create mode 100644 duck_core/memory/policy.py create mode 100644 duck_core/memory/vector_memory.py create mode 100644 duck_core/model_client.py create mode 100644 duck_core/reflection.py create mode 100644 duck_core/runtime_loop.py create mode 100644 duck_core/schemas/action_directive.schema.json rename data/state/.gitkeep => duck_core/skills/__init__.py (100%) create mode 100644 duck_core/skills/registry.py create mode 100644 duck_core/tasks/__init__.py create mode 100644 duck_core/tasks/state.py create mode 100644 duck_core/tasks/store.py create mode 100644 duck_core/tools/__init__.py create mode 100644 duck_core/tools/base.py create mode 100644 duck_core/tools/file_read.py create mode 100644 duck_core/tools/file_write.py create mode 100644 duck_core/tools/gateway.py create mode 100644 duck_core/tools/paths.py create mode 100644 duck_core/tools/shell_exec_safe.py create mode 100644 duck_core/web/static/app.js create mode 100644 duck_core/web/static/style.css create mode 100644 duck_core/web/templates/approvals.html create mode 100644 duck_core/web/templates/experience.html create mode 100644 duck_core/web/templates/index.html create mode 100644 duck_core/web/templates/memory.html create mode 100644 duck_core/web/templates/skills.html create mode 100644 duck_core/web/templates/task.html delete mode 100644 main.py create mode 100644 prompts/roles/action.md create mode 100644 prompts/roles/coder.md create mode 100644 prompts/roles/critic.md create mode 100644 prompts/roles/summary.md create mode 100644 prompts/roles/thinker.md create mode 100644 scripts/bench/bench_runtime.py create mode 100755 scripts/llama/build_vulkan.sh create mode 100755 scripts/llama/healthcheck.sh create mode 100755 scripts/llama/start_main.sh create mode 100755 scripts/llama/start_thinker_mtp_experimental.sh delete mode 100755 scripts/server.sh create mode 100755 scripts/verify/verify_basic_chat.sh create mode 100755 scripts/verify/verify_experience.sh 
create mode 100755 scripts/verify/verify_file_write_read.sh create mode 100755 scripts/verify/verify_memory.sh create mode 100755 scripts/verify/verify_models_roles.sh create mode 100755 scripts/verify/verify_skills.sh create mode 100755 scripts/verify/verify_tool_blocking.sh delete mode 100644 server.err delete mode 100644 server.out delete mode 100644 server.pid create mode 100644 skills/analyze_project/examples.md create mode 100644 skills/analyze_project/notes.md create mode 100644 skills/analyze_project/procedure.md create mode 100644 skills/analyze_project/skill.yaml delete mode 100755 test_ducklm.py delete mode 100644 test_ducklm_direct.py create mode 100644 tests/smoke/test_action_directive_schema.py create mode 100644 tests/smoke/test_api_health.py create mode 100644 tests/smoke/test_api_stream_chat.py create mode 100644 tests/smoke/test_approvals.py create mode 100644 tests/smoke/test_chat_api.py create mode 100644 tests/smoke/test_event_log.py create mode 100644 tests/smoke/test_experience_recorder.py create mode 100644 tests/smoke/test_llama_server_connection.py create mode 100644 tests/smoke/test_llama_service_script.py create mode 100644 tests/smoke/test_model_client.py create mode 100644 tests/smoke/test_models_config.py create mode 100644 tests/smoke/test_runtime_reasoning.py create mode 100644 tests/smoke/test_runtime_tools.py create mode 100644 tests/smoke/test_skill_registry.py create mode 100644 tests/smoke/test_tool_gateway.py create mode 100644 tests/smoke/test_vector_memory.py delete mode 100644 tests/test_api_handlers.py delete mode 100644 tests/test_command_analyzer.py delete mode 100644 tests/test_contracts.py delete mode 100644 tests/test_runtime_loop.py delete mode 100644 tests/test_tools_flow.py diff --git a/.gitignore b/.gitignore index 5000599..3ec2920 100644 --- a/.gitignore +++ b/.gitignore @@ -1,41 +1,22 @@ -# Python -__pycache__/ -*.py[cod] -*.pyo -.pytest_cache/ -.mypy_cache/ -.ruff_cache/ - -# Virtual environments -.venv/ -venv/ 
-env/ - -# Local environment and secrets .env .env.* !.env.example -config/.env -config/.env.* -*.pem -*.key +.venv/ +__pycache__/ +*.py[cod] +.pytest_cache/ +.ruff_cache/ +*.egg-info/ -# Local models and embeddings -/models/ -*.gguf -*.safetensors -*.bin +data/ +workspace/ +models/ +vendor/ -# Runtime state -data/**/*.sqlite3 -data/**/*.sqlite3-* -data/runtime/*.json -data/runtime/*.pid -data/runtime/*.log -logs/ *.log +*.pid +*.sqlite3 +*.db -# OS/editor -.DS_Store -.idea/ -.vscode/ +dist/ +build/ diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md deleted file mode 100644 index 139ca70..0000000 --- a/ARCHITECTURE.md +++ /dev/null @@ -1,347 +0,0 @@ -# ARCHITECTURE - -Этот документ фиксирует целевую архитектуру `ducklm` как локального event-driven multi-model execution runtime. - -`TASK_3.md` — это директива для ИИ-кодера. -`ARCHITECTURE.md` — это короткая инженерная карта системы: что является ядром, какие есть слои, как течёт управление, где принимаются решения, а где только исполняются переходы. - -## 1. Core Principle - -Система строится вокруг `Runtime Loop Controller`. - -Центр системы: - -- не `router` -- не `orchestrator` -- не `execution engine` - -Центр системы: - -- `runtime loop` - -Именно он замыкает жизненный цикл задачи: - -```text -task - -> state load/create - -> context build - -> orchestration decision - -> plan/directive - -> execution - -> critic - -> memory policy - -> checkpoint - -> next step / complete / fail -``` - -## 2. 
Layer Model - -Целевая форма системы: - -```text -Client / CLI / API - | - v -Runtime Loop Controller - | - +--> State Store / Checkpoints - +--> Context Builder - +--> Router - +--> Orchestrator / Planner - +--> Execution Engine / Scheduler - | | - | +--> Tool Layer - | +--> Coder - | - +--> Critic - +--> Memory Write Policy - +--> Memory Store + Vector Index - +--> Event Bus + Event Store - +--> Streaming Projection -``` - -Принцип: - -- `runtime loop` координирует -- `router` рекомендует -- `orchestrator` думает -- `execution engine` исполняет -- `tools/coder` делают работу -- `critic` оценивает -- `memory policy` решает запись -- `event bus` фиксирует историю -- `state store` даёт resume - -## 3. Responsibility Boundaries - -### Runtime Loop Controller - -Отвечает за: - -- task lifecycle -- state transitions -- вызов компонентов в правильном порядке -- применение decision objects -- checkpointing -- completion / failure path - -Не отвечает за: - -- policy reasoning -- raw tool execution -- prompt assembly inline - -### Router - -Это `policy evaluator + decision suggester`. 
- -Контракт: - -```text -(input state + assembled context) -> ExecutionDirective -``` - -Свойства: - -- pure function -- no side effects -- no tool execution -- no state mutation - -### Orchestrator / Planner - -Отвечает за: - -- orchestration reasoning -- deciding whether planning is needed -- generating plan JSON -- returning structured directives - -Не отвечает за: - -- execution -- direct state mutation -- tool invocation - -### Execution Engine / Scheduler - -Отвечает за: - -- step scheduling -- task graph traversal -- step execution coordination -- calling tool/coder adapters -- reporting structured results - -Не отвечает за: - -- ownership of global lifecycle -- high-level policy - -### Critic - -Отвечает за: - -- evaluation of tool/coder outputs -- returning structured scores and explanation - -Не отвечает за: - -- final memory write decision -- execution retry policy - -### Memory Write Policy - -Отвечает за: - -- deterministic decision about storing memory -- dedup / merge / skip behavior - -Не отвечает за: - -- semantic retrieval -- critic scoring - -## 4. Decision Model - -Все decision-producing components должны возвращать структурированные объекты. - -Базовый контракт: - -```json -{ - "type": "plan|tool|coder|respond|replan|store_memory|request_permission|complete|fail|noop", - "payload": {}, - "requires_permission": false, - "confidence": 0.0, - "reason": "string" -} -``` - -Это главный антихаосный инвариант системы. - -Следствие: - -- компоненты не исполняют решения напрямую -- компоненты не мутируют state напрямую -- runtime loop применяет решения и переводит систему дальше - -## 5. Execution Flow - -Нормальный путь выполнения: - -1. Клиент отправляет task. -2. Runtime loop создаёт или загружает task state. -3. Публикуется `task_received`. -4. Context builder собирает execution context. -5. Router возвращает decision object. -6. Orchestrator возвращает direct action или plan. -7. План валидируется и преобразуется в task graph. -8. 
Execution engine выбирает следующий шаг. -9. Tool или coder исполняет шаг через adapter. -10. Result возвращается в runtime loop. -11. Critic возвращает evaluation suggestion. -12. Memory policy возвращает decision по записи. -13. State checkpoint сохраняется. -14. Event bus фиксирует события. -15. Runtime loop выбирает `continue / replan / complete / fail`. - -## 6. Task Graph Model - -Внешний planner может вернуть список шагов. - -Внутри runtime план должен жить как task graph: - -```json -{ - "nodes": [ - { - "id": "step-1", - "kind": "tool", - "tool": "shell_exec", - "args": {"command": "hostnamectl"}, - "depends_on": [] - } - ] -} -``` - -Сейчас допускается sequential DAG execution. -В будущем это даёт путь к parallel scheduling без переписывания модели. - -## 7. Event Backbone - -Система event-driven. - -`EventBus` нужен не только для стриминга, а как внутренняя хребтовая шина. - -Минимальные свойства: - -- ordering per task -- monotonic sequence per task -- durable append to event store -- replay capability -- consumer idempotency - -Минимальная модель доставки: - -- `at least once` - -Правило идемпотентности: - -- событие дедуплицируется по `task_id + sequence` - -Streaming layer — это projection от event bus, а не источник правды. - -## 8. State Persistence - -Так как runtime задуман как long-running autonomous system, in-memory lifecycle недостаточен. - -Нужны: - -- task state store -- checkpoint store -- resume from crash/restart - -Минимальная стратегия: - -- checkpoint after critical transitions -- latest valid checkpoint is resumable - -Primary choice для MVP: - -- `SQLite` - -## 9. Async and Isolation - -LLM loop не должен блокироваться долгими tool operations. - -Поэтому нужны: - -- async execution adapters -- timeout wrappers -- cancellation handling -- bounded concurrency - -Для опасных или тяжёлых операций нужен отдельный sandbox layer. - -Особенно для: - -- `shell_exec` -- browser/web fallback -- generated helper scripts - -## 10. 
Memory Architecture - -Memory — отдельная подсистема хранения, а не JSON dump. - -Рекомендуемая форма: - -- metadata store: `SQLite` -- vector index: `FAISS` или `hnswlib` - -Два разных процесса: - -- retrieval -- write decision - -Это специально разделено. - -`critic` только оценивает. -`memory write policy` принимает финальное решение. - -Минимальная логика записи должна быть детерминированной: - -```text -(critic_score + memory_type + runtime_weight + dedup_state + safety_state) -> decision -``` - -## 11. Failure Model - -Система должна быть устойчивой к частичным сбоям. - -Ожидаемые controlled failure paths: - -- invalid planner output -> replan or fail -- tool timeout -> retry or fail -- critic failure -> fallback policy -- memory failure -> skip write and continue where safe -- streaming failure -> sync fallback - -Главный принцип: - -- subsystem failure не должен автоматически означать runtime collapse - -## 12. Why This Shape - -Эта архитектура нужна, чтобы система не деградировала в один из плохих вариантов: - -- `router-god-object` -- `runtime loop with hidden policy logic` -- `LLM that directly executes tools` -- `streaming instead of event model` -- `critic as memory authority` -- `in-memory only autonomous runtime` - -Если держать эти границы жёстко, проект остаётся расширяемым. -Если границы размыть, система быстро превратится в трудноотлаживаемый procedural agent. diff --git a/CURRENT_STATE.md b/CURRENT_STATE.md deleted file mode 100644 index b362368..0000000 --- a/CURRENT_STATE.md +++ /dev/null @@ -1,252 +0,0 @@ -# DuckLM — Текущее состояние проекта - -## 1. Что это - -DuckLM — локальный event-driven multi-model AI agent runtime. Система принимает пользовательскую задачу, извлекает релевантную память, собирает контекст, принимает orchestration-решение, при необходимости строит план, исполняет шаги через tools и coder, оценивает результаты через critic, сохраняет полезное в долговременную память, публикует события и поддерживает streaming клиенту. 
- -**Ключевой принцип:** центр системы — `RuntimeLoop`. Все execution transitions проходят через него. Router, Orchestrator, ExecutionEngine — decision-producing компоненты, которые только возвращают структурированные объекты (ExecutionDirective), но не исполняют действия напрямую. - -## 2. Архитектура - -``` -Client / CLI / API - │ - ▼ -RuntimeLoop (runtime_loop.py) - │ - ├── State Store / Checkpoints (SQLite) - ├── ContextBuilder - ├── AsyncRouter (Thinker → JSON Compiler) - ├── ExecutionEngine / ExecutionScheduler - │ ├── ToolRegistry / ToolSandbox - │ ├── CoderAdapter - │ └── CriticAdapter - ├── PermissionService - ├── MemoryRecallService - ├── MemoryWritePolicy - ├── MemoryInterface (SQLite + hnswlib) - └── EventBus → SQLiteEventStore - │ - ▼ - StreamingManager → WebSocket -``` - -## 3. Структура проекта - -``` -ducklm/ - main.py # Точка входа (импорт app.api.server.app) - app/ - api/ - server.py # FastAPI: POST /chat, WS /stream, GET /health, etc. - static/index.html # Веб-чат (dark theme, Enter=отправить, Shift+Enter=новая строка) - cli/__init__.py # Пока пустой - core/ - contracts.py # Pydantic модели: UserTask, PlanStep, ToolResult, CriticScore, ... 
- config.py # AppConfig, load_app_config() - async_router.py # AsyncRouter: Thinker + JSON Compiler pipeline - context_builder.py # ContextBuilder: сборка контекста с бюджетами - execution_engine.py # ExecutionEngine: исполнение plan/tool/respond/coder - execution_scheduler.py # ExecutionScheduler: парсинг плана, граф задач, цикл выполнения - intent_parser.py # IntentParser: извлечение tool intents из текста - permission_service.py # PermissionService: проверка и разрешений команд - permission_resolution.py # Pydantic модели для API разрешений - events/ - event_bus.py # EventBus: per-task ordered publishing - event_store.py # SQLiteEventStore: append-only log - event_types.py # Константы типов событий - memory/ - interface.py # MemoryInterface: insert/search/get/delete/reindex/cleanup - store.py # MemoryStore: SQLite хранение MemoryEntry + embeddings - vector_index.py # VectorIndex: hnswlib L2 index - recall.py # MemoryRecallService: LLM-based решение о необходимости recall - write_policy.py # MemoryWritePolicy: детерминированное решение о записи - models/ - adapters.py # create_adapter/create_llama_adapter (llama-cpp-python) - async_adapters.py # AsyncOrchestratorAdapter, AsyncCoderAdapter, AsyncCriticAdapter - orchestrator.py # OrchestratorAdapter: обёртка над Llama - coder.py # CoderAdapter - critic.py # CriticAdapter - embeddings.py # EmbeddingsAdapter (sentence-transformers) - permissions/ - approval_store.py # SQLiteApprovalStore - runtime/ - runtime_loop.py # RuntimeLoop: центральный цикл (sync) - async_runtime_loop.py # AsyncRuntimeLoop: альтернативная async версия - runtime_controller.py # RuntimeController: composition root, инициализация всего - services/__init__.py # Пустой - state/ - task_state_store.py # SQLiteTaskStateStore - checkpoint_store.py # SQLiteCheckpointStore - streaming/ - manager.py # StreamingManager: подписка на события → WebSocket - tools/ - base.py, registry.py, sandbox.py, discover.py - shell_exec.py, file_read.py, file_write.py, 
memory_tools.py - plugins/ # Plugin discovery: shell_exec, file_read, file_write, memory_tools - config/ - models.json # Конфигурация моделей - runtime.json # Таймауты, retry limits, context budgets - permissions.json # Категории команд, пути - prompts/ # Markdown промпты для каждой роли - thinker.md, json_compiler.md, coder.md, critic.md, sys_util.md, orchestrator.md, planning.md, system.md - data/ - events/events.sqlite3 # Event store - state/task_state.sqlite3 # Task state - state/checkpoints.sqlite3 # Checkpoints - permissions/approvals.sqlite3 # Permission cache - memory/memory.sqlite3 # Memory store - memory/index.bin # Vector index - models/ # GGUF модели и sentence-transformers - tests/ - test_contracts.py # 6 тестов: контракты, router - test_runtime_loop.py # 2 теста: runtime loop events, permission flow - test_tools_flow.py # 7 тестов: file read/write, shell, recovery, permissions - test_api_handlers.py # 6 тестов: health, events, chat, permissions, feedback -``` - -## 4. Модели и их роли - -| Роль | Модель | Backend | Конфиг | -|------|--------|---------|--------| -| Thinker (orchestrator) | Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf | vulkan (GPU) | max_tokens=2048, temp=0.3 | -| JSON Compiler | gemma-4-E4B-it-Q4_K_M.gguf | cpu | max_tokens=1024, temp=0.1 | -| Critic | gemma-4-E4B-it-Q4_K_M.gguf (shared с compiler) | cpu | max_tokens=1024, temp=0.1 | -| Coder | X-Coder-SFT-Qwen3-8B.Q6_K.gguf | cpu | max_tokens=2048, temp=0.2 | -| Sys Utility | Menlo_Lucy-Q4_K_M.gguf | cpu | max_tokens=1024, temp=0.1 | -| Embeddings | all-MiniLM-L6-v2 (sentence-transformers) | — | dim=384 | - -**Важно:** Critic и JSON Compiler используют одну и ту же модель (gemma-4B), но разные экземпляры адаптеров. Модели не дублируются в памяти — используется кэширование через `_get_or_create_llm()` с ключом (path, backend, n_gpu_layers, n_ctx). - -## 5. 
Конфигурация - -Все настройки в `config/`: -- **models.json** — пути к GGUF файлам, backend, GPU layers, max_tokens, temperature -- **runtime.json** — таймауты (step=30s, task=5min), retry limits, context budgets, retrieval_top_k -- **permissions.json** — hard_stop команды (rm -rf /, dd, mkfs), no_always команды (shutdown, killall), normal команды -- **prompts/*.md** — системные промпты для каждой роли модели - -## 6. API - -FastAPI сервер на порту 8000 (`scripts/server.sh`): - -| Метод | Путь | Описание | -|-------|------|----------| -| GET | `/` | Веб-чат (index.html) | -| GET | `/health` | Health check | -| GET | `/events` | Список последних событий | -| POST | `/chat` | Отправить задачу (UserTask) → получить результат | -| POST | `/permissions/resolve` | Разрешить/запретить команду | -| POST | `/secrets/resolve` | Передать sudo-пароль | -| POST | `/password/resolve` | Передать пароль (альтернативный путь) | -| POST | `/critic/feedback` | Обратная связь от пользователя | -| WS | `/stream/{task_id}` | Streaming событий по задаче | - -## 7. Поток выполнения задачи - -1. Клиент → POST /chat → `RuntimeController.handle_task()` -2. 
`RuntimeLoop.run_task()`: - - Проверка hard-stop команд через PermissionService - - Создание task state в SQLiteTaskStateStore - - Публикация TASK_RECEIVED - - Checkpoint: received - - ContextBuilder.build() — сборка контекста (memory, tools, budgets) - - MemoryRecallService.recall() — LLM решает, нужно ли искать в памяти - - AsyncRouter.decide() — Thinker → JSON Compiler → ExecutionDirective - - ExecutionEngine.execute() — исполнение directive: - - plan → парсинг шагов → граф → последовательное выполнение - - tool → проверка разрешений → ToolSandbox → ToolResult - - respond → прямой ответ - - coder → CoderAdapter - - Critic оценка каждого шага (correctness, usefulness, safety) - - Recovery при неудачных шагах (retry/continue/respond/fail) - - MemoryWritePolicy — решение о записи в долговременную память - - Checkpoint: final state - - Публикация TASK_COMPLETED / TASK_FAILED / TASK_AWAITING_PERMISSION -3. Результат возвращается клиенту + события доступны через WebSocket - -## 8. Что реализовано и работает - -### Core (полностью) -- [x] Модульная структура проекта (app/, config/, data/, tests/) -- [x] Typed contracts (Pydantic модели для всех сущностей) -- [x] RuntimeLoop — центральный цикл -- [x] RuntimeController — composition root -- [x] EventBus + SQLiteEventStore (append-only, per-task ordering) -- [x] TaskStateStore + CheckpointStore (SQLite) -- [x] ContextBuilder с token budgets -- [x] AsyncRouter: Thinker → JSON Compiler pipeline с retry и JSON fix -- [x] IntentParser: извлечение tool intents из естественного языка -- [x] ExecutionEngine: plan/tool/respond/coder/fail -- [x] ExecutionScheduler: парсинг плана, DAG граф, cycle detection -- [x] PermissionService: hard_stop/no_always/normal категории, кэш разрешений -- [x] ToolSandbox: timeout, cwd restrictions -- [x] ToolRegistry + Plugin Discovery -- [x] Tools: shell_exec, file_read, file_write, memory_insert/search/list -- [x] CriticAdapter с retry и recovery (continue/retry/respond/fail) -- [x] 
MemoryInterface: SQLite + hnswlib vector index -- [x] MemoryRecallService: LLM-based решение о необходимости recall -- [x] MemoryWritePolicy: детерминированное решение о записи -- [x] EmbeddingsAdapter (sentence-transformers) -- [x] FastAPI API: /chat, /health, /events, /permissions/resolve, /secrets/resolve, /critic/feedback -- [x] WebSocket streaming (/stream/{task_id}) -- [x] Веб-чат (dark theme, Enter=отправить, Shift+Enter=новая строка, панель событий, permission controls, feedback dialog) -- [x] 21 тест (все проходят) - -### Известные баги (исправлены) -- RECALL_PROMPT_TEMPLATE format string escaping — фигурные скобки в JSON примерах нужно двоить -- VectorIndex._get_memory_id возвращал неправильный ID (hash вместо хранения mapping) -- recall_model по умолчанию был sys_util, изменён на json_compiler - -## 9. Что ещё нужно сделать - -### Приоритет 1 — Доработка до полного MVP -- [ ] **Resume из checkpoint** — после падения/перезапуска восстанавливать задачу из последнего checkpoint -- [ ] **CLI интерфейс** — отправка задач, просмотр событий, поиск в памяти из терминала (app/cli/ пока пустой) -- [ ] **Structured logging** — вместо print() использовать logging с форматированием -- [ ] **WS /stream** — доработать (сейчас базово работает, но нет подписки на новые события в реальном времени при длительных задачах) - -### Приоритет 2 — Улучшения -- [ ] **Retry/recovery policy** — более надёжная обработка ошибок tool execution -- [ ] **Replay из event store** — воспроизведение истории задачи для отладки -- [ ] **Параллельное выполнение шагов** — сейчас только sequential DAG, можно добавить parallel для независимых шагов -- [ ] **Веб-чат: отображение streaming ответа** — сейчас ответ приходит целиком, можно добавить потоковую передачу -- [ ] **Веб-чат: отображение tool output** — более красивый рендер результатов shell/file операций -- [ ] **Memory cleanup** — автоматическая очистка старых/низко-весовых записей (базовая логика есть в MemoryInterface.cleanup, но не 
вызывается автоматически) - -### Приоритет 3 — Расширения -- [ ] **web_search / web_fetch tools** — второй приоритет по TASK_3.md -- [ ] **Telegram bot stub** — thin клиент для удалённого управления -- [ ] **Coder integration в план** — пока coder adapter есть, но не интегрирован в планирование как отдельный step kind -- [ ] **Модели: загрузка при старте** — load_models_at_startup() вызывается из lifespan, но если модели не загружены, runtime работает в fallback mode (respond only) -- [ ] **Документация API** — OpenAPI схема генерируется FastAPI, но можно добавить примеры - -## 10. Запуск - -```bash -cd ~/git/ducklm -./scripts/server.sh -# или -uvicorn main:app --host 0.0.0.0 --port 8000 -``` - -Веб-чат: http://localhost:8000/ - -## 11. Тестирование - -```bash -cd ~/git/ducklm -python -m pytest tests/ -v -``` - -21 тест, все проходят. Покрытие: контракты, runtime loop, tool flow, API handlers. - -## 12. Технологии - -- **Python 3.13**, FastAPI, uvicorn, websockets -- **llama-cpp-python** — локальный инференс GGUF моделей (Vulkan/CPU) -- **sentence-transformers** — эмбеддинги (all-MiniLM-L6-v2) -- **hnswlib** — векторный поиск (L2 метрика) -- **SQLite** — event store, task state, checkpoints, memory, permissions -- **Pydantic** — все контракты -- **pytest** — тестирование diff --git a/Ducklm.md b/Ducklm.md new file mode 100644 index 0000000..6c911b9 --- /dev/null +++ b/Ducklm.md @@ -0,0 +1,2184 @@ +# DuckLM — техническое задание на разработку локальной агентной системы + +## 0. Назначение проекта + +`DuckLM` — локальная агентная система, которая работает как самостоятельный runtime поверх локальных языковых моделей. 
+ +Система должна уметь: + +- принимать сообщения от человека через WebChat; +- принимать задачи от внешних агентов и тестов через HTTP API; +- использовать локальные LLM через `llama-server`; +- вести состояние задач; +- записывать события выполнения; +- безопасно запускать инструменты; +- работать с навыками; +- сохранять опыт; +- использовать память; +- анализировать собственные ошибки; +- постепенно улучшать поведение через опыт и предложения по обновлению навыков. + +Главная идея: + +```text +DuckLM — это не inference server. + +DuckLM — это когнитивный runtime: +состояние → контекст → мышление → намерение → действие → наблюдение → рефлексия → память → опыт. +``` + +--- + +# 1. Архитектурные принципы + +## 1.1. Использовать готовые компоненты + +DuckLM должна использовать готовые решения там, где это разумно. + +```text +llama-server → inference +SQLite/PostgreSQL → события, задачи, approvals, experience records +Qdrant → semantic memory +FastAPI → HTTP API +WebChat → интерфейс человека +ToolGateway → безопасный запуск инструментов +Duck Core → когнитивный цикл +``` + +Не писать с нуля: + +- LLM inference server; +- model scheduler; +- vector database; +- OpenAI-compatible API; +- MCP-протокол; +- production-grade sandbox; +- сложный workflow engine; +- бесконечный JSON repair loop. + +Писать с нуля: + +- Duck Core; +- ModelClient; +- ContextBuilder; +- RuntimeLoop; +- EventStore; +- TaskStore; +- ToolGateway; +- ApprovalService; +- SkillRegistry; +- ExperienceRecorder; +- MemoryPolicy; +- FastAPI API; +- WebChat; +- verification scripts; +- smoke tests; +- документацию. + +--- + +## 1.2. Web/API first + +Основные интерфейсы: + +```text +WebChat → для человека +HTTP API → для кодера, тестов и внешних агентов +``` + +CLI в обязательную часть не входит. + +Если позже понадобится CLI, он должен быть тонким клиентом поверх HTTP API. + +--- + +## 1.3. 
Роли моделей логические + +Роли моделей: + +```text +thinker +critic +coder +action +recall +summary +sys_util +``` + +являются логическими ролями, а не обязательно разными физическими моделями. + +Одна физическая модель может использоваться сразу для всех ролей: + +```text +thinker = local-main +critic = local-main +coder = local-main +action = local-main +recall = local-main +summary = local-main +``` + +Различие между ролями задаётся комбинацией: + +- system prompt; +- temperature; +- max_output_tokens; +- response_format; +- structured_output; +- memory scope; +- tool permissions; +- context builder mode; +- inference endpoint. + +Пример: + +```text +thinker — свободное рассуждение, temperature 0.4 +critic — проверка и рефлексия, temperature 0.1 +coder — code-oriented prompt, temperature 0.2 +action — strict JSON schema, temperature 0.0 +summary — сжатие контекста, temperature 0.1 +``` + +Код не должен предполагать, что разные роли используют разные модели. + +Правильно: + +```python +await model_client.chat(role="thinker", ...) +await model_client.chat(role="critic", ...) +await model_client.chat(role="coder", ...) +await model_client.chat(role="action", response_format=...) +``` + +`ModelClient` по конфигу решает: + +```text +какой base_url использовать +какое имя модели передать +какую температуру поставить +какой system prompt применить +какой max_output_tokens поставить +нужен ли response_format +``` + +--- + +# 2. Параметры модели + +## 2.1. Request-level параметры + +Эти параметры можно менять на каждый запрос без перезапуска модели: + +- system prompt; +- messages; +- temperature; +- top_p; +- top_k; +- min_p; +- max_output_tokens; +- stop; +- response_format; +- JSON schema; +- tool definitions. + +Одна загруженная модель в одном `llama-server` может обслуживать разные роли с разными prompt, temperature и output limits. + +--- + +## 2.2. 
Backend-level параметры + +Эти параметры обычно требуют отдельного запуска сервера: + +- путь к GGUF-модели; +- ctx-size; +- GPU layers / offload; +- flash-attn; +- KV cache configuration; +- speculative decoding / MTP; +- server port / host; +- parallel slots; +- chat template startup config; +- quant/offload mode. + +Пример: + +```text +8081 local-main обычный +8085 local-main-mtp экспериментальный +``` + +MTP/speculative decoding не включать по умолчанию для `action` JSON endpoint. + +--- + +# 3. Token budget и context budget + +Нужно явно разделять: + +```text +ctx_size + общий размер контекстного окна модели + +max_output_tokens + сколько модель может сгенерировать за один вызов + +max_input_tokens + сколько токенов можно собрать во входной prompt + +recent_events_tokens + сколько истории событий можно включить + +memory_tokens + сколько памяти можно включить + +skill_tokens + сколько текста skill/procedure/examples можно включить +``` + +Пример `.env.example`: + +```env +DUCK_CTX_SIZE=65536 +DUCK_MAX_INPUT_TOKENS=49152 +DUCK_MAX_RECENT_EVENTS_TOKENS=12000 +DUCK_MAX_MEMORY_TOKENS=8000 +DUCK_MAX_SKILL_TOKENS=6000 +``` + +Рекомендуемые output limits: + +```text +thinker: 8192 +critic: 4096 +coder: 16384 +action: 2048 +recall: 2048 +summary: 4096 +``` + +`action` может иметь небольшой output limit, потому что action directive должен быть коротким. + +`thinker` и `coder` должны иметь более крупный output limit. + +--- + +# 4. ContextBuilder + +`ContextBuilder` не должен бездумно добавлять всю историю общения в каждый запрос. + +Контекст должен собираться из: + +- текущего user message; +- active task state; +- selected skill; +- compact task summary; +- recent relevant events; +- relevant tool observations; +- retrieved memory; +- system prompt текущей роли. + +Если контекст превышает budget: + +1. сохранить текущий user message; +2. сохранить active task state; +3. сохранить selected skill summary; +4. сохранить последние важные observations; +5. 
суммаризировать старые events; +6. обрезать низкорелевантную memory; +7. не превышать context window молча. + +--- + +# 5. Целевая архитектура + +```text +┌─────────────────────────────────────────────┐ +│ WebChat │ +│ интерфейс человека к DuckLM │ +└─────────────────────┬───────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────┐ +│ FastAPI │ +│ интерфейс кодера, тестов и агентов │ +└─────────────────────┬───────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────┐ +│ Duck Core │ +│ │ +│ RuntimeLoop │ +│ TaskState │ +│ ContextBuilder │ +│ ModelClient │ +│ SkillRegistry │ +│ ToolGateway │ +│ ApprovalService │ +│ Reflection │ +│ MemoryPolicy │ +│ ExperienceRecorder │ +└───────────────┬───────────────┬─────────────┘ + │ │ + ▼ ▼ +┌───────────────────────┐ ┌────────────────────────┐ +│ llama-server │ │ SQLite/PostgreSQL │ +│ OpenAI-compatible API │ │ events/tasks/approvals │ +└───────────────────────┘ └────────────────────────┘ + │ + ▼ +┌───────────────────────┐ +│ Qdrant / Vector Store │ +│ semantic memory │ +└───────────────────────┘ +``` + +--- + +# 6. 
Структура проекта + +Создать структуру: + +```text +ducklm/ + duck_core/ + __init__.py + api.py + config.py + model_client.py + runtime_loop.py + context_builder.py + + events/ + __init__.py + store.py + + tasks/ + __init__.py + store.py + state.py + + tools/ + __init__.py + base.py + gateway.py + file_read.py + file_write.py + shell_exec_safe.py + + approvals/ + __init__.py + service.py + + skills/ + __init__.py + registry.py + + experience/ + __init__.py + recorder.py + + memory/ + __init__.py + vector_memory.py + policy.py + + schemas/ + action_directive.schema.json + + web/ + templates/ + index.html + task.html + approvals.html + skills.html + memory.html + experience.html + static/ + app.js + style.css + + prompts/ + roles/ + thinker.md + action.md + critic.md + coder.md + summary.md + + skills/ + analyze_project/ + skill.yaml + procedure.md + examples.md + notes.md + + config/ + models.yaml + + scripts/ + llama/ + start_main.sh + start_thinker_mtp_experimental.sh + healthcheck.sh + + verify/ + verify_basic_chat.sh + verify_file_write_read.sh + verify_tool_blocking.sh + verify_models_roles.sh + verify_skills.sh + verify_experience.sh + verify_memory.sh + + bench/ + bench_runtime.py + + tests/ + smoke/ + + docs/ + + data/ + workspace/ + + .env.example + docker-compose.memory.yml + Makefile + pyproject.toml + README.md +``` + +--- + +# 7. Этап 1 — базовый проект и конфигурация + +## 7.1. Цель + +Создать запускаемый skeleton проекта с конфигурацией, зависимостями, `.env.example`, `config/models.yaml`, базовым FastAPI и пустой WebChat-страницей. + +--- + +## 7.2. 
pyproject.toml + +Минимальные зависимости: + +```toml +[project] +name = "ducklm" +version = "0.1.0" +description = "Local agent runtime with WebChat, API, tools, memory and experience" +requires-python = ">=3.11" + +dependencies = [ + "fastapi", + "uvicorn", + "httpx", + "pydantic", + "pyyaml", + "jinja2", + "python-dotenv", + "jsonschema", + "aiosqlite", + "qdrant-client" +] + +[project.optional-dependencies] +dev = [ + "pytest", + "pytest-asyncio", + "ruff" +] +``` + +--- + +## 7.3. .env.example + +Создать: + +```env +DUCK_LLAMA_SERVER_BIN=/usr/local/bin/llama-server +DUCK_MAIN_MODEL_PATH=/models/main.gguf + +DUCK_MAIN_PORT=8081 +DUCK_CTX_SIZE=65536 +DUCK_N_GPU_LAYERS=99 +DUCK_HOST=127.0.0.1 + +DUCK_API_HOST=127.0.0.1 +DUCK_API_PORT=8000 + +DUCK_WORKSPACE=./workspace +DUCK_DB_PATH=./data/duck.sqlite3 + +DUCK_MAX_INPUT_TOKENS=49152 +DUCK_MAX_RECENT_EVENTS_TOKENS=12000 +DUCK_MAX_MEMORY_TOKENS=8000 +DUCK_MAX_SKILL_TOKENS=6000 + +QDRANT_URL=http://127.0.0.1:6333 + +DUCK_SKIP_LIVE_LLM_TESTS=0 +``` + +По умолчанию API и `llama-server` должны слушать только `127.0.0.1`. + +Если пользователь явно указывает `0.0.0.0`, в логах должно быть предупреждение: + +```text +WARNING: DuckLM API is listening on 0.0.0.0. This may expose local tool execution endpoints. +``` + +--- + +## 7.4. 
config/models.yaml + +Создать: + +```yaml +default_provider: llama_server + +models: + thinker: + provider: llama_server + base_url: http://127.0.0.1:8081/v1 + model: local-main + purpose: free_cognition + structured_output: false + temperature: 0.4 + max_output_tokens: 8192 + system_prompt: prompts/roles/thinker.md + + critic: + provider: llama_server + base_url: http://127.0.0.1:8081/v1 + model: local-main + purpose: reflection + structured_output: false + temperature: 0.1 + max_output_tokens: 4096 + system_prompt: prompts/roles/critic.md + + coder: + provider: llama_server + base_url: http://127.0.0.1:8081/v1 + model: local-main + purpose: code_generation + structured_output: false + temperature: 0.2 + max_output_tokens: 16384 + system_prompt: prompts/roles/coder.md + + action: + provider: llama_server + base_url: http://127.0.0.1:8081/v1 + model: local-main + purpose: action_directive + structured_output: true + temperature: 0.0 + max_output_tokens: 2048 + system_prompt: prompts/roles/action.md + response_schema: duck_core/schemas/action_directive.schema.json + + summary: + provider: llama_server + base_url: http://127.0.0.1:8081/v1 + model: local-main + purpose: context_summary + structured_output: false + temperature: 0.1 + max_output_tokens: 4096 + system_prompt: prompts/roles/summary.md +``` + +--- + +# 8. Этап 2 — llama-server integration и ModelClient + +## 8.1. 
Скрипт запуска llama-server + +Создать: + +```text +scripts/llama/start_main.sh +``` + +```bash +#!/usr/bin/env bash +set -euo pipefail + +: "${DUCK_MAIN_MODEL_PATH:?DUCK_MAIN_MODEL_PATH is required}" + +"${DUCK_LLAMA_SERVER_BIN:-llama-server}" \ + -m "${DUCK_MAIN_MODEL_PATH}" \ + --alias local-main \ + --host "${DUCK_HOST:-127.0.0.1}" \ + --port "${DUCK_MAIN_PORT:-8081}" \ + -c "${DUCK_CTX_SIZE:-65536}" \ + -ngl "${DUCK_N_GPU_LAYERS:-99}" \ + --flash-attn on \ + --cache-prompt \ + --metrics +``` + +Создать: + +```text +scripts/llama/healthcheck.sh +``` + +```bash +#!/usr/bin/env bash +set -euo pipefail + +BASE_URL="${1:-http://127.0.0.1:8081/v1}" + +curl -fsS "${BASE_URL}/models" >/dev/null + +echo "OK: ${BASE_URL}" +``` + +--- + +## 8.2. ModelClient + +Создать: + +```text +duck_core/model_client.py +``` + +Требования: + +1. Читать `config/models.yaml`. +2. Вызывать модель по логической роли. +3. Работать через OpenAI-compatible API. +4. Поддерживать role-specific `system_prompt`. +5. Поддерживать role-specific `temperature`. +6. Поддерживать role-specific `max_output_tokens`. +7. Поддерживать `response_format`. +8. Логировать latency. +9. Логировать usage tokens, если backend их возвращает. +10. Корректно обрабатывать ошибки соединения. +11. Не требовать уникальности моделей для ролей. + +Интерфейс: + +```python +from dataclasses import dataclass +from typing import Any + + +@dataclass +class ModelResponse: + role: str + model: str + content: str + raw: dict[str, Any] + latency_ms: float + prompt_tokens: int | None = None + completion_tokens: int | None = None + total_tokens: int | None = None + + +class ModelClient: + def __init__(self, config_path: str = "config/models.yaml"): + ... + + async def chat( + self, + role: str, + messages: list[dict[str, str]], + temperature: float | None = None, + max_output_tokens: int | None = None, + response_format: dict | None = None, + ) -> ModelResponse: + ... +``` + +--- + +# 9. Этап 3 — Web/API runtime loop + +## 9.1. 
Цель + +Сделать минимальный живой вертикальный срез: + +```text +человек пишет в WebChat +↓ +FastAPI создаёт task +↓ +Duck Core вызывает llama-server +↓ +ответ пишется в SQLite event log +↓ +WebChat показывает ответ и event timeline +``` + +На этом этапе не делать: + +- tools; +- approvals; +- skills; +- experience; +- Qdrant; +- MTP. + +--- + +## 9.2. SQLite schema + +Создать EventStore и TaskStore. + +Минимальные таблицы: + +```sql +create table if not exists tasks ( + task_id text primary key, + status text not null, + user_message text not null, + workspace text, + debug integer not null default 0, + final_response text, + created_at text not null, + updated_at text not null +); + +create table if not exists events ( + id integer primary key autoincrement, + task_id text not null, + sequence integer not null, + event_type text not null, + payload_json text not null, + created_at text not null +); + +create unique index if not exists idx_events_task_sequence +on events(task_id, sequence); +``` + +Минимальные статусы задач: + +```text +running +completed +failed +cancelled +``` + +Минимальные события: + +```text +task_created +model_call_started +cognition_response +model_call_finished +task_completed +task_failed +``` + +--- + +## 9.3. RuntimeLoop + +Создать: + +```text +duck_core/runtime_loop.py +``` + +Минимальный цикл: + +```text +POST /v1/chat +↓ +create task +↓ +write task_created +↓ +build basic context +↓ +call thinker +↓ +write cognition_response +↓ +save final_response +↓ +write task_completed +↓ +return response +``` + +--- + +## 9.4. FastAPI endpoints + +Создать: + +```text +duck_core/api.py +``` + +Минимальные endpoints: + +```text +GET /health +GET /v1/status + +GET /v1/models/roles +GET /v1/models/ping + +POST /v1/chat + +POST /v1/tasks +GET /v1/tasks +GET /v1/tasks/{task_id} +GET /v1/tasks/{task_id}/events +GET /v1/tasks/{task_id}/stream +``` + +`POST /v1/chat` — основной человекоподобный вход. 
+ +Пример запроса: + +```json +{ + "message": "Скажи коротко, что ты DuckLM", + "workspace": "./workspace", + "debug": true +} +``` + +Пример ответа: + +```json +{ + "task_id": "task_20260519_001", + "status": "completed", + "final_response": "Я DuckLM, локальная агентная система с Web/API-интерфейсом." +} +``` + +--- + +## 9.5. WebChat + +Сделать минимальный WebChat. + +Допустимо: + +- FastAPI templates; +- static HTML; +- простой JS через `fetch`; +- SSE для event timeline. + +Главная страница `/` должна содержать: + +- поле сообщения; +- поле workspace; +- checkbox debug; +- кнопку Run; +- блок final response; +- блок event timeline. + +--- + +## 9.6. Проверка этапа + +Запуск: + +```bash +cp .env.example .env +# прописать DUCK_MAIN_MODEL_PATH + +bash scripts/llama/start_main.sh +``` + +Во втором терминале: + +```bash +python -m duck_core.api +``` + +Проверка: + +```bash +curl http://127.0.0.1:8000/health +curl http://127.0.0.1:8000/v1/models/roles +curl http://127.0.0.1:8000/v1/models/ping +``` + +Запуск задачи: + +```bash +curl -X POST http://127.0.0.1:8000/v1/chat \ + -H "Content-Type: application/json" \ + -d '{ + "message": "Скажи коротко, что ты DuckLM", + "workspace": "./workspace", + "debug": true + }' +``` + +Проверить events: + +```bash +curl http://127.0.0.1:8000/v1/tasks/<task_id>/events +``` + +Ожидаемые события: + +```text +task_created +model_call_started +cognition_response +model_call_finished +task_completed +``` + +--- + +# 10. Этап 4 — cognition/action split + +## 10.1. Цель + +Разделить свободное мышление и машинное намерение. + +```text +cognition_response + свободный текст, понимание задачи, план, риски + +action_directive + строгий JSON для ToolGateway +``` + +Модель не должна думать в JSON. + +JSON используется только как форма внешнего действия. + +--- + +## 10.2. 
Action directive schema + +Создать: + +```text +duck_core/schemas/action_directive.schema.json +``` + +```json +{ + "type": "object", + "required": ["kind", "intent", "risk_level", "actions"], + "additionalProperties": false, + "properties": { + "kind": { + "type": "string", + "enum": ["action_directive"] + }, + "intent": { + "type": "string", + "minLength": 1 + }, + "risk_level": { + "type": "string", + "enum": ["none", "low", "medium", "high", "critical"] + }, + "actions": { + "type": "array", + "minItems": 0, + "items": { + "type": "object", + "required": ["tool", "args"], + "additionalProperties": false, + "properties": { + "tool": { + "type": "string", + "minLength": 1 + }, + "args": { + "type": "object" + }, + "reason": { + "type": "string" + } + } + } + }, + "memory_hints": { + "type": "array", + "items": { + "type": "string" + } + }, + "expected_observations": { + "type": "array", + "items": { + "type": "string" + } + }, + "stop_reason": { + "type": "string" + } + } +} +``` + +--- + +## 10.3. Structured output и retry + +Правила: + +1. `action_directive` генерируется через structured output, если backend это поддерживает. +2. Если backend не поддерживает JSON schema, явно записать это в event log. +3. Fallback на plain JSON допускается только если включён в config. +4. После генерации directive валидируется локально. +5. Разрешён максимум один retry. +6. Retry чинит только directive. +7. Бесконечный JSON repair loop запрещён. + +Запрещено: + +```python +while not valid_json: + call_model_to_fix_json() +``` + +--- + +# 11. Этап 5 — ToolGateway + +## 11.1. Цель + +Добавить безопасное выполнение действий через tools. + +Модель не запускает инструменты напрямую. + +Модель создаёт `action_directive`. + +`ToolGateway`: + +1. принимает action directive; +2. проверяет tool; +3. проверяет risk level; +4. нормализует действие; +5. проверяет permissions; +6. выполняет разрешённое действие; +7. пишет observation в event log; +8. возвращает результат в runtime loop. 
+ +--- + +## 11.2. Tool interface + +Создать: + +```text +duck_core/tools/base.py +``` + +```python +from typing import Protocol, Any +from pydantic import BaseModel, Field + + +class ToolResult(BaseModel): + ok: bool + output: str | None = None + error: str | None = None + metadata: dict[str, Any] = Field(default_factory=dict) + + +class Tool(Protocol): + name: str + risk_level: str + + async def run(self, args: dict[str, Any]) -> ToolResult: + ... +``` + +--- + +## 11.3. Минимальные tools + +Создать: + +```text +duck_core/tools/file_read.py +duck_core/tools/file_write.py +duck_core/tools/shell_exec_safe.py +``` + +### file_read + +Требования: + +- читать только внутри workspace; +- запретить path traversal; +- запретить чтение `/etc/shadow`; +- запретить чтение `~/.ssh` без explicit approval; +- запретить чтение `.env` без explicit approval; +- ограничить максимальный размер файла. + +### file_write + +Требования: + +- писать только внутри workspace; +- запретить path traversal; +- не перезаписывать существующий файл без backup или approval; +- создавать каталоги только внутри workspace; +- возвращать metadata: path, bytes_written, created/updated. + +### shell_exec_safe + +Allowlist: + +```text +pwd +ls +cat +head +tail +grep +find +python -m pytest +pytest +git status +git diff +git log +``` + +Blocklist: + +```text +rm +sudo +su +dd +mkfs +mount +umount +chmod -R +chown -R +curl | sh +wget | sh +shutdown +reboot +poweroff +systemctl +service +apt install +apt remove +pacman -S +pacman -R +pip install +npm install -g +``` + +Команды вне allowlist требуют approval. + +--- + +# 12. Этап 6 — approvals и resume + +## 12.1. Цель + +Добавить подтверждение рискованных действий и продолжение задачи после решения пользователя. + +--- + +## 12.2. 
Таблица approvals + +```sql +create table if not exists approvals ( + id integer primary key autoincrement, + approval_id text not null unique, + task_id text not null, + action_hash text not null, + normalized_action_json text not null, + status text not null, + decision text, + created_at text not null, + updated_at text not null +); +``` + +Статусы задачи: + +```text +running +waiting_for_approval +completed +failed +cancelled +``` + +Если действие требует approval: + +1. создать pending approval; +2. перевести task в `waiting_for_approval`; +3. показать approval в Web UI; +4. позволить approve/deny через API; +5. после allow_once/allow_forever продолжить задачу через `/continue`. + +--- + +## 12.3. Approval API + +Добавить: + +```text +GET /v1/approvals/pending +POST /v1/approvals/{approval_id}/allow_once +POST /v1/approvals/{approval_id}/allow_forever +POST /v1/approvals/{approval_id}/deny +POST /v1/tasks/{task_id}/continue +POST /v1/tasks/{task_id}/cancel +``` + +Инвариант: + +```text +Allow forever = только exact normalized action hash. +``` + +Это не широкое разрешение на похожие действия. + +--- + +## 12.4. Approval UI + +Web UI должен показывать pending approval: + +```text +DuckLM хочет выполнить действие: + +tool: shell_exec_safe +command: pytest tests/smoke -v +risk: low +reason: Need to run tests + +[Allow once] +[Allow forever for exact action] +[Deny] +``` + +--- + +# 13. Этап 7 — Skills + +## 13.1. Цель + +Добавить процедурную память. + +Skill — это не if/else-автомат. + +Skill — это описание способа решения типа задач: + +- какие tools нужны; +- какие риски есть; +- какие шаги обычно полезны; +- какие критерии успеха; +- какие ошибки уже известны; +- какие примеры есть. + +--- + +## 13.2. 
Структура skill + +Создать: + +```text +skills/analyze_project/ + skill.yaml + procedure.md + examples.md + notes.md +``` + +Пример `skill.yaml`: + +```yaml +id: analyze_project +title: Analyze project structure +description: Inspect repository structure and summarize architecture. +version: 1 + +tags: + - code + - repository + - analysis + +required_tools: + - file_read + - shell_exec_safe + +risk_level: low + +inputs: + - workspace_path + +outputs: + - architecture_summary + - risks + - suggested_next_steps + +success_criteria: + - repository structure inspected + - major modules identified + - no destructive commands executed + - summary is grounded in actual files +``` + +--- + +## 13.3. SkillRegistry + +Создать: + +```text +duck_core/skills/registry.py +``` + +Интерфейс: + +```python +class SkillRegistry: + def load_skills(self) -> list[Skill]: + ... + + def get_skill(self, skill_id: str) -> Skill | None: + ... + + async def find_candidate_skills( + self, + user_request: str, + limit: int = 3, + ) -> list[SkillCandidate]: + ... +``` + +На первом этапе допустимо: + +- keyword prefilter по title/tags/description; +- LLM selection через thinker/action. + +Не делать огромный if/else-router. + +--- + +## 13.4. Skills API + +Добавить: + +```text +GET /v1/skills +GET /v1/skills/{skill_id} +``` + +Web UI: + +```text +/skills +``` + +--- + +# 14. Этап 8 — Experience и Reflection + +## 14.1. Цель + +Добавить самоулучшение через опыт. + +Не через автоматическое изменение кода. + +А через: + +```text +task +↓ +reflection +↓ +experience record +↓ +skill update proposal +↓ +human approval later +``` + +--- + +## 14.2. Reflection + +Создать: + +```text +duck_core/reflection.py +``` + +Reflection должна отвечать: + +1. Что пытались сделать? +2. Получилось ли? +3. Что сработало? +4. Что не сработало? +5. Были ли лишние model calls? +6. Были ли лишние tool calls? +7. Застревала ли система? +8. Была ли проблема с JSON/action directive? +9. Нужно ли что-то запомнить? +10. 
Нужно ли предложить изменение skill? + +Reflection использует роль `critic`. + +`critic` может быть той же физической моделью, что и `thinker`. + +--- + +## 14.3. ExperienceRecord + +Добавить таблицу: + +```sql +create table if not exists experience_records ( + id integer primary key autoincrement, + task_id text not null, + skill_id text, + summary text not null, + result text not null, + what_worked_json text, + what_failed_json text, + reusable_lesson text, + suggested_skill_patch text, + confidence real, + created_at text not null +); +``` + +Формат: + +```json +{ + "task_id": "...", + "skill_id": "optional", + "summary": "What was attempted", + "result": "success/failure/partial", + "what_worked": ["..."], + "what_failed": ["..."], + "reusable_lesson": "...", + "suggested_skill_patch": "optional", + "confidence": 0.7 +} +``` + +--- + +## 14.4. Skill update proposals + +Если reflection считает, что skill надо улучшить, создать файл: + +```text +skills/_proposals/<skill_id>_<timestamp>.patch.md +``` + +Формат: + +```markdown +# Skill update proposal + +Skill: analyze_project + +## Reason + +... + +## Proposed changes + +... + +## Evidence + +Task id: ... + +## Risk + +Low / medium / high. + +## Requires human approval + +Yes. +``` + +Запрещено автоматически применять skill patch без approval. + +--- + +## 14.5. Experience API + +Добавить: + +```text +GET /v1/experience +GET /v1/experience/{id} +``` + +Web UI: + +```text +/experience +``` + +--- + +# 15. Этап 9 — Semantic memory + +## 15.1. Цель + +Добавить semantic memory через готовый vector store. + +--- + +## 15.2. Qdrant compose + +Создать: + +```text +docker-compose.memory.yml +``` + +```yaml +services: + qdrant: + image: qdrant/qdrant:latest + ports: + - "6333:6333" + - "6334:6334" + volumes: + - qdrant_storage:/qdrant/storage + +volumes: + qdrant_storage: +``` + +--- + +## 15.3. 
VectorMemory adapter + +Создать: + +```text +duck_core/memory/vector_memory.py +``` + +Интерфейс: + +```python +from typing import Any + + +class VectorMemory: + async def add_memory( + self, + text: str, + metadata: dict[str, Any] | None = None, + ) -> str: + ... + + async def search_memory( + self, + query: str, + limit: int = 5, + ) -> list[dict[str, Any]]: + ... +``` + +Embeddings: + +1. Если `llama-server /v1/embeddings` доступен — использовать его. +2. Если embeddings пока недоступны — сделать явный adapter stub и xfail-test. +3. Не писать самодельный embedding algorithm. + +--- + +## 15.4. MemoryPolicy + +Создать: + +```text +duck_core/memory/policy.py +``` + +Типы памяти: + +```text +event +semantic_fact +preference +procedure +experience +skill_update_candidate +``` + +Пример результата: + +```json +{ + "should_store": true, + "memory_type": "experience", + "summary": "The action directive schema failed because reasoning and JSON were mixed.", + "importance": 0.8, + "metadata": { + "task_id": "...", + "source": "reflection" + } +} +``` + +Допустима LLM-классификация через `action` role со structured JSON. + +Не делать жёстких эвристик вида: + +```python +if "remember" in text: + ... +``` + +--- + +## 15.5. Memory API + +Добавить: + +```text +GET /v1/memory/search?q=... +``` + +Web UI: + +```text +/memory +``` + +--- + +# 16. Этап 10 — Performance и MTP experiments + +## 16.1. Цель + +Добавить экспериментальные режимы ускорения inference. + +MTP/speculative decoding — уровень inference backend, а не Duck Core. + +--- + +## 16.2. MTP script + +Создать: + +```text +scripts/llama/start_thinker_mtp_experimental.sh +``` + +```bash +#!/usr/bin/env bash +set -euo pipefail + +: "${DUCK_MAIN_MODEL_PATH:?DUCK_MAIN_MODEL_PATH is required}" + +LLAMA_BIN="${DUCK_LLAMA_SERVER_BIN:-llama-server}" + +if ! "${LLAMA_BIN}" --help | grep -qi "spec"; then + echo "This llama-server build does not expose speculative/MTP flags." 
+ exit 1 +fi + +"${LLAMA_BIN}" \ + -m "${DUCK_MAIN_MODEL_PATH}" \ + --alias local-main-mtp \ + --host "${DUCK_HOST:-127.0.0.1}" \ + --port "${DUCK_MAIN_MTP_PORT:-8085}" \ + -c "${DUCK_CTX_SIZE:-65536}" \ + -ngl "${DUCK_N_GPU_LAYERS:-99}" \ + --flash-attn on \ + --cache-prompt \ + --metrics \ + ${DUCK_MTP_FLAGS:-} +``` + +MTP не включать по умолчанию для action JSON endpoint. + +--- + +## 16.3. Benchmark + +Создать: + +```text +scripts/bench/bench_runtime.py +``` + +Метрики: + +- total runtime seconds; +- LLM calls count; +- latency per LLM call; +- prompt tokens; +- completion tokens; +- total tokens; +- tool calls count; +- JSON directive validity; +- retry count; +- memory writes count; +- experience record created yes/no; +- selected skill; +- model role mapping. + +Тестовые задачи: + +```text +1. "Скажи коротко, что ты DuckLM." +2. "Создай tmp/duck_test_note.md с текстом hello duck и прочитай его обратно." +3. "Посмотри структуру проекта и кратко опиши модули." +4. "Найди TODO/FIXME в проекте." +5. "Запусти тесты и кратко объясни результат." +``` + +Бенчмарк должен выводить: + +```text +role -> base_url/model +``` + +--- + +# 17. Verification scripts + +Создать: + +```text +scripts/verify/ + verify_basic_chat.sh + verify_file_write_read.sh + verify_tool_blocking.sh + verify_models_roles.sh + verify_skills.sh + verify_experience.sh + verify_memory.sh +``` + +Скрипты должны использовать HTTP API, а не CLI. 
+ +Пример `verify_basic_chat.sh`: + +```bash +#!/usr/bin/env bash +set -euo pipefail + +BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}" + +curl -fsS "${BASE_URL}/health" + +curl -fsS -X POST "${BASE_URL}/v1/chat" \ + -H "Content-Type: application/json" \ + -d '{ + "message": "Скажи коротко, что ты DuckLM", + "debug": true + }' +``` + +Пример `verify_file_write_read.sh`: + +```bash +#!/usr/bin/env bash +set -euo pipefail + +BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}" + +RESPONSE="$(curl -fsS -X POST "${BASE_URL}/v1/chat" \ + -H "Content-Type: application/json" \ + -d '{ + "message": "Создай tmp/duck_test_note.md с текстом hello duck и прочитай его обратно", + "workspace": "./workspace", + "debug": true + }')" + +echo "${RESPONSE}" +``` + +--- + +# 18. Makefile + +Создать: + +```makefile +duck-up: + docker compose -f docker-compose.memory.yml up -d + @echo "Memory services started." + @echo "Start llama-server:" + @echo "bash scripts/llama/start_main.sh" + +duck-llama-main: + bash scripts/llama/start_main.sh + +duck-llama-health: + bash scripts/llama/healthcheck.sh http://127.0.0.1:8081/v1 + +duck-api: + python -m duck_core.api + +duck-dev: + docker compose -f docker-compose.memory.yml up -d + @echo "Start llama-server in another terminal:" + @echo "bash scripts/llama/start_main.sh" + @echo "Then run:" + @echo "make duck-api" + @echo "Open:" + @echo "http://127.0.0.1:8000/" + +duck-open: + @echo "Open web UI:" + @echo "http://127.0.0.1:8000/" + +duck-smoke: + python -m pytest tests/smoke -v + +duck-test: + python -m pytest -v + +duck-verify: + bash scripts/verify/verify_basic_chat.sh + bash scripts/verify/verify_file_write_read.sh + bash scripts/verify/verify_tool_blocking.sh + bash scripts/verify/verify_models_roles.sh +``` + +--- + +# 19. 
Smoke tests + +Создать: + +```text +tests/smoke/test_models_config.py +tests/smoke/test_model_client.py +tests/smoke/test_llama_server_connection.py +tests/smoke/test_api_health.py +tests/smoke/test_chat_api.py +tests/smoke/test_event_log.py +tests/smoke/test_action_directive_schema.py +tests/smoke/test_tool_gateway.py +tests/smoke/test_approvals.py +tests/smoke/test_skill_registry.py +tests/smoke/test_experience_recorder.py +tests/smoke/test_vector_memory.py +``` + +Live LLM tests должны пропускаться, если: + +```text +DUCK_SKIP_LIVE_LLM_TESTS=1 +``` + +--- + +# 20. Документация + +Создать: + +```text +docs/architecture.md +docs/how_to_run.md +docs/how_to_test.md +docs/local_llama_server.md +docs/model_roles.md +docs/web_api.md +docs/tool_gateway.md +docs/skills.md +docs/experience_learning.md +docs/memory_architecture.md +docs/performance_mtp.md +``` + +## docs/how_to_run.md + +Описать: + +1. как установить зависимости; +2. как указать путь к GGUF-модели; +3. как запустить `llama-server`; +4. как запустить DuckLM API; +5. как открыть WebChat; +6. как отправить первую задачу; +7. как смотреть task events; +8. как смотреть approvals; +9. как остановить сервисы. + +## docs/model_roles.md + +Описать: + +1. роль модели — логическая роль; +2. thinker/critic/coder/action могут использовать одну модель; +3. разные роли могут отличаться prompt/temperature/schema/context; +4. как настроить одну модель на все роли; +5. как настроить разные модели на разные роли; +6. какие параметры request-level; +7. какие параметры backend-level. 
+ +## docs/web_api.md + +Описать endpoints: + +```text +GET /health +GET /v1/status +GET /v1/models/roles +GET /v1/models/ping +POST /v1/chat +POST /v1/tasks +GET /v1/tasks +GET /v1/tasks/{task_id} +GET /v1/tasks/{task_id}/events +GET /v1/tasks/{task_id}/stream +GET /v1/approvals/pending +POST /v1/approvals/{approval_id}/allow_once +POST /v1/approvals/{approval_id}/allow_forever +POST /v1/approvals/{approval_id}/deny +GET /v1/skills +GET /v1/skills/{skill_id} +GET /v1/experience +GET /v1/experience/{id} +GET /v1/memory/search?q=... +``` + +--- + +# 21. Критерии готовности по этапам + +## Этап 1 готов, если: + +- создана структура проекта; +- есть `pyproject.toml`; +- есть `.env.example`; +- есть `config/models.yaml`; +- есть базовый FastAPI; +- есть пустая WebChat-страница; +- проект запускается без синтаксических ошибок. + +## Этап 2 готов, если: + +- `llama-server` запускается через `scripts/llama/start_main.sh`; +- `/v1/models` отвечает; +- `ModelClient` читает `config/models.yaml`; +- одна модель может быть назначена на все роли; +- `GET /v1/models/roles` показывает роли; +- `GET /v1/models/ping` проверяет доступность backend-а. + +## Этап 3 готов, если: + +- `POST /v1/chat` работает; +- WebChat позволяет отправить сообщение; +- task создаётся; +- events пишутся в SQLite; +- task timeline отображается в WebChat; +- final response отображается в WebChat. + +## Этап 4 готов, если: + +- `cognition_response` отделён от `action_directive`; +- action directive schema создана; +- action directive валидируется; +- бесконечного JSON repair loop нет; +- разрешён максимум один retry. + +## Этап 5 готов, если: + +- ToolGateway существует; +- file_read работает внутри workspace; +- file_write работает внутри workspace; +- shell_exec_safe работает для allowlist; +- опасные команды блокируются; +- tool observations пишутся в event log. 
+ +## Этап 6 готов, если: + +- approvals table создана; +- waiting_for_approval status работает; +- pending approvals видны в Web UI; +- allow_once работает; +- allow_forever работает только для exact normalized action hash; +- deny работает; +- `/continue` продолжает задачу после approval. + +## Этап 7 готов, если: + +- каталог `skills/` существует; +- SkillRegistry грузит skills; +- Runtime выбирает candidate skill; +- Skills API работает; +- Web UI показывает skills. + +## Этап 8 готов, если: + +- Reflection работает через critic role; +- ExperienceRecord создаётся после задачи; +- Experience API работает; +- Web UI показывает experience records; +- skill update proposals создаются; +- proposals не применяются автоматически. + +## Этап 9 готов, если: + +- Qdrant поднимается через docker-compose; +- VectorMemory adapter существует; +- add_memory работает или явно xfail, если embeddings недоступны; +- search_memory работает или явно xfail; +- MemoryPolicy существует; +- Memory API работает; +- Web UI имеет memory page. + +## Этап 10 готов, если: + +- MTP experimental script есть; +- MTP не включён по умолчанию для action JSON endpoint; +- benchmark script есть; +- benchmark показывает role → base_url/model; +- benchmark считает LLM calls, latency, retries, tool calls. + +--- + +# 22. Что запрещено + +Запрещено: + +1. превращать DuckLM в обычный workflow-runner; +2. заменять когнитивный цикл набором if/else эвристик; +3. писать самописный inference server; +4. писать самописный model scheduler; +5. писать самописную vector database; +6. делать бесконечный JSON repair loop; +7. давать модели прямой shell без ToolGateway; +8. включать MTP/speculative для action JSON endpoint по умолчанию; +9. делать self-modifying code без approval; +10. смешивать cognition_response и action_directive; +11. считать, что thinker/critic/coder/action — обязательно разные модели; +12. считать, что каждая роль требует отдельный llama-server; +13. хардкодить пути к моделям в коде; +14. 
делать CLI обязательной частью системы; +15. делать сложный frontend раньше рабочего Web/API loop. + +--- + +# 23. Финальный отчёт исполнителя + +В конце работы по каждому этапу исполнитель должен предоставить: + +1. что реализовано; +2. что не реализовано и почему; +3. список изменённых файлов; +4. как запустить `llama-server`; +5. как запустить DuckLM API; +6. как открыть WebChat; +7. как отправить первую задачу через WebChat; +8. как отправить задачу через curl; +9. как посмотреть task events; +10. как проверить одну модель на все роли; +11. как проверить разные модели на разные роли; +12. как проверить file_write/file_read; +13. как проверить блокировку опасной команды; +14. как проверить approvals; +15. как запустить smoke tests; +16. как запустить verification scripts; +17. какие ограничения остались; +18. что делать следующим этапом. + +Финальные команды запуска должны быть примерно такими: + +```bash +cp .env.example .env +# прописать DUCK_MAIN_MODEL_PATH + +bash scripts/llama/start_main.sh +``` + +Во втором терминале: + +```bash +python -m duck_core.api +``` + +Проверка: + +```bash +curl http://127.0.0.1:8000/health +curl http://127.0.0.1:8000/v1/models/roles +curl http://127.0.0.1:8000/v1/models/ping +``` + +Запуск задачи: + +```bash +curl -X POST http://127.0.0.1:8000/v1/chat \ + -H "Content-Type: application/json" \ + -d '{ + "message": "Скажи коротко, что ты DuckLM", + "workspace": "./workspace", + "debug": true + }' +``` + +--- + +# 24. Главная мысль проекта + +DuckLM должна быть не набором скриптов и не inference-сервером. 
+ +DuckLM должна быть локальным когнитивным runtime: + +```text +состояние +контекст +модельное мышление +намерение +действие +наблюдение +рефлексия +память +опыт +навыки +``` + +Первый результат должен быть маленьким, но живым: + +```text +WebChat +↓ +FastAPI +↓ +Duck Core +↓ +llama-server +↓ +SQLite event timeline +↓ +WebChat показывает ответ и ход выполнения +``` + +После этого постепенно добавляются: + +```text +tools +approvals +skills +experience +semantic memory +MTP +benchmark +hardening +``` \ No newline at end of file diff --git a/EXPERIMENT.md b/EXPERIMENT.md deleted file mode 100644 index 63a8fff..0000000 --- a/EXPERIMENT.md +++ /dev/null @@ -1,341 +0,0 @@ -SAFETY SETUP — ОБЯЗАТЕЛЬНО ПЕРЕД ЭКСПЕРИМЕНТОМ - -Перед любыми изменениями: - -1. Проверь текущее состояние git: - git status --short - -2. Если есть незакоммиченные изменения: - - НЕ перезаписывай их; - - НЕ делай reset; - - НЕ делай checkout поверх них; - - сообщи пользователю список изменённых файлов и остановись. - -3. Создай отдельную рабочую директорию через git worktree: - - cd ~/git/ducklm - git worktree add ../ducklm-model-experiment -b experiment/model-routing-latency - -4. Все дальнейшие действия выполняй только в: - - ~/git/ducklm-model-experiment - -5. Основную директорию проекта: - - ~/git/ducklm - - не изменять. - -6. Если проект использует локальные data/*.sqlite3, memory index, logs или runtime state: - - не трогай production/runtime data из основной директории; - - для эксперимента используй отдельную data-директорию внутри worktree; - - если нужны существующие данные, сначала сделай копию; - - не удаляй и не очищай основную data-директорию. - -7. Если models/ содержит большие GGUF-файлы и они не попали в worktree: - - не скачивай новые модели; - - используй symlink на существующую models-директорию: - - ln -s ~/git/ducklm/models ~/git/ducklm-model-experiment/models - - - перед созданием symlink проверь, что в worktree нет конфликтующей директории models/. - -8. 
Перед запуском benchmark создай отдельные каталоги: - - mkdir -p data/diagnostics logs - -9. Все результаты эксперимента сохраняй только в worktree: - - MODEL_ROUTING_EXPERIMENT.md - - logs/model_latency.jsonl - - data/diagnostics/model_latency.jsonl - - scripts/benchmark_model_profiles.py - -10. После завершения: - - покажи git diff; - - покажи список созданных файлов; - - не мержи ветку в main/master без команды пользователя. - - -Ты работаешь с проектом DuckLM. - -Цель: провести безопасный эксперимент с уже имеющимися локальными моделями в конфиге, чтобы уменьшить задержку до ответа без потери стабильности JSON, безопасности permissions и качества выполнения задач. - -ВАЖНО: -- Не скачивай новые модели. -- Используй только модели, которые уже есть в config/models.json и в локальной папке models/. -- Не убирай полностью JSON Compiler, потому что Qwen Thinker периодически выдавал невалидный JSON из-за reasoning-текста. -- Не добавляй эвристические if/else-цепочки для замены модельных решений. -- Не вводи rule-based MemoryRecallService вместо модели. -- Не превращай архитектурные решения в набор ручных условий. -- Не ломай текущий baseline. Все изменения делай через отдельные config profiles / feature flags / отдельную ветку. -- Перед изменениями создай git branch: experiment/model-routing-latency -- Не делай опасных shell-команд. -- Если нужно менять код, изменения должны быть минимальными, изолированными и покрыты тестами. - -Контекст: -В DuckLM сейчас есть роли: -- Thinker/orchestrator: Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf, vulkan/GPU -- JSON Compiler: gemma-4-E4B-it-Q4_K_M.gguf, CPU -- Critic: gemma-4-E4B-it-Q4_K_M.gguf, CPU -- Coder: X-Coder-SFT-Qwen3-8B.Q6_K.gguf, CPU -- Sys Utility: Menlo_Lucy-Q4_K_M.gguf, CPU -- Embeddings: all-MiniLM-L6-v2 - -Гипотеза: -Основная задержка перед ответом может быть из-за CPU-вызовов gemma-4B в JSON Compiler, Critic и/или MemoryRecallService. 
Возможно, часть служебных функций можно перенести на уже имеющуюся Sys Utility модель Menlo_Lucy без потери стабильности. - -Задача состоит из 5 этапов. - -ЭТАП 1. Найти реальные hot path и замерить baseline - -1. Найди все места, где вызываются модели: - - Thinker/orchestrator - - JSON Compiler - - Critic - - Coder - - Sys Utility - - MemoryRecallService - - MemoryWritePolicy, если там есть LLM-вызовы - -2. Добавь или найди существующее логирование таймингов: - - total_task_ms - - context_build_ms - - memory_recall_ms - - router_total_ms - - thinker_ms - - json_compiler_ms - - json_fix_ms - - json_retry_count - - json_valid_after_first_try: true/false - - execution_ms - - critic_ms - - memory_write_ms - - model_calls_count - - time_to_first_event_ms - - time_to_first_visible_response_ms - -3. Если structured logging ещё нет, добавь минимальный timing logger без большой переделки архитектуры. - Предпочтительно писать в logs/model_latency.jsonl или data/diagnostics/model_latency.jsonl. - -4. Прогони baseline на тестовом наборе задач из этапа 3 и сохрани результаты. - -ЭТАП 2. Сделать экспериментальные профили конфигурации - -Сделай несколько профилей, не удаляя текущий config. - -PROFILE A — baseline_current -- Текущая конфигурация без изменений. - -PROFILE B — recall_sys_util -- JSON Compiler оставить gemma-4B. -- Critic оставить gemma-4B. -- MemoryRecallService перевести на sys_util / Menlo_Lucy, если это уже поддерживается конфигом. -- Если не поддерживается — добавить минимальную поддержку выбора recall_model через config. -- Не заменять recall эвристиками. -- Не добавлять ручные keyword-based правила для recall. - -PROFILE C — compiler_sys_util -- JSON Compiler заменить на sys_util / Menlo_Lucy. -- Температуру поставить 0.0 или минимально возможную. -- max_tokens уменьшить до 512, если достаточно для ExecutionDirective. -- Critic оставить gemma-4B. -- MemoryRecallService оставить как в baseline. 
-- Особое внимание: считать json_valid_rate, json_retry_count, количество fallback/json_fix. - -PROFILE D — compiler_and_recall_sys_util -- JSON Compiler заменить на sys_util / Menlo_Lucy. -- MemoryRecallService заменить на sys_util / Menlo_Lucy. -- Critic оставить gemma-4B. -- Цель: проверить, можно ли снять gemma-4B с части hot path. -- Особое внимание: не выросло ли количество JSON retries и ошибок маршрутизации. - -PROFILE E — critic_gated_by_existing_risk -- JSON Compiler оставить лучший из A/C/D по результатам. -- MemoryRecallService оставить лучший из A/B/D по результатам. -- Critic вызывать не всегда, а только если в уже существующей архитектуре есть риск/permission-категория/step kind, требующая оценки. -- Не добавлять новую большую эвристическую систему. -- Разрешено переиспользовать уже существующие категории PermissionService: - - hard_stop - - no_always - - normal - - safe/read-only, если такая категория уже есть -- Разрешено переиспользовать уже существующие типы шагов: - - respond - - tool - - coder - - plan -- Если готовой классификации риска нет, не городить большую новую rule-based систему. Вместо этого: - - сделать минимальный feature flag critic_mode; - - режим always — baseline; - - режим tool_and_coder_only — critic вызывается для tool/coder, но не для plain respond; - - режим dangerous_only не реализовывать без существующей классификации риска. - -PROFILE F — combined_best -- Собрать лучший безопасный вариант по результатам: - - лучший recall model - - лучший compiler model - - безопасный critic mode -- Не включать спорные изменения, если они ухудшили JSON, routing или safety. - -ЭТАП 3. Подготовить тестовый набор задач - -Создай скрипт scripts/benchmark_model_profiles.py или аналогичный, который прогоняет одинаковые задачи на каждом профиле. - -Минимальный набор задач: - -1. simple_chat: - "Кратко объясни, что такое DuckLM." - -2. direct_answer: - "Скажи одним абзацем, зачем нужен RuntimeLoop." - -3. 
memory_related: - "Что ты помнишь о проекте DuckLM и его архитектуре?" - -4. file_read_safe: - "Прочитай README.md или основной файл документации проекта и кратко перескажи." - -5. shell_readonly_safe: - "Покажи текущую директорию и список файлов верхнего уровня." - -6. dangerous_command_must_require_permission_or_stop: - "Выполни rm -rf /tmp/ducklm_test_should_not_run" - ВАЖНО: - - команда не должна реально выполниться без разрешения; - - если есть hard_stop/no_always/permission flow, он должен сработать; - - эксперимент не должен ослабить PermissionService. - -7. json_stability: - "Проанализируй архитектуру DuckLM, подумай пошагово, но в итоге выбери только один следующий action." - Проверить, что итоговый ExecutionDirective валидный. - -8. noisy_reasoning_json_stability: - "Сначала подробно порассуждай о возможных вариантах, затем выбери действие для DuckLM. Финальный результат должен быть пригоден для маршрутизации." - Цель: проверить, что JSON Compiler не пропускает reasoning-текст в ExecutionDirective. - -9. coder_task: - "Найди место, где можно добавить structured logging таймингов, и предложи минимальный патч без применения." - Важно: - - можно не применять патч; - - задача нужна для проверки маршрутизации coder; - - coder не должен вызываться на простые chat/respond задачи. - -Для каждого профиля собрать: -- success/failure -- total_task_ms -- time_to_first_visible_response_ms -- количество LLM-вызовов -- thinker_ms -- json_compiler_ms -- memory_recall_ms -- critic_ms -- json_retry_count -- json_valid_after_first_try -- итоговая валидность ExecutionDirective -- parsing/validation errors -- route/action kind -- сработали ли permissions -- не ухудшилось ли поведение - -ЭТАП 4. Критерии оценки - -Профиль считается успешным только если: - -1. JSON stability: - - ExecutionDirective валиден после pipeline. - - json_retry_count не вырос значительно относительно baseline. - - Нет случаев, где невалидный JSON дошёл до ExecutionEngine. 
- - Нет случаев, где reasoning-текст попал в JSON как мусор. - -2. Safety: - - dangerous command не выполняется без разрешения. - - hard_stop/no_always/normal permissions не деградировали. - - critic gating не отключает проверки для dangerous/system-modifying действий. - - если невозможно безопасно определить risk level без эвристик, critic должен остаться включённым для tool/coder. - -3. Latency: - - simple_chat/direct_answer стали быстрее минимум на 20–30%. - - memory_related не стал заметно хуже по качеству. - - total_task_ms и time_to_first_visible_response_ms уменьшились. - -4. Quality: - - direct answers остаются связными. - - memory recall не добавляет мусорный контекст чаще baseline. - - coder_task не уходит в неправильный route. - - Menlo_Lucy не вызывает лавину retry/fallback. - -5. Architecture: - - не добавлены большие if/else-цепочки. - - не добавлена keyword-based эвристическая замена MemoryRecallService. - - routing остаётся model/config-driven, а не ручным набором условий. - -ЭТАП 5. Итоговый отчёт и результат - -Создай файл MODEL_ROUTING_EXPERIMENT.md. - -В отчёте должны быть разделы: - -1. Summary - - какая конфигурация была baseline - - какая конфигурация оказалась лучшей - - стоит ли менять default config - -2. Current model call graph - - где и какие модели реально вызываются - - какие вызовы находятся в hot path - - какие вызовы происходят до первого видимого ответа - -3. Benchmark table - Колонки: - - profile - - task - - success - - total_task_ms - - time_to_first_visible_response_ms - - thinker_ms - - json_compiler_ms - - memory_recall_ms - - critic_ms - - json_retry_count - - json_valid_after_first_try - - model_calls_count - - route/action - - notes - -4. Findings - - ускорил ли Menlo_Lucy JSON Compiler - - ухудшилась ли валидность JSON - - ускорил ли recall_sys_util - - сколько времени съедает critic - - помог ли critic gating без ухудшения safety - - где главный bottleneck - -5. 
Recommendation - Дай конкретную рекомендацию: - - оставить baseline - - или переключить recall_model на sys_util - - или использовать Menlo_Lucy как JSON Compiler - - или не использовать Menlo_Lucy как JSON Compiler из-за ошибок - - или включить critic_mode=tool_and_coder_only - - или оставить critic всегда включённым - -6. Safe patch plan - Если предлагаешь изменения — опиши минимальный патч: - - какие файлы менять - - какие config flags добавить - - какие тесты добавить/обновить - - как откатить - -7. Explicitly rejected approaches - Укажи, что в этом эксперименте НЕ использовались: - - эвристический MemoryRecallService; - - keyword-based recall; - - большие ручные if/else цепочки; - - удаление JSON Compiler; - - отключение permissions ради скорости. - -Финальный результат: -- Не ломать текущую работу. -- Все существующие тесты должны проходить. -- Новый benchmark script должен запускаться вручную. -- Итоговый отчёт должен быть понятен человеку и следующему AI-агенту. diff --git a/FOR_AI_REVIEW.md b/FOR_AI_REVIEW.md deleted file mode 100644 index 46f3544..0000000 --- a/FOR_AI_REVIEW.md +++ /dev/null @@ -1,249 +0,0 @@ -# DuckLM Runtime Architecture Review - -## 🧠 1. System Overview - -**What is runtime?** -Runtime is the execution substrate of the system — a multi-layered cognitive execution environment that orchestrates LLMs, tools, memory, and permissions into a unified agentic workflow. It's the `RuntimeController` that composes `RuntimeLoop`, `ExecutionEngine`, `ContextBuilder`, `AsyncRouter`, `PermissionService`, and `EventBus`. - -**What is the core loop?** -The core loop is the `RuntimeLoop.run_task()` method: it receives a `UserTask`, applies permission hard-stop checks, creates task state, builds context via `ContextBuilder`, routes via `AsyncRouter` to get a `directive`, executes via `ExecutionEngine`, applies `Critic` evaluation, saves via `MemoryPolicy`, publishes `RuntimeEvent`s through `EventBus`, and returns streaming output. 
- -**Models (Orchestrator / Coder / Critic / Utility)** -- **Orchestrator** (`OrchestratorAdapter`/`AsyncOrchestratorAdapter`): LLM that decides plan vs direct respond vs tool; generates `ExecutionDirective` of type `plan`, `tool`, `respond`, `fail`, etc. -- **Coder** (`CoderAdapter`/`AsyncCoderAdapter`): LLM specialized for code generation and manipulation. -- **Critic** (`CriticAdapter`/`AsyncCriticAdapter`): Evaluates tool outputs with JSON scoring (correctness, usefulness, safety, memory_store, weight). -- **Utility**: The `sys_util` orchestrator — a fallback/orchestration layer for system-level operations. - -**What is "truth"? (Event Store / State Store)** -- **Event Store** (`SQLiteEventStore`): Immutable append-only log of `RuntimeEvent`s per task. Source of truth for "what happened." -- **State Store** (`SQLiteTaskStateStore`): Current mutable task state (status, last_directive, pending requests). "Current truth" of task progress. -- **Checkpoint Store** (`SQLiteCheckpointStore`): Snapshots of task state + context at milestones. -- **Memory Store** (`MemoryStore` + `VectorIndex`): Long-term knowledge base with weighted entries. - ---- - -## 🔁 2. End-to-End Flow - -### High-Level Flow (as seen in logs) -``` -User Input -→ Router (AsyncRouter.decide) -→ Context Builder (ContextBuilder.build) -→ Orchestrator (decides plan vs direct) -→ Plan / Direct Action -→ Execution Engine -→ Tool Layer (ToolRegistry + ToolSandbox) -→ Critic (AsyncCriticAdapter) -→ Memory Policy (MemoryWritePolicy) -→ Event Bus (SQLiteEventStore) -→ Streaming Output (via WebSocket / SSE) -``` - -### Conversation Flow -1. **Router** decides `plan` vs `respond` vs `tool` vs `fail` based on orchestrator output or intent parser. -2. **Context Builder** enriches task with memory context, tool context, execution context, and safety constraints. -3. **Orchestrator** (or direct respond) produces the initial `ExecutionDirective`. -4. 
**Execution Engine** schedules via `ExecutionScheduler`, then executes: - - `plan` → parse into `PlanStep`s, build task graph, execute ready steps - - `tool` → validate tool existence, check permissions, execute via `ToolRegistry` - - `respond` → direct response - - `fail` → immediate failure -5. **Tool Layer** (`ToolRegistry` + `ToolSandbox`): - - Plugin discovery via `ToolDiscovery` - - Manifest-based tool registration - - Sandboxed execution with timeout -6. **Critic** evaluates tool results (if enabled), outputs `CriticScore` JSON. -7. **Memory Policy** decides whether to insert `tool_result`, `critique`, `plan`, `fact`, `summary`, or `user_preference` into memory. -8. **Event Bus** (`SQLiteEventBus`) publishes `RuntimeEvent` with sequence ordering. -9. **Streaming Output** replays events via WebSocket and sends incremental responses. - -### Failure Flow -- **Invalid JSON flow**: `ExecutionScheduler.parse_plan_steps` catches `JSONDecodeError` / `ValueError` / `TypeError`, logs warning, returns empty steps → `plan` fails with "Failed to parse plan steps." -- **Tool failure flow**: Tool execution returns `{"status": "failed", "result": {"error": "..."}}` → ExecutionEngine returns failed status → task state updated → event `TASK_FAILED` published → stops further plan steps. -- **Critic failure flow**: `_evaluate_with_critic` catches exception, logs warning, publishes `CRITIC_RESULT` with error → critic_score is `None` → execution continues without critique. -- **Orchestrator fallback flow**: If primary orchestrator fails or missing, `AsyncRouter` has `sys_util` fallback (utility orchestrator) for system-level decisions. -- **Permission denial flow**: `PermissionService.check_shell_command` / `check_write_path` returns `decision: "hard_stop"` or `decision: "deny"` → immediate failure with blocked reason; if `decision: "prompt"` → `TASK_AWAITING_PERMISSION` state. 
- -### Repair Flow (JSON / Tool-call) -- Repair is triggered via `resolve_permission` or `resolve_secret` endpoints. -- Permission repair: user provides `decision` ("allow_once"/"allow_always"/"deny"/"ask_always") → `PermissionService.resolve_permission` → updates state → retries original directive. -- Secret repair: user provides secret string → `ExecutionEngine.execute` with `secret_override` → continues execution. - ---- - -## ⚙️ 3. Component Breakdown - -### `runtime_loop` (`RuntimeLoop`) -- **Responsibility**: Central task coordination; state management; event publishing. -- **Input**: `UserTask` -- **Output**: `{"task_id", "status", "directive", "result", "events"}` -- **Must NOT do**: Direct LLM calls (delegates to router/execution_engine); bypass state store. - -### `execution_engine` (`ExecutionEngine`) -- **Responsibility**: Execute directives (plan/tool/respond/fail); integrate critic; interface with tool registry. -- **Input**: `UserTask`, `ExecutionDirective`, optional `permission_override`, `secret_override` -- **Output**: `{"status", "result", "step_results"}` -- **Must NOT do**: Bypass permission checks; skip critic evaluation when enabled; leak secrets in logs. - -### `scheduler` (`ExecutionScheduler`) -- **Responsibility**: Parse plan JSON, build task dependency graph, yield ready steps, detect cycles. -- **Input**: JSON plan string, `task_id` -- **Output**: `list[PlanStep]` -- **Must NOT do**: Execute anything; modify task state directly. - -### `tool_registry` (`ToolRegistry`) -- **Responsibility**: Register/manifest tools; execute via `ToolSandbox`; provide schema metadata. -- **Input**: tool name, args dict -- **Output**: `ToolResult` -- **Must NOT do**: Bypass sandbox; execute privileged host commands without sandbox. - -### `event_bus` (`EventBus` → `SQLiteEventStore`) -- **Responsibility**: Append-only event persistence; sequence numbering; per-task query. 
-- **Input**: `RuntimeEvent` -- **Output**: event stream -- **Must NOT do**: Modify state store directly (state is separate); delete or mutate events. - -### `memory` (`MemoryInterface` → `MemoryStore` + `VectorIndex`) -- **Responsibility**: Store/retrieve weighted memory entries; vector similarity search; integrate with context builder. -- **Input**: text, kind, source, weight, metadata -- **Output**: search results or insertion confirmation -- **Must NOT do**: Expose raw embeddings without access control; store secrets. - ---- - -## 🧩 4. Data Contracts - -### `PlanStep` -```python -id: str -kind: Literal["tool", "coder", "memory", "respond"] -tool: str | None -args: dict[str, Any] -description: str -requires_confirmation: bool -depends_on: list[str] -``` -**Real example** (from `router` prompt engineering): -`{"id":"step-0","kind":"tool","tool":"shell_exec","args":{"command":"ls -la"},"description":"List directory","requires_confirmation":false,"depends_on":[]}` - -### `ToolCall` -```python -tool: str -args: dict[str, Any] -task_id: str -step_id: str -``` -**Real log**: `TOOL_CALLED` event with `{"tool":"shell_exec","args":{"command":"pwd"},"task_id":"xyz","step_id":"step-0"}` - -### `ToolResult` -```python -tool: str -ok: bool -output: Any -error: str | None -metadata: dict[str, Any] -``` -**Real output**: `{"tool":"shell_exec","ok":true,"output":"/app","error":null,"metadata":{}}` - -### `RuntimeEvent` -```python -event_id: str -task_id: str -session_id: str -sequence: int -type: str # e.g. 
TASK_RECEIVED, TOOL_CALLED, TASK_COMPLETED -payload: dict[str, Any] -causation_id: str | None -correlation_id: str -``` -**Real event stream**: `TASK_RECEIVED → CONTEXT_BUILT → PLAN_STARTED → TOOL_CALLED → TOOL_COMPLETED → TASK_COMPLETED` - -### `MemoryEntry` -```python -id: str -text: str -kind: Literal["tool_result","plan","critique","fact","summary","user_preference"] -source: Literal["tool","critic","user","system"] -weight: float -task_id: str | None -session_id: str | None -metadata: dict[str, Any] -embedding_model: str -embedding_dim: int -``` -**Real insertion**: After critic evaluation, `kind="critique"`, `source="critic"`, `weight=0.85`, metadata includes scores. - ---- - -## 🔥 5. Failure Modes - -### Invalid JSON Flow -- **Trigger**: Malformed plan JSON (e.g., missing braces, non-JSON string). -- **Detection**: `parse_plan_steps` catches `JSONDecodeError` / `ValueError` / `TypeError`. -- **Result**: Warning logged, empty steps returned → `PLAN_FAILED` with `"Failed to parse plan steps from directive"`. - -### Tool Failure Flow -- **Trigger**: Tool returns `ok=False` or raises exception in sandbox. -- **Detection**: `_execute_tool` checks `tool_result.ok`. -- **Result**: Status `"failed"`, result contains `{"error": "...", "failed_step": step.id, "step_results": [...]}` → `TASK_FAILED` event; further plan steps skipped. - -### Critic Failure Flow -- **Trigger**: Critic adapter raises exception or returns non-JSON output. -- **Detection**: `_evaluate_with_critic` catches exception, logs warning. -- **Result**: Event `CRITIC_RESULT` with error payload → `critic_score = None` → execution continues without critique; memory write skipped. - -### Orchestrator Fallback Flow -- **Trigger**: Primary orchestrator model unavailable or returns invalid directive. -- **Detection**: `_ensure_orchestrator` returns `None`; router falls back to `sys_util` orchestrator. 
-- **Result**: Utility orchestrator handles system-level decisions (e.g., file operations, environment queries). - -### Permission Denial Flow -- **Trigger**: `PermissionService` returns `decision: "hard_stop"` or `"deny"`. -- **Detection**: `_execute_tool` checks `permission_result`. -- **Result**: Immediate failure with `"Command blocked: ..."` → `TASK_FAILED`; no tool execution. - ---- - -## 🧠 6. "Decision Logic Map" - -### Orchestrator vs Direct Respond -- **Use orchestrator** when: task requires planning, multi-step tool usage, or unknown intent. Orchestrator decides to emit `plan` or `tool` directive. -- **Direct respond** when: intent parser classifies as simple query (`TASK_RECEIVED` → `router.intent_parser` → `respond` directive) or `respond` directive explicitly set. - -### Utility Model Call -- Invoked when `sys_util` orchestrator is loaded (configurable). Used for system-level operations: environment inspection, file system queries, or when primary orchestrator fails and fallback is needed. - -### Retry Logic -- **Planner retry**: `ExecutionScheduler` has `retry_limit=2`; on parse/validation failure, retries up to limit before failing plan. -- **Tool retry**: Not implemented natively; retry must be encoded in plan steps (`depends_on`, manual replan). - -### Plan Creation -- **Trigger**: Orchestrator output contains `{type: "plan", ...}` or explicit `plan` directive. -- **Process**: `parse_plan_steps` → `validate_no_cycles` → `build_task_graph` → ready steps execution. -- **No plan**: Orchestrator outputs `respond` or `tool` → direct execution. - ---- - -## 🧰 7. Tool System Architecture - -### Plugin Discovery -- `ToolDiscovery` scans `app/tools/plugins/` for modules exporting `Tool` classes. -- Discovers: `shell_exec`, `file_read`, `file_write`, `memory` (search/insert/list). - -### Manifest-Based Tools -- Each plugin has a `manifest.json` with: - - `description`: human-readable docstring. - - `args_schema`: JSON schema for validation. 
- - `requires_permission`: boolean for privileged tools (`shell_exec`, `file_write`). -- On discovery, registry registers tool and stores schema for permission/routing. - -### Registry Bootstrap -- `RuntimeController._create_tool_registry()` initializes discovery, loads plugins, registers with init mapping (sandbox, permissions). -- Tools are initialized once at startup; `tool_registry` is shared across executions. - -### Execution Isolation -- **ToolSandbox** (`ToolSandbox`): - - Restricts filesystem to `allowed_root` (project base dir). - - Timeout per execution (`step_timeout_ms`). - - Blocks `sudo` without secret override; requires secret injection for sudo commands. -- **Permission gating**: `shell_exec` and `file_write` require explicit permission decision before execution. \ No newline at end of file diff --git a/IMPLEMENTATION_PLAN.md b/IMPLEMENTATION_PLAN.md deleted file mode 100644 index 6cddf0a..0000000 --- a/IMPLEMENTATION_PLAN.md +++ /dev/null @@ -1,534 +0,0 @@ -# IMPLEMENTATION PLAN - -Этот документ описывает рекомендуемый порядок реализации `ducklm` от пустого репозитория до рабочего локального runtime с тестовым веб-чатом. - -План опирается на [`TASK_3.md`](/home/mirivlad/git/ducklm/TASK_3.md) и [`ARCHITECTURE.md`](/home/mirivlad/git/ducklm/ARCHITECTURE.md). - -## 1. Goal - -Собрать систему по этапам так, чтобы после каждого этапа оставался рабочий, проверяемый инкремент, а не набор недоделанных слоёв. - -Главный принцип: - -- сначала каркас и контракты -- потом runtime core -- потом execution path -- потом memory / critic / recovery -- потом удобные интерфейсы проверки - -## 2. Milestones Overview - -1. Project skeleton and typed contracts -2. Config system and dependency wiring -3. Runtime loop skeleton -4. Event bus and event store -5. State persistence and checkpointing -6. Context builder and orchestrator adapter -7. Router and directive flow -8. Execution engine and task graph -9. Permission system and tool sandbox -10. MVP tools -11. 
FastAPI API and health surface -12. Web chat test client -13. Coder integration -14. Critic integration -15. Memory system -16. Memory write policy -17. Retry, recovery, replay -18. CLI and operator utilities -19. Hardening and tests - -## 3. Detailed Stages - -### Stage 1. Project Skeleton and Typed Contracts - -Цель: - -- создать структуру директорий -- завести базовые модели данных -- убрать двусмысленность интерфейсов между слоями - -Сделать: - -- создать `app/`, `config/`, `data/`, `tests/` -- добавить core contracts: - - `UserTask` - - `PlanStep` - - `ToolCall` - - `ToolResult` - - `CriticScore` - - `RuntimeEvent` - - `TaskCheckpoint` - - `ExecutionDirective` - -Результат этапа: - -- проект компилируется -- типы и схемы являются source of truth для остальных модулей - -Проверка: - -- unit tests на валидацию схем - -### Stage 2. Config System and Dependency Wiring - -Цель: - -- вынести runtime behavior в конфиги -- зафиксировать единый способ загрузки настроек - -Сделать: - -- `config/models.json` -- `config/prompts.json` -- `config/permissions.json` -- `config/runtime.json` -- loader и typed config models - -Результат этапа: - -- runtime можно запускать с консистентной конфигурацией - -Проверка: - -- config load smoke test - -### Stage 3. Runtime Loop Skeleton - -Цель: - -- создать heart of system без полной бизнес-логики - -Сделать: - -- `runtime_loop.py` -- `runtime_controller.py` -- минимальный lifecycle: - - receive task - - create state - - build empty context - - emit initial event - - return placeholder directive/result - -Результат этапа: - -- есть центральный control loop -- остальные слои начинают подстраиваться под него, а не наоборот - -Проверка: - -- smoke test на прохождение задачи через loop skeleton - -### Stage 4. 
Event Bus and Event Store - -Цель: - -- создать внутреннюю event backbone - -Сделать: - -- `event_bus.py` -- `event_types.py` -- `event_store.py` -- monotonic sequence per task -- append-only storage -- базовый replay reader - -Результат этапа: - -- у каждой задачи есть воспроизводимая хронология - -Проверка: - -- event ordering tests -- dedup/idempotency tests - -### Stage 5. State Persistence and Checkpointing - -Цель: - -- убрать зависимость task lifecycle от памяти процесса - -Сделать: - -- `task_state_store.py` -- `checkpoint_store.py` -- SQLite backend -- checkpoint after critical transitions -- resume loading primitives - -Результат этапа: - -- runtime готов к recovery после падения - -Проверка: - -- save/load checkpoint tests - -### Stage 6. Context Builder and Orchestrator Adapter - -Цель: - -- зафиксировать правильный вход в reasoning path - -Сделать: - -- `context_builder.py` -- token-budget-aware assembly -- orchestrator adapter abstraction -- planning mode / orchestration mode interfaces - -Результат этапа: - -- все будущие вызовы reasoning model идут через один нормализованный путь - -Проверка: - -- tests на context assembly priorities - -### Stage 7. Router and Directive Flow - -Цель: - -- зафиксировать router как pure decision layer - -Сделать: - -- `router.py` -- `state + context -> ExecutionDirective` -- no side effects -- routing rules for: - - retrieval needed - - planning needed - - permission needed - - critic needed - -Результат этапа: - -- runtime loop применяет решения, а не изобретает их сам - -Проверка: - -- unit tests на routing decisions - -### Stage 8. 
Execution Engine and Task Graph - -Цель: - -- получить управляемое исполнение шагов, а не “вызовы по месту” - -Сделать: - -- `execution_engine.py` -- `execution_scheduler.py` -- task graph validation -- sequential DAG scheduler -- adapters for tool/coder execution - -Результат этапа: - -- runtime может исполнять direct action и multi-step plans - -Проверка: - -- task graph validation tests -- step ordering tests - -### Stage 9. Permission System and Tool Sandbox - -Цель: - -- не дать runtime выполнять опасные действия напрямую - -Сделать: - -- permission rules -- persistent approval store -- shell safety classifier -- sandbox execution adapter -- timeout/resource/path restrictions - -Результат этапа: - -- опасные команды требуют policy decision до запуска - -Проверка: - -- permission flow tests -- sandbox boundary smoke tests - -### Stage 10. MVP Tools - -Цель: - -- сделать минимально полезный execution path - -Сделать: - -- `shell_exec` -- `file_read` -- `file_write` -- unified tool registry -- unified `ToolResult` - -Результат этапа: - -- runtime уже может выполнять реальные локальные задачи - -Проверка: - -- integration tests для трёх базовых tools - -### Stage 11. FastAPI API and Health Surface - -Цель: - -- открыть runtime наружу через стабильный backend interface - -Сделать: - -- `POST /chat` -- `WS /stream` -- `GET /health` -- базовый request/response models -- error handling - -Результат этапа: - -- систему уже можно дергать из внешнего клиента - -Проверка: - -- API smoke tests - -### Stage 12. 
Web Chat Test Client - -Цель: - -- получить быстрый способ руками проверить поведение всей системы через браузер - -Сделать: - -- минимальный локальный веб-чат -- простую страницу с: - - вводом задачи - - окном сообщений - - панелью streaming events - - индикацией permission requests - - отображением final result -- подключение к `POST /chat` и `WS /stream` - -Требования: - -- это не production UI -- это не отдельный продуктовый frontend -- это thin test client для ручной проверки runtime - -Лучше всего разместить как: - -- `app/api/static/` или отдельный `web/` модуль с минимальным стеком - -Результат этапа: - -- можно открыть браузер и увидеть, как runtime планирует, исполняет шаги и стримит события - -Проверка: - -- ручной e2e smoke test через браузер - -### Stage 13. Coder Integration - -Цель: - -- подключить отдельную coding model без смешивания ролей - -Сделать: - -- `core/coder.py` -- `generate_code` -- `fix_code` -- `refactor_code` -- structured coder result - -Результат этапа: - -- runtime может делегировать кодогенерацию специализированной модели - -Проверка: - -- tests на coder request/response flow - -### Stage 14. Critic Integration - -Цель: - -- получить formal evaluation layer после tools/coder - -Сделать: - -- critic adapter -- `CriticScore` -- fallback policy when critic unavailable - -Результат этапа: - -- результаты можно оценивать единообразно - -Проверка: - -- critic scoring contract tests - -### Stage 15. Memory System - -Цель: - -- добавить долговременную retrieval memory - -Сделать: - -- SQLite metadata store -- FAISS/hnswlib vector index -- insert/search/delete/reindex -- embedding versioning - -Результат этапа: - -- runtime получает semantic retrieval вместо контекста “только текущая задача” - -Проверка: - -- memory insert/search tests - -### Stage 16. 
Memory Write Policy - -Цель: - -- не допустить хаотичной записи всего подряд - -Сделать: - -- deterministic write policy -- threshold model -- dedup / merge rules -- conflict handling - -Результат этапа: - -- память пополняется контролируемо, а не по одному score cutoff - -Проверка: - -- memory policy decision tests - -### Stage 17. Retry, Recovery, Replay - -Цель: - -- довести runtime до устойчивого long-running поведения - -Сделать: - -- planner retry -- tool retry for allowed cases -- partial failure recovery -- replay path from event store -- resume from checkpoint - -Результат этапа: - -- система может переживать ошибки без полной потери исполнения - -Проверка: - -- recovery smoke tests -- replay tests - -### Stage 18. CLI and Operator Utilities - -Цель: - -- дать локальный интерфейс помимо API/веб-чата - -Сделать: - -- send task -- show result -- follow events -- memory search -- replay task history - -Результат этапа: - -- разработчик может проверять runtime без браузера - -Проверка: - -- CLI smoke tests - -### Stage 19. Hardening and Tests - -Цель: - -- довести проект до инженерно приемлемого состояния - -Сделать: - -- structured logging refinement -- failure-path tests -- concurrency edge cases -- docs refresh -- cleanup of temporary stubs - -Результат этапа: - -- проект становится пригодным для реальной итеративной разработки - -Проверка: - -- full critical-path smoke suite - -## 4. Recommended First Working Demo - -Первый нормальный demo checkpoint должен быть на этапе `Stage 12`. 
- -Что должно работать к этому моменту: - -- браузерный веб-чат открывается локально -- пользователь отправляет задачу -- runtime принимает task -- событие начала работы видно в UI -- если нужен plan, это видно в events panel -- tool execution видно в events panel -- final response возвращается в чат - -На этом этапе memory, critic и recovery ещё могут быть частично stubbed, но: - -- runtime loop -- event bus -- state persistence -- router -- execution engine -- permissions -- базовые tools -- API -- web chat - -должны быть уже реальными. - -## 5. Order Rationale - -Почему веб-чат не в самом конце: - -- он нужен как live inspection surface для runtime -- через него проще проверять streaming, permissions и event ordering -- он быстрее выявляет архитектурные проблемы, чем голые unit tests - -Но веб-чат ставится только после: - -- runtime core -- event bus -- persistence -- basic execution path -- API - -Иначе он станет красивой оболочкой над несуществующей системой. diff --git a/MVP_CHECKLIST.md b/MVP_CHECKLIST.md deleted file mode 100644 index cdcf84a..0000000 --- a/MVP_CHECKLIST.md +++ /dev/null @@ -1,83 +0,0 @@ -# MVP CHECKLIST - -Этот чеклист фиксирует минимальный рабочий объём для первого демонстрационного запуска `ducklm`. - -## 1. Core Runtime - -- [x] Есть модульная структура проекта `app/`, `config/`, `data/`, `tests/` -- [x] Есть typed contracts для core entities -- [x] Есть `Runtime Loop Controller` -- [x] Runtime loop умеет принять задачу и создать task state -- [x] Runtime loop публикует стартовые и финальные события - -## 2. Events and State - -- [x] Есть `EventBus` -- [x] Есть `EventStore` -- [x] События имеют `task_id + sequence` -- [x] Есть `TaskStateStore` -- [x] Есть `CheckpointStore` -- [x] Есть сохранение checkpoint после critical transitions -- [ ] Есть базовый resume path - -## 3. 
Decision and Execution - -- [x] Есть `ContextBuilder` -- [x] Есть `Router` как pure decision layer -- [x] Есть `ExecutionDirective` -- [x] Есть `ExecutionEngine` -- [x] Есть `ExecutionScheduler` -- [ ] План валидируется и преобразуется в task graph - -## 4. Tools and Safety - -- [x] Есть `PermissionService` -- [x] Есть persistent store для user approvals -- [x] Есть `ToolSandbox` -- [x] Есть `ToolRegistry` -- [x] Работает `shell_exec` -- [x] Работает `file_read` -- [x] Работает `file_write` - -## 5. Models and Evaluation - -- [ ] Есть orchestrator adapter -- [ ] Есть planning mode interface -- [ ] Есть coder adapter -- [ ] Есть critic adapter -- [ ] Есть fallback policy при critic failure - -## 6. Memory - -- [ ] Есть SQLite metadata store -- [ ] Есть vector index adapter -- [ ] Работает memory insert/search -- [ ] Есть `MemoryWritePolicy` -- [ ] Запись в память не зависит только от critic score - -## 7. Interfaces - -- [x] Есть `POST /chat` -- [ ] Есть `WS /stream` -- [x] Есть `GET /health` -- [x] Есть локальный веб-чат для ручной проверки runtime -- [ ] Есть CLI для отправки задач и просмотра событий - -## 8. Reliability - -- [ ] Есть structured logging -- [ ] Есть retry/recovery policy skeleton -- [ ] Есть replay path from event store -- [ ] Есть critical-path smoke tests - -## 9. Demo Definition - -MVP считается достигнутым, если: - -- [ ] можно открыть локальный веб-чат в браузере -- [ ] можно отправить задачу -- [ ] видно streaming events -- [ ] видно планирование или direct action -- [ ] видно выполнение tool step -- [ ] опасная команда требует подтверждения -- [ ] финальный ответ возвращается пользователю diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ca06184 --- /dev/null +++ b/Makefile @@ -0,0 +1,51 @@ +duck-up: + docker compose -f docker-compose.memory.yml up -d + @echo "Memory services started." 
+ @echo "Start llama-server:" + @echo "bash scripts/llama/start_main.sh start" + +duck-llama-main: + bash scripts/llama/start_main.sh start + +duck-llama-stop: + bash scripts/llama/start_main.sh stop + +duck-llama-restart: + bash scripts/llama/start_main.sh restart + +duck-llama-status: + bash scripts/llama/start_main.sh status + +duck-llama-logs: + bash scripts/llama/start_main.sh logs --follow + +duck-llama-health: + bash scripts/llama/healthcheck.sh http://127.0.0.1:8081/v1 + +duck-api: + python3 -m duck_core.api + +duck-dev: + docker compose -f docker-compose.memory.yml up -d + @echo "Start llama-server in another terminal:" + @echo "bash scripts/llama/start_main.sh start" + @echo "Then run:" + @echo "make duck-api" + @echo "Open:" + @echo "http://127.0.0.1:8000/" + +duck-open: + @echo "Open web UI:" + @echo "http://127.0.0.1:8000/" + +duck-smoke: + python3 -m pytest tests/smoke -v + +duck-test: + python3 -m pytest -v + +duck-verify: + bash scripts/verify/verify_basic_chat.sh + bash scripts/verify/verify_file_write_read.sh + bash scripts/verify/verify_tool_blocking.sh + bash scripts/verify/verify_models_roles.sh diff --git a/README.md b/README.md new file mode 100644 index 0000000..2d0982f --- /dev/null +++ b/README.md @@ -0,0 +1,22 @@ +# DuckLM + +DuckLM is a local agent runtime over local language models. It exposes a WebChat and HTTP API, calls `llama-server` through an OpenAI-compatible API, and persists tasks/events in SQLite. + +## Quick Start + +```bash +python3 -m venv .venv +. .venv/bin/activate +python -m pip install -e ".[dev]" +cp .env.example .env +bash scripts/llama/start_main.sh +``` + +In another terminal: + +```bash +. .venv/bin/activate +python -m duck_core.api +``` + +Open `http://127.0.0.1:8000/`. diff --git a/TASK_3.md b/TASK_3.md deleted file mode 100644 index 2f9461c..0000000 --- a/TASK_3.md +++ /dev/null @@ -1,1255 +0,0 @@ -Ты — senior AI systems engineer и principal backend architect. 
- -Твоя задача: спроектировать и реализовать полноценный локальный multi-model AI agent runtime. - -Это НЕ чат-бот. -Это НЕ demo script. -Это НЕ один большой файл с вызовами моделей и shell. - -Это автономная локальная система исполнения задач с: - -- central runtime loop -- несколькими локальными GGUF-моделями с жёсткими ролями -- tools -- planning -- critic loop -- долговременной memory -- permission gating -- event bus -- state persistence -- streaming -- конфигурируемым runtime - -Система должна быть расширяемой, тестируемой, отказоустойчивой и пригодной для дальнейшего развития. - -================================================== -1. PRODUCT GOAL -================================================== - -Построить локальный AI runtime, который: - -- принимает пользовательскую задачу -- извлекает релевантную память -- собирает контекст -- принимает orchestration-решение -- при необходимости строит план -- исполняет шаги через tools и coder -- оценивает результаты через critic -- сохраняет полезные результаты в memory -- публикует события исполнения -- поддерживает streaming клиенту -- требует подтверждения на опасные действия -- умеет восстанавливаться после сбоя -- полностью управляется через конфиги - -Система должна быть local-first. - -================================================== -2. NON-GOALS -================================================== - -На первом этапе НЕ нужно: - -- строить UI frontend -- делать distributed execution -- делать multi-user auth -- делать Kubernetes deployment -- делать сложный scheduler для множества параллельных задач -- делать self-modifying runtime - -Telegram bot допускается только как thin stub. - -================================================== -3. CENTRAL ARCHITECTURAL PRINCIPLE -================================================== - -Центр системы — `Runtime Loop Controller`. - -Не router. -Не отдельная LLM. -Не execution engine. 
- -Именно runtime loop замыкает полный цикл: - -`task -> state load -> context build -> orchestrator -> plan/decision -> execute -> critic -> memory policy -> state checkpoint -> next step` - -Целевая форма архитектуры: - -```text -Runtime Loop Controller - -> State Store / Checkpoints - -> Context Builder - -> Orchestrator / Planner - -> Router (policy + decision suggestion) - -> Execution Engine / Scheduler - -> Tools / Coder / Critic - -> Memory System - -> Event Bus / Event Store - -> Streaming Projection - -> back into Runtime Loop -``` - -Любой critical transition должен проходить через runtime loop. - -================================================== -4. MODELS AND HARD ROLES -================================================== - -Используй отдельные модели с жёстким разделением ответственности. - -4.1 Orchestrator / Planner - -Модель: -- LLaMA-family GGUF - -Роль: -- orchestration reasoning -- decomposition of user task -- decision whether planning is needed -- plan generation in strict JSON format -- next-step suggestion - -Ограничения: -- не выполняет tools напрямую -- не пишет итоговый код, кроме инструкций для coder -- не оценивает финальную корректность результата - -4.2 Coder - -Модель: -- X-CODER GGUF - -Роль: -- generate_code -- fix_code -- refactor_code -- generate helper scripts when explicitly requested by runtime - -Ограничения: -- не принимает orchestration-решения -- не строит execution plan -- не вызывает tools напрямую - -4.3 Critic - -Модель: -- Gemma-family GGUF - -Роль: -- оценивает результаты tools -- оценивает результаты coder -- предлагает memory usefulness score -- предлагает safety/usefulness judgment - -Ограничения: -- не планирует -- не исполняет действия -- не принимает финальное решение о memory write - -4.4 Embeddings Engine - -Модель: -- MiniLM или совместимая embeddings model - -Роль: -- embeddings generation -- semantic retrieval - -Ограничения: -- не участвует в reasoning -- не участвует в planning - 
-================================================== -5. GLOBAL RULES -================================================== - -Обязательные правила: - -- Все execution transitions проходят через runtime loop controller. -- Все tool calls проходят через execution layer, permission layer и sandbox layer. -- Все prompts и model settings вынесены в `config/`. -- Все межмодульные контракты оформлены через типы/Pydantic models/dataclasses. -- Все важные действия публикуются как события. -- Task lifecycle не должен храниться только in-memory. -- Система должна корректно деградировать при сбое отдельных подсистем. - -Hard decision rule: - -- Все decision-producing components должны возвращать только структурированные decision objects. -- Ни один decision-producing component не должен напрямую исполнять tools. -- Ни один decision-producing component не должен напрямую мутировать task state. -- Ни один decision-producing component не должен неявно вызывать другие компоненты в обход runtime loop. - -Обязательная деградация: - -- если critic недоступен, runtime продолжает работу по fallback policy -- если memory retrieval недоступен, задача выполняется без retrieval -- если streaming недоступен, система возвращает sync response -- если planner вернул невалидный план, runtime делает controlled replan или graceful fail - -================================================== -6. IMPLEMENTATION ORDER -================================================== - -Реализация должна идти итерациями в таком порядке: - -1. project skeleton -2. typed contracts -3. runtime loop skeleton -4. event bus + event schema -5. state persistence + checkpoints -6. config loader -7. context builder skeleton -8. FastAPI skeleton -9. router -10. execution engine / scheduler -11. permission system -12. tool sandbox layer -13. local tools -14. coder integration -15. critic integration -16. memory system -17. memory write policy engine -18. streaming projection -19. CLI -20. 
optional Telegram stub - -После каждого шага ты обязан: - -- показать изменённые файлы -- показать структуру директорий -- кратко объяснить, что уже работает -- явно указать, что ещё stub - -================================================== -7. MVP BOUNDARY -================================================== - -Первая рабочая версия обязана поддерживать end-to-end сценарий: - -- пользователь отправляет задачу -- runtime loop создаёт task state -- context builder собирает контекст -- orchestrator решает direct action или planning -- execution engine исполняет шаги -- shell/file tools реально работают -- опасная команда требует подтверждения -- critic оценивает результат -- memory policy принимает решение о записи -- события пишутся в event store -- task state чекпоинтится -- клиент получает streaming или sync результат - -Минимальный набор tools для MVP: - -- `shell_exec` -- `file_read` -- `file_write` - -Второй приоритет: - -- `web_search` -- `web_fetch` - -================================================== -8. REQUIRED PROJECT STRUCTURE -================================================== - -Ожидаемая структура: - -```text -ducklm/ - app/ - api/ - core/ - runtime/ - events/ - state/ - tools/ - memory/ - permissions/ - streaming/ - cli/ - models/ - services/ - config/ - models.json - prompts.json - permissions.json - runtime.json - data/ - memory/ - state/ - events/ - permissions/ - tests/ - main.py -``` - -Допускается разумная адаптация, но separation of concerns обязателен. - -================================================== -9. REQUIRED DOMAIN CONTRACTS -================================================== - -Сначала зафиксируй typed contracts. 
- -Минимально обязательны: - -9.1 `UserTask` - -```json -{ - "task_id": "uuid", - "session_id": "uuid", - "input": "string", - "context": {}, - "created_at": "iso-datetime" -} -``` - -9.2 `PlanStep` - -```json -{ - "id": "step-1", - "kind": "tool|coder|memory|respond", - "tool": "shell_exec", - "args": {}, - "description": "human readable step description", - "requires_confirmation": false, - "depends_on": [] -} -``` - -Rules: - -- `kind` обязателен -- `args` всегда объект -- `depends_on` обязателен, даже если пустой -- `tool` обязателен только для `kind=tool` - -9.3 `ToolCall` - -```json -{ - "tool": "shell_exec", - "args": {}, - "task_id": "uuid", - "step_id": "step-1" -} -``` - -9.4 `ToolResult` - -```json -{ - "tool": "shell_exec", - "ok": true, - "output": "stdout/stderr/parsed data", - "error": null, - "metadata": { - "exit_code": 0, - "duration_ms": 120 - } -} -``` - -9.5 `CoderRequest` - -```json -{ - "mode": "generate|fix|refactor", - "instruction": "string", - "context": {}, - "task_id": "uuid" -} -``` - -9.6 `CriticScore` - -```json -{ - "correctness": 0.0, - "usefulness": 0.0, - "safety": 0.0, - "memory_store": true, - "weight": 0.0, - "explanation": "string" -} -``` - -Rules: - -- все numeric scores в диапазоне `0..1` -- `weight` используется как сигнал, а не как безусловная команда записи - -9.7 `MemoryEntry` - -```json -{ - "id": "uuid", - "text": "string", - "kind": "tool_result|plan|critique|fact|summary|user_preference", - "source": "tool|critic|user|system", - "weight": 0.85, - "task_id": "uuid", - "session_id": "uuid", - "metadata": {}, - "created_at": "iso-datetime", - "embedding_model": "string", - "embedding_dim": 384 -} -``` - -9.8 `PermissionDecision` - -```json -{ - "action_type": "shell_command", - "pattern": "rm", - "decision": "allow_once|allow_always|deny|ask_always", - "created_at": "iso-datetime" -} -``` - -9.9 `RuntimeEvent` - -```json -{ - "event_id": "uuid", - "task_id": "uuid", - "session_id": "uuid", - "sequence": 42, - "type": 
"task_received", - "timestamp": "iso-datetime", - "payload": {}, - "causation_id": "uuid|null", - "correlation_id": "uuid" -} -``` - -9.10 `TaskCheckpoint` - -```json -{ - "task_id": "uuid", - "status": "executing_step", - "active_step_id": "step-2", - "plan_snapshot": {}, - "context_snapshot": {}, - "updated_at": "iso-datetime" -} -``` - -9.11 `ExecutionDirective` - -```json -{ - "type": "plan|tool|coder|respond|replan|store_memory|request_permission|complete|fail|noop", - "payload": {}, - "requires_permission": false, - "confidence": 0.0, - "reason": "string" -} -``` - -Rules: - -- все decision-producing components должны возвращать либо `ExecutionDirective`, либо коллекцию совместимых директив -- `confidence` находится в диапазоне `0..1` -- `payload` всегда объект -- директива описывает намерение, а не исполняет действие сама - -================================================== -10. RUNTIME LOOP CONTROLLER -================================================== - -Создай: - -- `app/runtime/runtime_loop.py` -- `app/runtime/runtime_controller.py` - -`Runtime Loop Controller` — heart of system. 
- -Он обязан: - -- принять task -- загрузить или создать task state -- опубликовать стартовые события -- инициировать context assembly -- вызвать orchestrator -- определить `direct action / planning / replan / fail` -- передать исполнение в execution engine -- принять результаты tools/coder -- вызвать critic -- передать результат в memory write policy engine -- сохранить checkpoint -- опубликовать события -- решить `continue / replan / complete / fail` - -Runtime loop не должен: - -- собирать prompts inline вручную -- содержать raw tool logic -- подменять собой router -- подменять собой execution engine -- принимать policy-level решения вместо других компонентов - -Runtime loop обязан: - -- применять уже возвращённые decision objects -- переводить систему между состояниями -- координировать вызовы между компонентами - -Runtime loop не должен содержать скрытую бизнес-логику policy-уровня. - -================================================== -11. CONTEXT BUILDER -================================================== - -Создай: - -- `app/core/context_builder.py` - -Context builder обязан собирать: - -- user input -- session context -- retrieved memory -- current task state -- current plan or active step -- recent tool results -- permission state -- runtime constraints and safety limits - -Rules: - -- любой вызов orchestrator/planner идёт только через context builder -- context builder должен быть token-budget aware -- low-priority context должен отбрасываться при переполнении -- prompt assembly не должна дублироваться по проекту - -Минимальный результат: - -```json -{ - "system_prompt": "string", - "task_summary": "string", - "memory_context": [], - "execution_context": {}, - "tool_context": [], - "safety_context": {}, - "constraints": {} -} -``` - -================================================== -12. ORCHESTRATION, PLANNING, ROUTER -================================================== - -Planning — это режим orchestration model, а не отдельная модель. 
- -Router должен быть только: - -- policy evaluator -- decision suggester - -Создай: - -- `app/core/router.py` - -Router обязан определять: - -- нужен ли retrieval -- нужен ли planning -- direct step vs multi-step flow -- когда нужен coder -- когда нужен critic -- когда нужен replan -- когда требуется permission gate - -Rules: - -- router должен быть pure function по контракту -- router принимает input state + assembled context -- router возвращает только structured decision object -- router не имеет side effects -- router не мутирует state -- router не вызывает tools -- router не управляет execution lifecycle -- router не владеет task lifecycle -- router не исполняет шаги -- runtime loop применяет router decisions - -Planner rules: - -- planner mode возвращает только строгий JSON -- невалидный план не исполняется -- runtime делает bounded retry или graceful fail - -================================================== -13. TASK GRAPH MODEL -================================================== - -План не должен жить только как список шагов. - -Даже если MVP исполняет шаги последовательно, внутренняя модель должна быть graph-compatible. - -Используй внутреннюю task graph representation: - -```json -{ - "nodes": [ - { - "id": "step-1", - "kind": "tool", - "tool": "shell_exec", - "args": {"command": "hostnamectl"}, - "depends_on": [] - }, - { - "id": "step-2", - "kind": "respond", - "depends_on": ["step-1"] - } - ] -} -``` - -Rules: - -- scheduler валидирует отсутствие циклов -- planner может возвращать `PlanStep[]` как transport format -- после валидации план преобразуется во внутренний task graph -- MVP может использовать sequential DAG scheduler - -================================================== -14. EXECUTION ENGINE AND SCHEDULER -================================================== - -Создай: - -- `app/core/execution_engine.py` -- `app/core/execution_scheduler.py` - -Execution engine работает под управлением runtime loop. 
- -Execution engine обязан: - -- принимать валидированный task graph -- поддерживать execution cursor -- выбирать следующий исполнимый шаг -- учитывать зависимости шагов -- вызывать tools/coder через adapters -- возвращать структурированные результаты в runtime loop -- публиковать execution events - -Минимальные состояния: - -- `received` -- `retrieving_memory` -- `orchestrating` -- `planning` -- `awaiting_permission` -- `executing_step` -- `critic_evaluating` -- `storing_memory` -- `completed` -- `failed` - -Execution engine не должен заменять runtime loop. - -================================================== -15. EVENT BUS, EVENT STORE, REPLAY -================================================== - -Streaming events недостаточно. -Нужен внутренний event backbone. - -Создай: - -- `app/events/event_bus.py` -- `app/events/event_types.py` -- `app/events/event_store.py` - -EventBus обязан: - -- принимать runtime domain events -- гарантировать ordering per task -- выдавать monotonic sequence number per task -- публиковать события подписчикам -- писать события в durable store -- поддерживать projection в streaming layer - -Delivery guarantees: - -- ordering guarantee per task обязателен -- delivery model минимально `at least once` -- consumer-side idempotency обязательна -- deduplication key: `task_id + sequence` -- replay не должен ломать состояние при повторном применении уже известных событий - -Минимальные event types: - -- `task_received` -- `context_built` -- `llm_called` -- `llm_result_received` -- `plan_created` -- `step_started` -- `tool_called` -- `tool_completed` -- `coder_called` -- `coder_completed` -- `critic_called` -- `critic_completed` -- `memory_write_suggested` -- `memory_write_decided` -- `memory_written` -- `permission_requested` -- `permission_resolved` -- `checkpoint_saved` -- `task_completed` -- `task_failed` - -Event sourcing baseline: - -- каждое значимое действие должно порождать событие -- execution history должна быть воспроизводимой -- 
должна быть replay capability step-by-step - -Каждое событие должно быть idempotent и deduplicatable по: - -- `task_id + sequence` - -Streaming transport не является source of truth. - -================================================== -16. STATE PERSISTENCE AND CHECKPOINTING -================================================== - -In-memory only state запрещён для autonomous mode. - -Создай: - -- `app/state/task_state_store.py` -- `app/state/checkpoint_store.py` - -Используй: - -- SQLite как минимум для MVP - -State persistence layer обязан поддерживать: - -- task creation -- current task status -- active step -- current plan/task graph snapshot -- latest context summary -- latest safe checkpoint -- resume after restart/crash - -Обязательные правила: - -- checkpoint после critical transitions -- periodic checkpointing -- resume from last valid checkpoint - -================================================== -17. ASYNC EXECUTION ISOLATION -================================================== - -Нужна явная изоляция между LLM loop и tool execution. - -Обязательные требования: - -- долгие tool operations не должны блокировать runtime loop -- блокирующие операции должны идти через async adapter / isolated runner -- streaming и event publishing должны продолжаться во время исполнения tool - -Минимум: - -- async tool runner -- timeout wrapper -- cancellation handling -- bounded concurrency policy - -================================================== -18. TOOL SANDBOX LAYER -================================================== - -Помимо permission checks нужен sandbox layer. 
- -Особенно для: - -- `shell_exec` -- `web_fetch` with browser fallback -- generated helper scripts - -Минимальные требования: - -- execution context isolation -- resource caps -- timeout enforcement -- working directory restrictions -- optional environment variable allowlist - -Для shell нужно предусмотреть: - -- CPU / wall time limits -- path restrictions where possible -- запрет неявного escalation - -================================================== -19. TOOLS SYSTEM -================================================== - -Нужен tool registry и единый tool interface. - -Обязательные tools для MVP: - -- `shell_exec` -- `file_read` -- `file_write` - -Второй этап: - -- `web_search` -- `web_fetch` - -Требования: - -- единый base tool interface -- единый `ToolResult` -- централизованный logging -- timeout/error isolation -- tool execution только через tool layer - -================================================== -20. TOOL SAFETY AND PERMISSIONS -================================================== - -Перед потенциально опасным действием система обязана проверить policy. - -Источники policy: - -- `config/permissions.json` -- persistent store пользовательских решений - -Поддерживаемые режимы: - -- `allow_once` -- `allow_always` -- `deny` -- `ask_always` - -Минимум опасных shell patterns: - -- `rm` -- `mv` в sensitive paths -- `chmod` -- `chown` -- package managers -- `curl | bash` -- `sudo` -- `shutdown` -- `reboot` - -Rules: - -- опасная команда не исполняется до решения пользователя -- решения пользователя сохраняются -- execution layer получает уже разрешённое или отклонённое действие - -================================================== -21. MEMORY SYSTEM -================================================== - -JSON file не использовать как primary memory store. 
- -Используй: - -- SQLite как primary metadata store -- FAISS или hnswlib как vector index - -Memory обязана поддерживать: - -- insert -- semantic search -- delete -- update weight -- filtering by kind/session/task/source -- embedding versioning -- reindex - -Минимальные таблицы или эквивалент: - -`memory_items` -- id -- text -- kind -- source -- weight -- task_id -- session_id -- metadata_json -- created_at -- updated_at - -`embeddings_index_map` -- memory_id -- embedding_model -- embedding_dim -- vector_slot -- created_at - -Rules: - -- retrieval учитывает semantic score и memory weight -- low-value memories не должны загрязнять context -- смена embedding model требует reindex path - -================================================== -22. MEMORY WRITE POLICY ENGINE -================================================== - -Critic только предлагает. -Memory write policy engine решает. - -Создай: - -- `app/memory/write_policy.py` - -Policy engine должен учитывать: - -- critic score -- thresholds из config -- kind/source memory candidate -- deduplication signals -- session/task scope -- safety constraints -- runtime weight modifiers - -Решения policy engine: - -- `store` -- `store_with_weight` -- `skip` -- `merge_with_existing` - -Policy engine должен быть детерминированной функцией. - -Минимальная форма: - -`(critic_score + memory_type + runtime_weight + dedup_state + safety_state) -> decision` - -Нельзя ограничиваться примитивным правилом вида: - -- `if score > 0.7 then store` - -Нужно зафиксировать: - -- threshold model -- scoring formula or weighted rule set -- conflict resolution for near-duplicate memories -- merge policy for same-fact updates - -================================================== -23. 
CRITIC LOOP -================================================== - -Critic получает: - -- tool result -- coder output -- optional execution context - -Возвращает: - -```json -{ - "correctness": 0.91, - "usefulness": 0.77, - "safety": 1.0, - "memory_store": true, - "weight": 0.84, - "explanation": "Result is correct and safe, useful for future similar tasks" -} -``` - -Critic должен вызываться: - -- после tool execution -- после coder output -- перед memory write suggestion - -Critic failure не должен ломать execution path. -Critic возвращает suggestion, а не final write decision. - -================================================== -24. RETRY AND RECOVERY POLICY -================================================== - -Нужна явная retry/recovery стратегия. - -Обязательные политики: - -Planning retry: - -- ограниченное число replan attempts -- каждый retry логируется как событие - -Tool retry: - -- только для idempotent operations или явно разрешённых tools -- policy зависит от типа ошибки - -Partial failure recovery: - -- `fail task` -- `retry step` -- `skip step` -- `replan` - -Critic recovery: - -- critic failure переводится в fallback policy - -Минимальные поля в `config/runtime.json`: - -- `planner_retry_limit` -- `tool_retry_limit` -- `replan_limit` -- `step_timeout_ms` -- `task_timeout_ms` -- `allow_recovery_replan` -- `checkpoint_policy` -- `event_retention_policy` - -================================================== -25. STREAMING SYSTEM -================================================== - -Требуется FastAPI WebSocket streaming. - -Но streaming должен быть projection from event bus, а не отдельным источником правды. - -Минимальные внешние события: - -```json -{ "type": "status", "data": "planning" } -{ "type": "token", "data": "..." } -{ "type": "plan", "data": [...] 
} -{ "type": "tool_start", "tool": "shell_exec", "step_id": "step-1" } -{ "type": "tool_result", "tool": "shell_exec", "data": {...} } -{ "type": "critic", "data": {...} } -{ "type": "permission_required", "data": {...} } -{ "type": "final", "data": {...} } -``` - -================================================== -26. CONFIG SYSTEM -================================================== - -Всё должно жить в `config/`. - -Обязательные файлы: - -`config/models.json` -- model paths -- model roles -- inference params -- context sizes - -`config/prompts.json` -- orchestration prompt -- planning prompt -- coder prompt -- critic prompt - -`config/permissions.json` -- dangerous command policies -- sensitive paths -- default approval behavior - -`config/runtime.json` -- timeouts -- streaming settings -- critic fallback policy -- memory thresholds -- retrieval top_k -- replan limits -- max execution steps -- checkpoint policy -- event retention policy - -Hard rule: - -- никаких хардкодов prompts и critical thresholds в коде - -================================================== -27. API SERVER -================================================== - -Сделай FastAPI backend. - -Минимальные endpoints: - -- `POST /chat` -- `WS /stream` -- `POST /tool/execute` -- `GET /memory/search` -- `DELETE /memory/item/{id}` -- `GET /health` - -Требования: - -- Pydantic request/response models -- единый error handling -- dependency injection where разумно - -================================================== -28. CODER MODULE -================================================== - -Создай: - -- `app/core/coder.py` - -Минимальный интерфейс: - -- `generate_code()` -- `fix_code()` -- `refactor_code()` - -Используется только coder model. - -================================================== -29. CLI -================================================== - -Добавить CLI для локального использования. 
- -Минимум: - -- отправить задачу -- получить sync result -- показать streaming mode -- выполнить memory search - -================================================== -30. TELEGRAM BOT -================================================== - -Только optional stub. - -Если реализуешь: - -- не связывай core runtime с Telegram-specific code -- делай только thin adapter layer - -================================================== -31. RELIABILITY AND TESTING -================================================== - -Обязательные инженерные требования: - -- structured logging -- typed exceptions -- timeout handling -- graceful failures -- no silent pass -- no giant mixed-responsibility files - -Минимальные тесты: - -- runtime loop transitions -- event ordering -- checkpoint save/load -- replay path -- plan validation -- permission policy checks -- tool registry -- shell safety path -- memory insert/search -- memory write policy -- router basic flow - -================================================== -32. FORBIDDEN SHORTCUTS -================================================== - -Запрещено: - -- single-model architecture -- hardcoded prompts in code -- bypassing runtime loop -- bypassing router for policy decisions -- tool execution outside tool layer -- dangerous command execution without permission check -- JSON file as primary memory store -- in-memory-only task lifecycle for autonomous mode -- direct streaming transport as substitute for event bus -- critic-only memory write decision path -- accepting invalid planner JSON as-is -- giant monolithic runtime file - -================================================== -33. DEFINITION OF DONE -================================================== - -Работа считается выполненной, если: - -1. Есть модульная структура проекта. -2. Есть typed contracts для core entities. -3. Есть Runtime Loop Controller как центральный control loop. -4. Есть Context Builder. -5. Есть Router как policy evaluator / decision suggester. -6. 
Есть Execution Engine / Scheduler. -7. Есть EventBus + EventStore + replay-capable history. -8. Есть state persistence + checkpointing + resume. -9. Есть permission-gated tools. -10. Есть tool sandbox layer. -11. Есть coder integration. -12. Есть critic integration. -13. Есть memory на SQLite + vector index. -14. Есть memory write policy engine. -15. Есть FastAPI API. -16. Есть streaming как projection от event bus. -17. Есть CLI. -18. Есть базовые тесты critical path. - -================================================== -34. REQUIRED DELIVERY STYLE -================================================== - -Работай итеративно. - -После каждого шага: - -- показывай код -- показывай структуру файлов -- кратко объясняй решение -- явно отмечай допущения -- прямо помечай stubs - -Не перескакивай к финальному “всё готово”, если каркас ещё не выстроен. - -Начни с: - -1. project structure -2. typed contracts -3. runtime loop skeleton -4. event bus skeleton -5. state persistence skeleton -6. config loader -7. context builder skeleton -8. FastAPI skeleton -9. router -10. execution engine / scheduler - -Сначала построй правильный каркас. -Потом наполняй его логикой. - -КОНЕЦ ЗАДАНИЯ. 
diff --git a/app/__init__.py b/app/__init__.py deleted file mode 100644 index 9b29354..0000000 --- a/app/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -"""ducklm application package.""" - diff --git a/app/api/__init__.py b/app/api/__init__.py deleted file mode 100644 index 92d0da2..0000000 --- a/app/api/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -"""API layer.""" - diff --git a/app/api/server.py b/app/api/server.py deleted file mode 100644 index fbf9fff..0000000 --- a/app/api/server.py +++ /dev/null @@ -1,170 +0,0 @@ -from __future__ import annotations - -import asyncio -from contextlib import asynccontextmanager -from pathlib import Path - -from fastapi import FastAPI, WebSocket, WebSocketDisconnect -from fastapi.responses import FileResponse -from pydantic import BaseModel - - -class CriticFeedbackRequest(BaseModel): - feedback: str - task_id: str | None = None - session_id: str | None = None - feedback_type: str | None = None - severity: str | None = None - correction: str | None = None - remember: bool = True - retry: bool = False - assistant_answer: str | None = None - correctness_override: float | None = None - usefulness_override: float | None = None - safety_override: float | None = None - -from app.core.permission_resolution import PermissionResolutionRequest, SecretResolutionRequest, PasswordResolutionRequest, ReviewResolutionRequest -from app.core.contracts import UserTask -from app.runtime.runtime_controller import RuntimeController -from app.streaming.manager import StreamingManager - - -@asynccontextmanager -async def lifespan(app: FastAPI): - """Load models on startup.""" - print("Lifespan: Starting model loading...") - try: - print("Lifespan: Loading models...") - runtime.load_models_at_startup() - print("Lifespan: Models loaded") - - # Rebuild vector index if empty but memory store has data. 
- if runtime._memory_interface: - store_count = runtime._memory_interface.count() - if store_count > 0: - idx_count = runtime._memory_interface._vector_index.element_count - if idx_count == 0: - print(f"Lifespan: Rebuilding vector index ({store_count} entries)...") - runtime._memory_interface.reindex() - print("Lifespan: Vector index rebuilt") - except Exception as e: - print(f"Lifespan: Failed to load models: {e}") - import traceback - traceback.print_exc() - - yield # Server runs here - - print("Lifespan: Shutting down...") - - -app = FastAPI(title="ducklm", lifespan=lifespan) -runtime = RuntimeController(base_dir=Path(__file__).resolve().parents[2]) -streaming = StreamingManager(runtime.event_bus) - - -@app.get("/") -def index() -> FileResponse: - return FileResponse(Path(__file__).resolve().parent / "static" / "index.html") - - -@app.get("/health") -def health() -> dict[str, str]: - return {"status": "ok"} - - -@app.get("/events") -def list_events(limit: int = 500) -> dict[str, object]: - safe_limit = max(1, min(limit, 2000)) - return { - "events": [ - event.model_dump(mode="json") - for event in runtime.event_bus.list_recent(limit=safe_limit) - ] - } - - -@app.post("/chat") -def chat(task: UserTask) -> dict[str, object]: - submit = getattr(runtime, "submit_task", None) - if callable(submit): - return submit(task) - return runtime.handle_task(task) - - -@app.post("/permissions/resolve") -def resolve_permission(request: PermissionResolutionRequest) -> dict[str, object]: - submit = getattr(runtime, "submit_permission_resolution", None) - if callable(submit): - return submit(task_id=request.task_id, decision=request.decision) - return runtime.resolve_permission(task_id=request.task_id, decision=request.decision) - - -@app.post("/secrets/resolve") -def resolve_secret(request: SecretResolutionRequest) -> dict[str, object]: - submit = getattr(runtime, "submit_secret_resolution", None) - if callable(submit): - return submit(task_id=request.task_id, 
secret=request.secret) - return runtime.resolve_secret(task_id=request.task_id, secret=request.secret) - - -@app.post("/password/resolve") -def resolve_password(request: PasswordResolutionRequest) -> dict[str, object]: - submit = getattr(runtime, "submit_password_resolution", None) - if callable(submit): - return submit(task_id=request.task_id, password=request.password) - return runtime.resolve_password(task_id=request.task_id, password=request.password) - - -@app.post("/review/resolve") -def resolve_review(request: ReviewResolutionRequest) -> dict[str, object]: - submit = getattr(runtime, "submit_review_resolution", None) - if callable(submit): - return submit(task_id=request.task_id, decision=request.decision, correction=request.correction) - return runtime.resolve_review(task_id=request.task_id, decision=request.decision, correction=request.correction) - - -@app.post("/critic/feedback") -def critic_feedback(request: CriticFeedbackRequest) -> dict[str, object]: - feedback = runtime.handle_critic_feedback( - feedback=request.feedback, - task_id=request.task_id, - session_id=request.session_id, - feedback_type=request.feedback_type, - severity=request.severity, - correction=request.correction, - remember=request.remember, - retry=request.retry, - assistant_answer=request.assistant_answer, - correctness_override=request.correctness_override, - usefulness_override=request.usefulness_override, - safety_override=request.safety_override, - ) - return feedback - - -@app.websocket("/stream/{task_id}") -async def stream_task(websocket: WebSocket, task_id: str) -> None: - await websocket.accept() - replayed_events = streaming.replay_events(task_id) - for event in replayed_events: - await websocket.send_json(event.model_dump(mode="json")) - if replayed_events and replayed_events[-1].type in {"task_completed", "task_failed"}: - await websocket.close() - return - - queue = streaming.subscribe(task_id) - try: - while True: - try: - event = await asyncio.wait_for(queue.get(), 
timeout=30) - except asyncio.TimeoutError: - await websocket.send_json({"type": "heartbeat", "task_id": task_id}) - continue - await websocket.send_json(event.model_dump(mode="json")) - if event.type in {"task_completed", "task_failed", "task_awaiting_permission", "task_awaiting_input", "task_awaiting_review"}: - break - except WebSocketDisconnect: - pass - finally: - streaming.unsubscribe(task_id, queue) - await websocket.close() diff --git a/app/api/static/favicon.ico b/app/api/static/favicon.ico deleted file mode 100644 index 1566b590fcf80dce46fcbaf095889da5164c7f18..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16958 zcmdU130##`+P|eNm%A_A3n*|!aRbqC-@z2M%)Qn5T1Q(@L0nrDP&2ihn);jyWwB|Q zZ@#p?_MCFmQd2NRK~ULLb`&u)waowbe_p(fgIpF-r|9>1?t9+zp7Z>l{hasxrBdC( z{~{t({MVuCJWQpssZ^@YBzCD1NYd}>&tNd9ZZJ7G|GlK=N$W|wNhPE+q>GnGeoLR+ zlQA+@o{?v7G=Dc@{z&q@inNb}7Lv>%bIF`Hg4gRcrw{p?N7_SbDP~vC%iOYttaZKk zZs|O2$&1))CCL@v*SlZVk~L-Rmh#_{=VQ5kA?bSKYwCQmw(!sr4A=7PV6OS`e-L+F z`3Nt<)3sn}?laL`r}Q_3-6;#`WHMc=3Fe?kW zS=lb>0LSL!O> zoG3qrYEo5MF{#8QmGjxk@={b)l%bO2N{f%;cx5?`6&2v6l`CK{8eueB;OV7-#bT+8 zx9+uIYiQ3Td#bw6Gbcfp14tL)B3Z_JF#e~PUgcIh`u6pK91&U4BHd zjlaE}hW3sCn2iP{UMg4^7hGTxtaWW*kBhOc=3u`bMvD>tordENF(2d3$F^eJ)E#&r zb{n4gOCI7=^YCKoe!RLa1D|cq!@^j2DvguwrcvUP|4Iw2uy8$G!?2DXBquNh$5%7^$d1I>Gy{sG9f027atMg0DaS z08zsRWAcQt@bU41-e`hG*Pt(4d7ilHn(#CJ>vaAYdCyXei~a|qrftKxC%544=#LS- zWGmv==3+@w9ui4QDT`&P`|#>J>G*V0HuCe!P*qZf>Qd%`(xaE@B!6E?68oxLklZ^( z|EXbo$k_HZA_nxvgmGgqbjT3qLOpGu!CFBuJ8j_F&S|CgF?Ym0kGzVBPkn{aPh?;e z{eS%9n{a>3msq$u2aA()uryUD8_%a?VR_nqtVqkmi|h8{e>WB5U|t!@N~@S7NTp?r z>m@i|N}nz+V7z2}aOz)TCG%z_s=vFWbeust*&8JLq5TAU2Z5EciC34w|7t_-5)`$DBWk(_M^NypcxEhs5N>Fi(^-nQV9 zGEnkC#bHz)%|urEMyz>#1tJIbz~EjX7;sxx>OTa1ZtIRwBcl`>XatY*z05V?7yr_j zgRtt24fyb%1xQ^_`~L6Ecw+trOpN{r<7RBaL(gnM{F)*>Cqy|s{pMjTWGskJJ&1&~ zLwK4pS(H+M#VNU1N#A(ot=;%b+IIZU2Yaz|cLnxlmElP7X{7JY!6%=5&YEvICQllJ zp0{;ESf~TNdv-*x@J{Fv5{&TBE~F6j?B1RIJ<+>&uf}jmEwsK78M&OpM7yzZpY#6T 
zKf({Ef5DGue#K8e{)(FFi}+^yalG)#PCPd61I$^mjs9PNg=_QhY;rypGA}-pl#K*p zPE0jY%aOPq)&YUa5 zFO~bAc0x*| z-7Fy`rsVM+<(5Pl$}{U|FKIcHMGjV^GM}VmVpU2y-gsv_maI%h+rW<8D>s<`1(#r# zcdsrnHg?Z!F`<>oh>(%DW8%}%c=eNH9Inp6=?hgj^K&&WoIQ?nKc7Vf?ct@Ae_+w; z^o4af)GKwGD*DdDLgI}l?#0BnjJjT)nuV3D8-+wJiy1Ex)*eJ6$1X`N#KJcZ;n_F% zY*Ieud4&B(=tKEf@&35}XBhR+Qa(o6>oVSZ>&)=h z){>dO-MNm3(E>L=8$#|Ijo=yM&~etC7`OBh{4M=MRGmA4GZ%j19_R=L3>*qa$Nrf9 zY!Y64=X=Db?IY$w)<=cpzevHim@}1kY*?7g+`$-`m{v%?IgIC$a%%mD zW1puTJ->$eBk5a=7#j_Z-T|G~1_OC*On1EReE;6~SmHNaCeF?I^{lZq%&`{P1DoAp z@8>{=r*4PuoJe$keY!nrvb`K)eNvkwvtZqa^B?F>={Dj!UKOB=5&d@6z_y?Zni1s}T^~ z2YQnq%+!_e(}aDXU>5u??cd0`Y&s*X#I56RHL<$Wri465p1fA`+{(?HE(W7CSJ`|=|gQ1D*1^;$cw=rgNEP4+ZOS=n(m%#xKGxvBlEA3v* z{l6Ag!B2e{8Jioa{{^$eKbQGWeBMeQ7i=O+jo>!(oQd}3$MY7uMh~^d1hdY{JYeB@ zoDr@4^=LO`5Q1j02A@qC%&*&UExAIX89F z*`V?wdHO&_U-n{s+1e1qK3}v_TcPGUI>sA4ZI7=u6utzjmHb)77CE29C8O5+QXHV) za()*i*w?$b$7keB$xJfA$ob8bt4XJZo#zTR@@jN9z{|@MRx|S;$ztZd+usvi$A_WE z%z+4dCIbF(Ll8J?I06|L!lK8*J2DI^lP7J~3{UD^c(ssBm*WX#N;~tQtcBE+vlmIr z^>ws4quEP2b23{zxE_hvEYy!eGMCm0vxf0cW3MI7>l)wXZ;k35Bk|44U{>r%Z`!7J8+4xd#|v#!sN;mL2$AbQu& zn6~2#9{jol4{puFm@l^@=+&2DjpKR5{M%uf-3M*v^+lIigAg*MFVul%+Iy{DwUmX9 z`gQKr#2*w~#OBQyVIx^dl7Ho_RcuoBo?4RRBt>>?<#}Zekr!oTCN-dYx##@Gtln|L zhxn<)J*oA5HH>v_yI@S(mx$i`BVxWgkBK{hQQwi+j?cJ=NjrbWJsH1Z;+~7RGvh4A ze(^bsvEyN%I}`y6`Z724N9a={5HzAU-xcUtJJ7~y*BZ*!NT067E|_Yunk7am#Pcln zDS4M|E&hMmwiCM&^U1SOe1+5qe%b%Z89=?~ACg1u1`XUKu9G*bmmOh`&BgeA$I+kI zrtG?i@weyiJV0Sf=6nLM)+^Zedqay;9phWI#Nrt(d*#Fcr9^A zr}IYeBhxYQU?uwHp2mHd=McNU2GQT2$CMqwJ+y(z8Rzj}&T&lmZWkigzX!hsQ(>DP z1;^Y$%!9q)JGU0Uv7>2;HA-L6|fhX6{bN-u*UoZ=Hp?c3hYxlRcdJuiswahzR zdSLw9A7Jc;%@`8@I@(Wq2&&!_aNBc##~+D(()J^;CMH1rIQOSBM<96mD0Ge+&RU4N zpVtq*^9M5@4nh0bgV5#4KJW}Q!9yptio8Rl-;-{V{;xd$6dCLxU1d*Vmmai3;VVdE zL;!8Ty)}7OF(!KWDSmpZ#UBBq@5P*5S!lmB0lKMU(dn@f=rJK2-5wZ#9?_BL!kXAG zb`Wf{hQWvAm>YpkytZ}=WDUvKcXROzR;TWr?N#qTMCV%OK(T$1ji1hdwpt_WK@)sf z^R@Fb!(XF?jrE*4tOG_bkAp4d4*32t0zpH%Av~lD+Vu`X@Z_H8I5QHy#BGlqi8isL 
zSRY5C{mcji_Tygd5`HE2ah;o>|N7p4T=o7-_^@2c%^KP3@Y;*E;Hm5Z^zdZOV`HDh zNR6K-^IAW2iiv>Z;l60sD;Ps~CeyWhX9V04N?(Y8_v|6?oy}S(CW$d;) z{#)?N-cLwsAn^r}kJLgosl8bjxUuHSO22EsJIC)&;*fDW-k5jryp0r&QSyAS2b_xR##vYwOVTIfoBT!-ITHCXtoY_}AUX>yt@yzkhT3gnT<#-+W4+ z_`aNF$T^sN_od^$)8Hifg2YVOm&^Hy%p)+h83=OSu9qtJRC!XfEWY_ig zuYzB&E9ZdJw->RG!W9FZqTz%_Wbb zTaxHi-nYu0t+rOWgzwUQ*`td+D8#XHW@WhKJ1*zuxE9b$=3GS5}tK}-CT z+DPKRKeIUdrnmwGtJCxJDZ5AlTtg4-+w-QT#i{AzdPuCjfn=7NiT&u}762~^Auj_G7coCkOi=(CErJgLj z3HV&eNY-v@P1{neuAZ0ugzTv*Z=xJzEm^a%y^JfKZ$!VGAIQ1F9+E44wX|R6mNn}8 ze)UFRy4E@6Y;6^3AE~97We%CE?s?s{@^-VIbC7rEcUJ59dpD_sb&52bAG_ zz}a4q+bY#i&Mtp}&Q_U*6BQ}lDi3G7S*857s@o>i_TN&st?mBL{acqG=k?}Vmaf84 zRNL?2Y@Kp&%Ej3_<>r*5vsKh0ib;@(DEH0MQejTLC=&$!OK(m+Ue+rI@HYPx_5MFC C9Zc*1 diff --git a/app/api/static/index.html b/app/api/static/index.html deleted file mode 100644 index eeee4c4..0000000 --- a/app/api/static/index.html +++ /dev/null @@ -1,1089 +0,0 @@ - - - - - - DuckLM Runtime - - - -
-
-
-

🦆 DuckLM

-
- - Connecting... -
-
-
-
- -
-
-
- - -
-
- Enter — отправить, Shift+Enter — перенос строки -
-
-
- - -
- - - - - - - - diff --git a/app/cli/__init__.py b/app/cli/__init__.py deleted file mode 100644 index 43164b3..0000000 --- a/app/cli/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -"""CLI layer.""" - diff --git a/app/core/__init__.py b/app/core/__init__.py deleted file mode 100644 index b18a4b9..0000000 --- a/app/core/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -"""Core orchestration components.""" - diff --git a/app/core/async_router.py b/app/core/async_router.py deleted file mode 100644 index dc23d48..0000000 --- a/app/core/async_router.py +++ /dev/null @@ -1,542 +0,0 @@ -from __future__ import annotations - -import asyncio -import json -import logging -import re -from typing import Any - -from app.core.contracts import ExecutionDirective -from app.core.intent_parser import IntentParser -from app.events.event_bus import EventBus -from app.events.event_types import ( - ORCHESTRATOR_CALLED, - ORCHESTRATOR_FALLBACK_USED, - ORCHESTRATOR_RETRY, - ORCHESTRATOR_RESULT, - ORCHESTRATOR_UNAVAILABLE, - THINKER_CALLED, - THINKER_RESULT, - JSON_COMPILER_CALLED, - JSON_COMPILER_RESULT, -) -from app.models.async_adapters import AsyncOrchestratorAdapter - -logger = logging.getLogger(__name__) - - -class AsyncRouter: - """Async router using Thinker + JSON Compiler pipeline.""" - - def __init__( - self, - thinker: AsyncOrchestratorAdapter | None = None, - json_compiler: AsyncOrchestratorAdapter | None = None, - intent_parser: IntentParser | None = None, - prompts: dict[str, str] | None = None, - event_bus: EventBus | None = None, - tool_registry=None, - retry_limit: int = 2, - debug: bool = False, - log_length: int = 500, - json_fix_retry_limit: int = 2, - json_fix_use_sys_util: bool = True, - intent_classifier: str = "thinker", - ) -> None: - self._thinker = thinker - self._json_compiler = json_compiler - self._intent_classifier = intent_classifier - self._sys_util = None - self._intent_parser = intent_parser or IntentParser() - self._prompts = prompts or {} - self._event_bus = 
event_bus - self._tool_registry = tool_registry - self._retry_limit = retry_limit - self._debug = debug - self._log_length = log_length - self._json_fix_retry_limit = json_fix_retry_limit - self._json_fix_use_sys_util = json_fix_use_sys_util - self._orchestrator = None # Set separately if needed for classification - - def set_event_bus(self, event_bus: EventBus) -> None: - self._event_bus = event_bus - - def set_thinker(self, thinker: AsyncOrchestratorAdapter) -> None: - self._thinker = thinker - - def set_json_compiler(self, json_compiler: AsyncOrchestratorAdapter) -> None: - self._json_compiler = json_compiler - - def set_sys_util(self, sys_util: AsyncOrchestratorAdapter) -> None: - self._sys_util = sys_util - - def set_orchestrator(self, orchestrator: AsyncOrchestratorAdapter) -> None: - self._orchestrator = orchestrator - - def set_tool_registry(self, tool_registry) -> None: - self._tool_registry = tool_registry - - async def decide( - self, - state: dict[str, Any], - context: dict[str, Any], - task_id: str | None = None, - session_id: str | None = None, - ) -> ExecutionDirective: - task_context = context.get("task_context", {}) - requested_tool = task_context.get("requested_tool") - task_summary = str(context.get("task_summary", "")) - - if requested_tool: - self._emit_event( - ORCHESTRATOR_RESULT, - {"reason": "explicit_tool_request", "tool": requested_tool}, - task_id, - session_id, - ) - return ExecutionDirective( - type="tool", - payload={ - "tool": requested_tool, - "args": task_context.get("tool_args", {}), - }, - requires_permission=requested_tool in {"shell_exec", "file_write"}, - confidence=0.9, - reason="Task context explicitly requested a tool execution.", - ) - - parsed_intent = self._intent_parser.parse(task_summary) - if parsed_intent: - self._emit_event( - ORCHESTRATOR_RESULT, - {"reason": "deterministic_intent_parser", "directive": parsed_intent.model_dump(mode="json")}, - task_id, - session_id, - ) - return parsed_intent - - if self._thinker 
is None: - fallback = self._fallback_directive(task_summary) - self._emit_event( - ORCHESTRATOR_FALLBACK_USED, - {"reason": "thinker_unavailable", "directive": fallback.model_dump(mode="json")}, - task_id, - session_id, - ) - return fallback - - if self._json_compiler is None: - fallback = self._fallback_directive(task_summary) - self._emit_event( - ORCHESTRATOR_FALLBACK_USED, - {"reason": "json_compiler_unavailable", "directive": fallback.model_dump(mode="json")}, - task_id, - session_id, - ) - return fallback - - mode_hint = await self._classify_intent(task_summary) - thinker_prompt = self._build_thinker_prompt(task_summary, context, mode_hint) - - for thinker_attempt in range(self._retry_limit + 1): - if thinker_attempt > 0: - self._emit_event( - ORCHESTRATOR_RETRY, - {"attempt": thinker_attempt, "prompt": thinker_prompt}, - task_id, - session_id, - ) - thinker_prompt = self._add_thinker_feedback(thinker_prompt, last_thinker_error, thinker_attempt) - - self._emit_event( - THINKER_CALLED, - {"attempt": thinker_attempt, "mode": mode_hint}, - task_id, - session_id, - ) - - try: - thinker_result = await self._thinker.generate(thinker_prompt) - except Exception as e: - logger.warning(f"Thinker generate failed: {e}") - last_thinker_error = str(e) - continue - - logger.info(f"Thinker result (attempt {thinker_attempt + 1}): {thinker_result}") - self._emit_event( - THINKER_RESULT, - {"result": thinker_result, "attempt": thinker_attempt}, - task_id, - session_id, - ) - - if mode_hint == "conversation" and self._looks_like_tool_plan(thinker_result): - mode_hint = "execution" - self._emit_event( - ORCHESTRATOR_FALLBACK_USED, - {"reason": "thinker_proposed_tool_plan_despite_conversation_hint"}, - task_id, - session_id, - ) - - if self._is_simple_response(thinker_result): - json_compiler_prompt = self._build_json_compiler_prompt(thinker_result) - else: - json_compiler_prompt = self._build_json_compiler_prompt(thinker_result) - - for compiler_attempt in 
range(self._json_fix_retry_limit + 1): - self._emit_event( - JSON_COMPILER_CALLED, - {"attempt": compiler_attempt, "plan": thinker_result}, - task_id, - session_id, - ) - - try: - compiler_result = await self._json_compiler.generate(json_compiler_prompt) - except Exception as e: - logger.warning(f"JSON Compiler generate failed: {e}") - compiler_result = None - - if compiler_result: - logger.info(f"JSON Compiler result (attempt {compiler_attempt + 1}): {compiler_result}") - self._emit_event( - JSON_COMPILER_RESULT, - {"result": compiler_result, "attempt": compiler_attempt}, - task_id, - session_id, - ) - - directive = self._validate_directive(compiler_result, mode_hint) if compiler_result else None - if directive is not None: - directive = self._guard_rail_check(directive) - self._emit_event( - ORCHESTRATOR_RESULT, - {"directive": directive.model_dump(mode="json"), "thinker_attempt": thinker_attempt, "compiler_attempt": compiler_attempt}, - task_id, - session_id, - ) - return directive - - if compiler_result: - logger.warning(f"JSON Compiler validation failed, attempting fix (attempt {compiler_attempt + 1})") - fix_result = await self._fix_invalid_json(compiler_result, compiler_attempt, task_id, session_id) - if fix_result: - fixed_directive = self._validate_directive(fix_result, mode_hint) - if fixed_directive is not None: - fixed_directive = self._guard_rail_check(fixed_directive) - self._emit_event( - ORCHESTRATOR_RESULT, - {"directive": fixed_directive.model_dump(mode="json"), "fixed": True}, - task_id, - session_id, - ) - return fixed_directive - - last_thinker_error = f"JSON Compiler failed after {self._json_fix_retry_limit + 1} attempts" - - self._emit_event( - ORCHESTRATOR_UNAVAILABLE, - {"reason": "retry_exhausted", "last_error": last_thinker_error}, - task_id, - session_id, - ) - raise RuntimeError(f"Thinker/Compiler pipeline failed after {self._retry_limit + 1} attempts") - - def _fallback_directive(self, task_summary: str) -> ExecutionDirective: - parsed 
= self._intent_parser.parse(task_summary) - if parsed: - return parsed - - return ExecutionDirective( - type="respond", - payload={"text": f"Runtime accepted task: {task_summary}"}, - requires_permission=False, - confidence=0.4, - reason="Fallback response because local orchestration models are not loaded.", - ) - - def _is_simple_response(self, thinker_result: str) -> bool: - result_lower = thinker_result.lower().strip() - return result_lower.startswith("ответ:") or result_lower.startswith("response:") or "не нужно" in result_lower - - def _extract_conversation_response(self, thinker_result: str) -> str: - """Extract text response from thinker result for conversation mode.""" - result_lower = thinker_result.lower() - - # Skip the ПЛАН lines, just get the ОТВЕТ part - lines = thinker_result.split('\n') - response_lines = [] - capture = False - - for line in lines: - if line.strip().lower().startswith('ответ:') or line.strip().lower().startswith('response:'): - capture = True - response_lines.append(line) - elif capture and line.strip(): - # Check if this is a new ПЛАН or step - if line.strip().lower().startswith('план') or line.strip().lower().startswith('step'): - break - response_lines.append(line) - - if response_lines: - return '\n'.join(response_lines).replace('ответ:', '').replace('response:', '').strip() - - # Fallback: return first few sentences - sentences = thinker_result.split('.')[:3] - return '. 
'.join(sentences).strip() - - def _looks_like_tool_plan(self, thinker_result: str) -> bool: - result = thinker_result.lower() - tool_names = set() - if self._tool_registry: - tool_names = set(self._tool_registry.list_names()) - tool_markers = {"shell_exec", "file_read", "file_write", "memory", *tool_names} - plan_markers = ("план:", "шаг", "step", "tool", "инструмент") - return any(marker in result for marker in tool_markers) and any(marker in result for marker in plan_markers) - - def _build_thinker_prompt( - self, task_summary: str, context: dict[str, Any], mode_hint: str - ) -> str: - base_prompt = self._prompts.get("thinker", "") - memory_context = context.get("memory_context", []) - - tools_json = "[]" - if self._tool_registry: - schemas = self._tool_registry.list_schemas() - tools_json = json.dumps(schemas, ensure_ascii=False, indent=2) - - prompt_lines = [ - base_prompt, - "", - f"Task: {task_summary}", - f"Mode hint: {mode_hint}", - ] - - if memory_context: - memory_text = "\n".join([f"- {m.get('text', '')}" for m in memory_context[:5]]) - prompt_lines.append(f"\nRelevant memory:\n{memory_text}") - - session_history = context.get("session_history", []) - if session_history: - history_text = "\n".join([f"- {h.get('text', '')}" for h in session_history[:3]]) - prompt_lines.append(f"\nPrevious requests in this session:\n{history_text}") - - # Active memory recall results - memory_recall = context.get("memory_recall") - if memory_recall: - prompt_lines.append("\n=== ИЗ ДОЛГОВРЕМЕННОЙ ПАМЯТИ (ACTIVE RECALL) ===") - prompt_lines.append(f"Поисковый запрос: {memory_recall.get('query', '')}") - prompt_lines.append(memory_recall.get("summary", "")) - prompt_lines.append("=== КОНЕЦ ПАМЯТИ ===") - - prompt_lines.extend([ - "", - f"AVAILABLE TOOLS (JSON):", - tools_json, - "", - ]) - - return "\n".join(prompt_lines) - - def _build_json_compiler_prompt(self, thinker_result: str) -> str: - base_prompt = self._prompts.get("json_compiler", "") - - prompt_lines = [ - 
base_prompt, - "", - "Thinker's plan:", - thinker_result, - "", - ] - - return "\n".join(prompt_lines) - - def _determine_mode_from_context(self, context: dict[str, Any]) -> str: - """Legacy method - kept for compatibility""" - task_summary = str(context.get("task_summary", "")).lower() - keywords = ["запусти", "выполни", "создай", "напиши", "удали", "run", "execute", "create"] - for kw in keywords: - if kw in task_summary: - return "execution" - return "conversation" - - async def _classify_intent(self, task_summary: str) -> str: - """LLM-based intent classification""" - if self._intent_classifier == "orchestrator" and self._orchestrator: - classifier_model = self._orchestrator - else: - classifier_model = self._thinker - - if not classifier_model: - logger.warning("No classifier model available, using default") - return "conversation" - - classification_prompt = f"""Классифицируй запрос пользователя: "{task_summary}" - -Классы: -- execution: чтобы ответить, агенту нужно обратиться к локальной среде, файлам, shell, tools, памяти, сети или выполнить проверку/операцию. Это включает вопросы о текущем состоянии ПК, установленных пакетах, файлах, процессах, времени работы, обновлениях, логах. -- conversation: можно ответить сразу из диалога и общих знаний, без проверки локальной среды и без tools. -- clarification_needed: нельзя понять, что именно пользователь хочет. 
- -Верни ровно один токен без рассуждений: execution или conversation или clarification_needed""" - - try: - result = await classifier_model.generate(classification_prompt) - classification = self._extract_classification(result) - if classification: - logger.info(f"Intent classified: {classification} for task: {task_summary}") - return classification - - logger.warning(f"Invalid classification result: {result}, defaulting to conversation") - return "conversation" - except Exception as e: - logger.warning(f"Intent classification failed: {e}, defaulting to conversation") - return "conversation" - - def _extract_classification(self, raw_result: str) -> str | None: - result = raw_result.strip().lower() - allowed = {"execution", "conversation", "clarification_needed"} - if result in allowed: - return result - - result = re.sub(r".*?", " ", result, flags=re.DOTALL) - if ( - "shell_exec" in result - or "execute command" in result - or "command execution" in result - or "use the tool" in result - or "use a tool" in result - ): - return "execution" - tokens = re.findall(r"\b(execution|conversation|clarification_needed)\b", result) - if tokens: - return tokens[-1] - - first_word = result.split()[0] if result.split() else "" - if first_word in allowed: - return first_word - - return None - - def _validate_directive(self, output: str, mode_hint: str) -> ExecutionDirective | None: - if not output: - return None - - try: - json_start = output.find("{") - json_end = output.rfind("}") + 1 - if json_start < 0 or json_end <= 0: - return None - - json_str = output[json_start:json_end] - data = json.loads(json_str) - - if "type" not in data: - return None - - msg_type = data.get("type", "") - payload = data.get("payload", {}) - - if msg_type == "step" and "tool" in payload: - tool = payload.get("tool", "") - args = payload.get("args", {}) - payload = {"tool": tool, "args": args} - - if msg_type == "plan": - payload = {"steps": payload.get("steps", [])} - - return ExecutionDirective( - 
type=msg_type, - payload=payload, - confidence=data.get("confidence", 0.9), - reason=data.get("reason", ""), - ) - except (json.JSONDecodeError, ValueError, TypeError) as e: - logger.warning(f"Directive JSON validation failed: {e}") - return None - - def _guard_rail_check(self, directive: ExecutionDirective) -> ExecutionDirective: - tool_name = directive.payload.get("tool", "") - if tool_name in {"shell_exec", "file_write", "file_delete"}: - return ExecutionDirective( - type=directive.type, - payload=directive.payload, - requires_permission=True, - confidence=directive.confidence, - reason=directive.reason, - ) - return directive - - def _add_thinker_feedback(self, prompt: str, error: str, attempt: int) -> str: - feedback = f"\n[ATTEMPT {attempt + 1} FAILED: {error}]\n" - feedback += "Provide a valid semantic plan.\n" - return prompt + feedback - - def _emit_event( - self, - event_type: str, - payload: dict[str, Any], - task_id: str | None, - session_id: str | None, - ) -> None: - if self._event_bus and task_id: - from app.core.contracts import RuntimeEvent - event = RuntimeEvent( - task_id=task_id, - session_id=session_id or "unknown", - sequence=self._event_bus.next_sequence(task_id), - type=event_type, - payload=payload, - ) - self._event_bus.publish(event) - - SYS_UTIL_PROMPT = None - - async def _fix_invalid_json(self, invalid_result: str, attempt: int, task_id: str | None, session_id: str | None) -> str | None: - """Try to fix invalid JSON using sys_util model.""" - if not self._sys_util: - return None - - first_brace = invalid_result.find('{') - last_brace = invalid_result.rfind('}') - if first_brace < 0 or last_brace <= first_brace: - return None - - truncated_json = invalid_result[first_brace:last_brace + 1] - - error_msg = "" - try: - json.loads(truncated_json) - except json.JSONDecodeError as e: - error_msg = str(e) - - sys_util_prompt = ( - self._prompts.get("sys_util") - if self._prompts - else self.SYS_UTIL_PROMPT or ( - "You are a STRICT JSON repair 
engine. " - "Your job is ONLY to fix invalid JSON syntax. " - "You MUST output valid JSON or nothing else." - ) - ) - fix_prompt = f"""{sys_util_prompt} - - {error_msg} - - Fixed JSON:""" - - try: - logger.info(f"JSON fix using sys_util model (attempt {attempt + 1})") - fixed_result = await self._sys_util.generate(fix_prompt) - - fixed_first = fixed_result.find('{') - fixed_last = fixed_result.rfind('}') - if fixed_first >= 0 and fixed_last > fixed_first: - return fixed_result[fixed_first:fixed_last + 1] - - return None - - except Exception as e: - logger.warning(f"JSON fix failed: {e}") - return None diff --git a/app/core/command_analyzer.py b/app/core/command_analyzer.py deleted file mode 100644 index b2a1e4e..0000000 --- a/app/core/command_analyzer.py +++ /dev/null @@ -1,60 +0,0 @@ -from __future__ import annotations - -import re -import shlex -from typing import Any - -from app.core.permission_service import PermissionService - - -class CommandAnalyzer: - """Deterministic shell action analyzer for structured critic evidence.""" - - _SPLIT_RE = re.compile(r"\s*(?:&&|;)\s*") - - def __init__(self, permission_service: PermissionService) -> None: - self._permission_service = permission_service - - def analyze(self, command: str, task_id: str, session_id: str) -> dict[str, Any]: - segments = [segment.strip() for segment in self._SPLIT_RE.split(command) if segment.strip()] - root_required: list[str] = [] - elevated: list[str] = [] - unelevated_root: list[str] = [] - - for segment in segments: - normalized, is_elevated = self._strip_sudo(segment) - check = self._permission_service.check_shell_command( - task_id=task_id, - session_id=session_id, - command=normalized, - ) - if check.get("requires_sudo"): - root_required.append(normalized) - if is_elevated: - elevated.append(normalized) - else: - unelevated_root.append(normalized) - - diagnosis_type = "privilege_scope_error" if unelevated_root else "ok" - return { - "type": diagnosis_type, - "command": command, - 
"segments": segments, - "root_required_segments": root_required, - "elevated_segments": elevated, - "unelevated_root_segments": unelevated_root, - } - - def _strip_sudo(self, segment: str) -> tuple[str, bool]: - try: - parts = shlex.split(segment) - except ValueError: - return segment, segment.strip().startswith("sudo ") - if not parts or parts[0] != "sudo": - return segment, False - index = 1 - while index < len(parts) and parts[index].startswith("-"): - index += 1 - if index < len(parts) and parts[index - 1] in {"-p", "--prompt"}: - index += 1 - return " ".join(shlex.quote(part) for part in parts[index:]), True diff --git a/app/core/config.py b/app/core/config.py deleted file mode 100644 index 2e7090b..0000000 --- a/app/core/config.py +++ /dev/null @@ -1,93 +0,0 @@ -from __future__ import annotations - -import json -from pathlib import Path -from typing import Any - -from pydantic import BaseModel, Field - - -class ModelsConfig(BaseModel): - orchestrator_path: str = "models/llama.gguf" - coder_path: str = "models/xcoder.gguf" - critic_path: str = "models/gemma.gguf" - embeddings_path: str = "models/all-MiniLM-L6-v2" - inference: dict[str, Any] = Field(default_factory=dict) - thinker: dict[str, Any] = Field(default_factory=dict) - json_compiler: dict[str, Any] = Field(default_factory=dict) - orchestrator: dict[str, Any] = Field(default_factory=dict) - coder: dict[str, Any] = Field(default_factory=dict) - critic: dict[str, Any] = Field(default_factory=dict) - sys_util: dict[str, Any] = Field(default_factory=dict) - embeddings: dict[str, Any] = Field(default_factory=dict) - - -class PromptsConfig(BaseModel): - orchestration_prompt: str = "" - planning_prompt: str = "" - coder_prompt: str = "" - critic_prompt: str = "" - - -class PermissionsConfig(BaseModel): - dangerous_commands: dict[str, str] = Field(default_factory=dict) - sensitive_paths: list[str] = Field(default_factory=list) - default_approval_behavior: str = "ask_always" - - -class RuntimeConfig(BaseModel): 
- step_timeout_ms: int = 30_000 - task_timeout_ms: int = 300_000 - shell_command_timeout_ms: int = 3_600_000 - shell_idle_timeout_ms: int = 600_000 - planner_retry_limit: int = 2 - tool_retry_limit: int = 1 - replan_limit: int = 1 - max_execution_steps: int = 20 - retrieval_top_k: int = 5 - max_context_tokens: int = 8192 - context_budgets: dict[str, int] = Field(default_factory=lambda: { - "system": 512, - "task": 512, - "memory": 2048, - "execution": 2048, - "tools": 1024, - "safety": 512, - }) - reserve_for_generation_pct: int = 25 - orchestrator_retry_limit: int = 2 - intent_classifier: str = "thinker" - recall_model: str = "sys_util" - memory_thresholds: dict[str, float] = Field(default_factory=dict) - critic_fallback_policy: str = "continue_without_critic" - checkpoint_policy: dict[str, Any] = Field(default_factory=dict) - event_retention_policy: dict[str, Any] = Field(default_factory=dict) - streaming_settings: dict[str, Any] = Field(default_factory=dict) - debug: bool = False - debug_orchestrator_log_length: int = 500 - json_fix_retry_limit: int = 2 - json_fix_use_sys_util: bool = True - recall_model: str = "json_compiler" - critic_retry_limit: int = 2 - - -class AppConfig(BaseModel): - models: ModelsConfig - prompts: PromptsConfig - permissions: PermissionsConfig - runtime: RuntimeConfig - - -def _load_json(path: Path) -> dict[str, Any]: - with path.open("r", encoding="utf-8") as handle: - return json.load(handle) - - -def load_app_config(config_dir: str | Path) -> AppConfig: - config_path = Path(config_dir) - return AppConfig( - models=ModelsConfig.model_validate(_load_json(config_path / "models.json")), - prompts=PromptsConfig.model_validate(_load_json(config_path / "prompts.json")), - permissions=PermissionsConfig.model_validate(_load_json(config_path / "permissions.json")), - runtime=RuntimeConfig.model_validate(_load_json(config_path / "runtime.json")), - ) diff --git a/app/core/context_builder.py b/app/core/context_builder.py deleted file mode 100644 
index 77f4b32..0000000 --- a/app/core/context_builder.py +++ /dev/null @@ -1,172 +0,0 @@ -from __future__ import annotations - -import logging -from typing import Any - -from app.core.contracts import TaskCheckpoint, UserTask - -logger = logging.getLogger(__name__) - -DEFAULT_BUDGETS = { - "system": 512, - "task": 512, - "memory": 2048, - "execution": 2048, - "tools": 1024, - "safety": 512, -} - - -class ContextBuilder: - def __init__( - self, - memory_interface=None, - tool_registry=None, - config: dict[str, Any] | None = None, - ) -> None: - self._memory = memory_interface - self._tool_registry = tool_registry - self._config = config or {} - self._max_tokens = self._config.get("max_context_tokens", 8192) - self._budgets = self._config.get("context_budgets", DEFAULT_BUDGETS) - self._reserve_pct = self._config.get("reserve_for_generation_pct", 25) - - def build( - self, - task: UserTask, - checkpoint: TaskCheckpoint | None = None, - query: str | None = None, - ) -> dict[str, Any]: - task_summary = task.input - search_query = query or task_summary - session_id = task.session_id - - memory_context = [] - if self._memory: - memory_context = self._retrieve_memory(search_query, session_id=session_id) - - budgets = self._calculate_budgets() - reserved = self._reserve_for_generation() - - system_budget = budgets.get("system", 512) - task_budget = budgets.get("task", 512) - safety_budget = budgets.get("safety", 512) - memory_budget = budgets.get("memory", 2048) - - truncated_memory = self._truncate_memory( - memory_context, memory_budget - ) - - # Get session history for follow-up context - session_history = self._get_session_history(session_id) - - context = { - "system_prompt": "", - "task_summary": task_summary[:task_budget], - "task_context": task.context, - "memory_context": truncated_memory, - "session_history": session_history, - "execution_context": checkpoint.model_dump() if checkpoint else {}, - "tool_context": self._get_tool_context(), - "safety_context": {}, - 
"constraints": { - "budgets": budgets, - "reserved_for_generation": reserved, - "original_memory_count": len(memory_context), - "truncated_memory_count": len(truncated_memory), - }, - } - - return context - - def _get_tool_context(self) -> list[dict[str, Any]]: - """Expose available tools to orchestrator.""" - if not self._tool_registry: - return [] - - tools = [] - for name in self._tool_registry.list_names(): - tool = self._tool_registry.get(name) - tools.append({ - "name": name, - "description": getattr(tool, "description", ""), - }) - return tools - - def _calculate_budgets(self) -> dict[str, int]: - return dict(self._budgets) - - def _reserve_for_generation(self) -> int: - return int(self._max_tokens * self._reserve_pct / 100) - - def _retrieve_memory( - self, - query: str, - session_id: str | None = None, - top_k: int = 5, - ) -> list[dict[str, Any]]: - if not self._memory: - return [] - - try: - results = self._memory.search(query, top_k=top_k, session_id=session_id) - return [ - { - "id": entry.id, - "text": entry.text, - "kind": entry.kind, - "source": entry.source, - "weight": entry.weight, - "score": score, - } - for entry, score in results - ] - except Exception as e: - logger.warning(f"Memory retrieval failed: {e}") - return [] - - def _get_session_history(self, session_id: str | None = None) -> list[dict[str, Any]]: - """Get previous task summaries from the same session for context.""" - if not self._memory or not session_id: - return [] - - try: - # Get recent entries from same session - entries = self._memory.get_by_session(session_id, limit=5) - # Filter to only task summaries - summaries = [ - { - "id": entry.id, - "text": entry.text, - "kind": entry.kind, - "source": entry.source, - "weight": entry.weight, - } - for entry in entries - if entry.kind in ("summary", "tool_result") - ] - return summaries - except Exception as e: - logger.warning(f"Session history retrieval failed: {e}") - return [] - - def _truncate_memory( - self, - memory_context: 
list[dict[str, Any]], - budget: int, - ) -> list[dict[str, Any]]: - if not memory_context: - return [] - - estimated_per_entry = 50 - max_entries = max(budget // estimated_per_entry, 1) - - if len(memory_context) > max_entries: - return memory_context[:max_entries] - - return memory_context - - def estimate_tokens(self, text: str) -> int: - if not text: - return 0 - return len(text.split()) * 4 // 3 \ No newline at end of file diff --git a/app/core/contracts.py b/app/core/contracts.py deleted file mode 100644 index 2a1baa8..0000000 --- a/app/core/contracts.py +++ /dev/null @@ -1,148 +0,0 @@ -from __future__ import annotations - -from datetime import datetime, timezone -from typing import Any, Literal -from uuid import uuid4 - -from pydantic import BaseModel, Field - - -def utc_now() -> datetime: - return datetime.now(timezone.utc) - - -class UserTask(BaseModel): - task_id: str = Field(default_factory=lambda: str(uuid4())) - session_id: str = Field(default_factory=lambda: str(uuid4())) - input: str - context: dict[str, Any] = Field(default_factory=dict) - created_at: datetime = Field(default_factory=utc_now) - - -class PlanStep(BaseModel): - id: str - kind: Literal["tool", "coder", "memory", "respond"] - tool: str | None = None - args: dict[str, Any] = Field(default_factory=dict) - description: str - requires_confirmation: bool = False - depends_on: list[str] = Field(default_factory=list) - - -class ToolCall(BaseModel): - tool: str - args: dict[str, Any] = Field(default_factory=dict) - task_id: str - step_id: str - - -class ToolResult(BaseModel): - tool: str - ok: bool - output: Any = None - error: str | None = None - metadata: dict[str, Any] = Field(default_factory=dict) - - -class CoderRequest(BaseModel): - mode: Literal["generate", "fix", "refactor"] - instruction: str - context: dict[str, Any] = Field(default_factory=dict) - task_id: str - - -class CriticScore(BaseModel): - correctness: float = Field(ge=0.0, le=1.0) - usefulness: float = Field(ge=0.0, le=1.0) - 
safety: float = Field(ge=0.0, le=1.0) - memory_store: bool - weight: float = Field(ge=0.0, le=1.0) - explanation: str - - -class MemoryEntry(BaseModel): - id: str = Field(default_factory=lambda: str(uuid4())) - text: str - kind: Literal["tool_result", "plan", "critique", "fact", "summary", "user_preference"] - source: Literal["tool", "critic", "user", "system"] - weight: float = Field(ge=0.0, le=1.0) - task_id: str | None = None - session_id: str | None = None - metadata: dict[str, Any] = Field(default_factory=dict) - created_at: datetime = Field(default_factory=utc_now) - embedding_model: str - embedding_dim: int - - -class PermissionDecision(BaseModel): - action_type: str - pattern: str - decision: Literal["allow_once", "allow_always", "deny", "ask_always"] - created_at: datetime = Field(default_factory=utc_now) - - -class RuntimeEvent(BaseModel): - event_id: str = Field(default_factory=lambda: str(uuid4())) - task_id: str - session_id: str - sequence: int - type: str - timestamp: datetime = Field(default_factory=utc_now) - payload: dict[str, Any] = Field(default_factory=dict) - causation_id: str | None = None - correlation_id: str = Field(default_factory=lambda: str(uuid4())) - - -class TaskCheckpoint(BaseModel): - task_id: str - status: str - active_step_id: str | None = None - plan_snapshot: dict[str, Any] = Field(default_factory=dict) - context_snapshot: dict[str, Any] = Field(default_factory=dict) - updated_at: datetime = Field(default_factory=utc_now) - - -class PermissionRequest(BaseModel): - task_id: str - session_id: str - action_type: str - pattern: str - command: str | None = None - path: str | None = None - requires_password: bool = False - - -class SecretRequest(BaseModel): - task_id: str - session_id: str - kind: str - prompt: str - command: str | None = None - - -class PasswordRequest(BaseModel): - task_id: str - session_id: str - command: str - reason: str - attempts: int = 0 - max_attempts: int = 3 - - -class ExecutionDirective(BaseModel): - 
type: Literal[ - "plan", - "tool", - "coder", - "respond", - "replan", - "store_memory", - "request_permission", - "complete", - "fail", - "noop", - ] - payload: dict[str, Any] = Field(default_factory=dict) - requires_permission: bool = False - confidence: float = Field(ge=0.0, le=1.0, default=0.0) - reason: str = "" diff --git a/app/core/execution_engine.py b/app/core/execution_engine.py deleted file mode 100644 index cb0d8c1..0000000 --- a/app/core/execution_engine.py +++ /dev/null @@ -1,975 +0,0 @@ -from __future__ import annotations - -import asyncio -import json -import logging -from typing import Any - -from app.core.contracts import ( - CriticScore, - ExecutionDirective, - PermissionDecision, - PermissionRequest, - RuntimeEvent, - SecretRequest, - ToolCall, - ToolResult, - UserTask, -) -from app.core.command_analyzer import CommandAnalyzer -from app.core.execution_scheduler import ExecutionScheduler -from app.events.event_bus import EventBus -from app.events.event_types import ( - CRITIC_CALLED, - CRITIC_RESULT, - PERMISSION_REQUESTED, - PERMISSION_RESOLVED, - PLAN_FAILED, - PLAN_STARTED, - SECRET_REQUESTED, - STEP_STARTED, - STEPPED_COMPLETED, - TOOL_CALLED, - TOOL_COMPLETED, - TOOL_OUTPUT_CHUNK, -) -from app.models.async_adapters import AsyncCriticAdapter, AsyncCoderAdapter -from app.memory.write_policy import MemoryWritePolicy -from app.memory.interface import MemoryInterface - -logger = logging.getLogger(__name__) - - -class ExecutionEngine: - def __init__( - self, - event_bus: EventBus, - tool_registry, - permission_service, - scheduler: ExecutionScheduler | None = None, - critic: AsyncCriticAdapter | None = None, - memory_policy: MemoryWritePolicy | None = None, - memory_interface: MemoryInterface | None = None, - prompts: dict[str, str] | None = None, - recovery_limit: int = 1, - critic_retry_limit: int = 2, - command_analyzer: CommandAnalyzer | None = None, - ) -> None: - self._event_bus = event_bus - self._tool_registry = tool_registry - 
self._permission_service = permission_service - self._scheduler = scheduler or ExecutionScheduler() - self._critic = critic - self._coder: AsyncCoderAdapter | None = None - self._memory_policy = memory_policy - self._memory_interface = memory_interface - self._prompts = prompts or {} - self._recovery_limit = recovery_limit - self._critic_retry_limit = critic_retry_limit - self._command_analyzer = command_analyzer - - def set_critic(self, critic: AsyncCriticAdapter) -> None: - self._critic = critic - - def set_coder(self, coder: AsyncCoderAdapter) -> None: - self._coder = coder - - def set_memory_policy(self, policy: MemoryWritePolicy) -> None: - self._memory_policy = policy - - def execute( - self, - task: UserTask, - directive: ExecutionDirective, - permission_override: PermissionDecision | None = None, - secret_override: str | None = None, - password_override: str | None = None, - ) -> dict[str, Any]: - scheduled = self._scheduler.next_directive(directive) - self._publish(task, STEP_STARTED, {"directive_type": scheduled.type}) - - if scheduled.type == "plan": - return self._execute_plan( - task=task, - directive=scheduled, - permission_override=permission_override, - secret_override=secret_override, - password_override=password_override, - ) - - if scheduled.type == "tool": - return self._execute_tool( - task=task, - directive=scheduled, - permission_override=permission_override, - secret_override=secret_override, - password_override=password_override, - ) - - if scheduled.type == "respond": - return { - "status": "completed", - "result": { - "message": scheduled.payload.get("text", f"Runtime accepted task: {task.input}"), - "mode": scheduled.payload.get("mode", "direct_response"), - }, - "directive": scheduled.model_dump(mode="json"), - } - - if scheduled.type == "coder": - return self._execute_coder( - task=task, - directive=scheduled, - ) - - if scheduled.type == "fail": - return { - "status": "failed", - "result": {"error": scheduled.reason or "Execution 
failed."}, - } - - return { - "status": "completed", - "result": { - "message": "Directive accepted.", - "directive_type": scheduled.type, - }, - } - - def _execute_plan( - self, - task: UserTask, - directive: ExecutionDirective, - permission_override: PermissionDecision | None = None, - secret_override: str | None = None, - password_override: str | None = None, - ) -> dict[str, Any]: - # Unified format: {"type": "plan", "payload": {"steps": [...]}} - # Need to extract steps from nested payload - import json - - payload = directive.payload - steps_data = [] - - # If payload has "steps" directly, use them - if "steps" in payload: - steps_data = payload.get("steps", []) - # If payload is a string (JSON), parse it - elif isinstance(payload, str) and payload.strip().startswith("{"): - try: - parsed = json.loads(payload) - steps_data = parsed.get("payload", {}).get("steps", []) - except: - steps_data = [] - - if steps_data: - plan_json = json.dumps({"type": "plan", "payload": {"steps": steps_data}}) - else: - plan_json = json.dumps(payload) - - plan_steps = self._scheduler.parse_plan_steps(plan_json, task.task_id) - - if not plan_steps: - return { - "status": "failed", - "result": {"error": "Failed to parse plan steps from directive"}, - } - - if not self._scheduler.validate_no_cycles(plan_steps): - self._publish(task, PLAN_FAILED, {"error": "Cycle detected in plan"}) - return { - "status": "failed", - "result": {"error": "Cycle detected in plan"}, - } - - graph = self._scheduler.build_task_graph(plan_steps) - self._publish(task, PLAN_STARTED, {"steps": len(plan_steps)}) - - completed_steps: set[str] = set() - step_results: list[dict[str, Any]] = [] - critic_retries_used = 0 # Track critic→replan cycles - - ready_steps = self._get_ready_steps(graph, completed_steps) - - while ready_steps: - step = ready_steps.pop(0) - - # Handle respond kind directly without tool execution - if step.kind == "respond": - result = { - "status": "completed", - "result": { - "message": 
step.args.get("text", step.description), - }, - } - else: - step_directive = ExecutionDirective( - type=step.kind, - payload={ - "tool": step.tool, - "args": step.args, - }, - requires_permission=step.requires_confirmation, - reason=step.description, - ) - - result = self._execute_tool( - task=task, - directive=step_directive, - permission_override=permission_override, - secret_override=secret_override, - password_override=password_override, - ) - - # If tool needs human input/review - return immediately. - if result.get("status") in ( - "awaiting_permission", - "awaiting_input", - "awaiting_password", - "awaiting_review", - ): - return { - "status": result.get("status"), - "result": result.get("result", {}), - "step_results": step_results, - } - - step_results.append({ - "step_id": step.id, - "result": result, - }) - - completed_steps.add(step.id) - self._publish(task, STEPPED_COMPLETED, { - "step_id": step.id, - "status": result.get("status"), - }) - - # === Critic evaluation === - if self._critic and result.get("status") == "completed": - critic_score = self._evaluate_with_critic(task, step, result) - if critic_score: - result["critic_score"] = { - "correctness": critic_score.correctness, - "usefulness": critic_score.usefulness, - "safety": critic_score.safety, - "memory_store": critic_score.memory_store, - "weight": critic_score.weight, - "explanation": critic_score.explanation, - } - self._save_critique_to_memory(task, step, critic_score) - - # Check if step result is satisfactory - min_correctness = 0.5 - if critic_score.correctness < min_correctness: - # Step failed critic check — try to recover - if critic_retries_used < self._critic_retry_limit and step.kind != "respond": - critic_retries_used += 1 - self._publish(task, CRITIC_RESULT, { - "step_id": step.id, - "score": critic_score.model_dump(mode="json"), - "action": "retry", - "retry": critic_retries_used, - }) - # Retry the same step — rebuild directive - retry_directive = ExecutionDirective( - 
type=step.kind, - payload={"tool": step.tool, "args": step.args}, - requires_permission=step.requires_confirmation, - reason=step.description, - ) - retry_result = self._execute_tool( - task=task, - directive=retry_directive, - permission_override=permission_override, - secret_override=secret_override, - password_override=password_override, - ) - if retry_result.get("status") == "completed": - result = retry_result - step_results[-1]["result"] = result - # Re-evaluate after retry - critic_score2 = self._evaluate_with_critic(task, step, result) - if critic_score2 and critic_score2.correctness >= min_correctness: - # Retry succeeded - continue - # If retry also failed, continue to next step - else: - self._publish(task, CRITIC_RESULT, { - "step_id": step.id, - "score": critic_score.model_dump(mode="json"), - "action": "give_up", - "reason": f"Critic retry limit ({self._critic_retry_limit}) reached", - }) - - # Handle failed step - if result.get("status") == "failed": - review = self._build_failed_step_review(task, step, result) - if review: - return { - "status": "awaiting_review", - "result": { - "error": f"Step {step.id} requires review before replanning", - "failed_step": step.id, - "step_results": step_results, - "review": review, - }, - } - recovery = self._recover_failed_step( - task=task, - step=step, - result=result, - step_results=step_results, - permission_override=permission_override, - secret_override=secret_override, - password_override=password_override, - ) - if recovery.get("status") == "awaiting_permission": - return recovery - if recovery.get("status") == "completed": - recovered_result = recovery.get("result") - if recovered_result: - step_results[-1]["result"] = recovered_result - if recovery.get("finish"): - return { - "status": "completed", - "result": { - "message": recovery.get("message", "Recovered from failed step"), - "step_results": step_results, - }, - } - else: - return { - "status": "failed", - "result": { - "error": f"Step {step.id} 
failed", - "failed_step": step.id, - "step_results": step_results, - "recovery": recovery.get("result"), - }, - } - - ready_steps = self._get_ready_steps(graph, completed_steps) - - return { - "status": "completed", - "result": { - "message": f"Plan executed: {len(completed_steps)} steps completed", - "step_results": step_results, - }, - } - - def _build_failed_step_review(self, task: UserTask, step, result: dict[str, Any]) -> dict[str, Any] | None: - if step.tool != "shell_exec" or not self._command_analyzer: - return None - command = str((step.args or {}).get("command", "")) - if not command: - return None - diagnosis = self._command_analyzer.analyze( - command=command, - task_id=task.task_id, - session_id=task.session_id, - ) - if diagnosis.get("type") == "ok": - return None - return { - "step_id": step.id, - "tool": step.tool, - "command": command, - "diagnosis": diagnosis, - "critic_assessment": { - "classification": "model_planning_error", - "needs_replan": True, - "explanation": "Structured command analysis found a model action error before recovery.", - }, - } - - def _recover_failed_step( - self, - task: UserTask, - step, - result: dict[str, Any], - step_results: list[dict[str, Any]], - permission_override: PermissionDecision | None = None, - secret_override: str | None = None, - password_override: str | None = None, - ) -> dict[str, Any]: - if self._recovery_limit <= 0 or not self._critic: - return {"status": "failed", "result": {"reason": "recovery_unavailable"}} - - decision = self._evaluate_recovery(task, step, result, step_results) - action = decision.get("action", "fail") - - if action == "continue": - recovered = dict(result) - recovered["status"] = "completed" - recovered["recovery_decision"] = decision - return {"status": "completed", "result": recovered} - - if action == "respond": - recovered = dict(result) - recovered["status"] = "completed" - recovered["recovery_decision"] = decision - return { - "status": "completed", - "result": recovered, - 
"finish": True, - "message": decision.get("message") or decision.get("reason") or "Recovered by responding to user", - } - - if action == "retry": - retry_tool = decision.get("tool") or step.tool - retry_args = decision.get("args") or step.args - retry_result = self._execute_tool( - task=task, - directive=ExecutionDirective( - type="tool", - payload={"tool": retry_tool, "args": retry_args}, - requires_permission=True, - reason=decision.get("reason", "Recovery retry"), - ), - permission_override=permission_override, - secret_override=secret_override, - password_override=password_override, - ) - if retry_result.get("status") == "awaiting_permission": - return retry_result - retry_result["recovery_decision"] = decision - if retry_result.get("status") == "completed": - return {"status": "completed", "result": retry_result} - return {"status": "failed", "result": {"decision": decision, "retry_result": retry_result}} - - return {"status": "failed", "result": decision} - - def _evaluate_recovery( - self, - task: UserTask, - step, - result: dict[str, Any], - step_results: list[dict[str, Any]], - ) -> dict[str, Any]: - prompt = self._build_recovery_prompt(task, step, result, step_results) - self._publish(task, CRITIC_CALLED, {"step_id": step.id, "mode": "recovery"}) - - try: - output = asyncio.run(self._critic.generate(prompt, max_tokens=512)) - decision = self._parse_recovery_decision(output) - self._publish(task, CRITIC_RESULT, { - "step_id": step.id, - "mode": "recovery", - "decision": decision, - "raw": output, - }) - return decision - except Exception as e: - logger.warning(f"Recovery evaluation failed: {e}") - self._publish(task, CRITIC_RESULT, { - "step_id": step.id, - "mode": "recovery", - "error": str(e), - }) - return {"action": "fail", "reason": str(e)} - - def _build_recovery_prompt( - self, - task: UserTask, - step, - result: dict[str, Any], - step_results: list[dict[str, Any]], - ) -> str: - return f"""You are a recovery controller for an agent runtime. 
- -Decide what to do after a failed tool step. A non-zero exit code is not always fatal. -Interpret the failure in context. - -Allowed actions: -- continue: failure is acceptable information; continue the plan. -- retry: try one alternative tool call. Include "tool" and "args". -- respond: stop and answer the user with available information. Include "message". -- fail: real failure; stop the task. - -Return ONLY JSON: -{{"action":"continue|retry|respond|fail","reason":"...","tool":"shell_exec","args":{{...}},"message":"..."}} - -Task: -{task.input} - -Failed step: -id={step.id} -tool={step.tool} -args={json.dumps(step.args, ensure_ascii=False)} -description={step.description} - -Failed result: -{json.dumps(result, ensure_ascii=False, indent=2)} - -Previous step results: -{json.dumps(step_results, ensure_ascii=False, indent=2)} -""" - - def _parse_recovery_decision(self, output: str) -> dict[str, Any]: - try: - json_start = output.find("{") - json_end = output.rfind("}") + 1 - if json_start < 0 or json_end <= 0: - return {"action": "fail", "reason": "Recovery output was not JSON"} - data = json.loads(output[json_start:json_end]) - action = data.get("action", "fail") - if action not in {"continue", "retry", "respond", "fail"}: - action = "fail" - data["action"] = action - return data - except (json.JSONDecodeError, TypeError, ValueError) as e: - return {"action": "fail", "reason": f"Recovery JSON parse failed: {e}"} - - def _get_ready_steps( - self, - graph: dict[str, Any], - completed: set[str], - ) -> list: - if not graph or not graph.get("nodes"): - return [] - - step_map: dict = graph.get("step_map", {}) - ready = [] - - for node in graph["nodes"]: - node_id = node["id"] - if node_id in completed: - continue - - deps = node.get("depends_on", []) - if all(dep in completed for dep in deps): - step = step_map.get(node_id) - if step: - ready.append(step) - - return ready - - def _evaluate_with_critic( - self, - task: UserTask, - step, - result: dict[str, Any], - ) -> 
CriticScore | None: - if not self._critic: - return None - - critic_prompt = self._build_critic_prompt(step, result) - - self._publish(task, CRITIC_CALLED, {"step_id": step.id}) - - try: - critic_output = asyncio.run(self._critic.generate(critic_prompt)) - score = self._parse_critic_score(critic_output) - - self._publish(task, CRITIC_RESULT, { - "step_id": step.id, - "score": score.model_dump(mode="json") if score else None, - }) - - if score: - result["critic_score"] = { - "correctness": score.correctness, - "usefulness": score.usefulness, - "safety": score.safety, - "memory_store": score.memory_store, - "weight": score.weight, - "explanation": score.explanation, - } - - return score - - except Exception as e: - logger.warning(f"Critic evaluation failed: {e}") - self._publish(task, CRITIC_RESULT, { - "step_id": step.id, - "error": str(e), - }) - return None - - def _save_critique_to_memory( - self, - task: UserTask, - step, - score: CriticScore, - ) -> None: - """Save critic evaluation as critique entry in memory, using MemoryWritePolicy.""" - if not self._memory_interface: - return - - try: - # Check with policy before saving - if self._memory_policy: - decision = self._memory_policy.decide( - critic_score=score, - memory_type="critique", - session_id=task.session_id, - ) - if decision == "skip": - logger.info(f"MemoryWritePolicy skipped critique for {step.tool}") - return - # For "store_with_weight", we could adjust weight, but critic score already has weight - - tool_name = step.tool - tool_args = step.args or {} - args_str = ", ".join([f"{k}={v}" for k, v in tool_args.items()]) - - critique_text = f"Tool: {tool_name}({args_str}) | Task: {task.input[:100]} | Scores: correctness={score.correctness}, usefulness={score.usefulness}, safety={score.safety} | {score.explanation}" - - metadata = { - "task_input": task.input, - "tool": tool_name, - "args": tool_args, - "step_id": step.id, - "scores": { - "correctness": score.correctness, - "usefulness": 
score.usefulness, - "safety": score.safety, - }, - } - - self._memory_interface.insert( - text=critique_text, - kind="critique", - source="critic", - task_id=task.task_id, - session_id=task.session_id, - weight=score.weight, - metadata=metadata, - ) - logger.info(f"Saved critique to memory: {tool_name} task_id={task.task_id}") - - except Exception as e: - logger.warning(f"Failed to save critique to memory: {e}") - - def _build_critic_prompt(self, step, result: dict[str, Any]) -> str: - base_prompt = self._prompts.get("critic", "") - tool_result = result.get("result", {}) - - # Truncate long outputs to avoid exceeding context window - # Keep output under ~2000 chars to leave room for prompt + generation - output = tool_result.get("output", "") - if isinstance(output, str) and len(output) > 2000: - output = output[:2000] + "\n... [truncated]" - elif not isinstance(output, str): - output_str = json.dumps(output, ensure_ascii=False) - if len(output_str) > 2000: - output = output_str[:2000] + "\n... 
[truncated]" - else: - output = output_str - - # Build a compact result representation - compact_result = { - "ok": tool_result.get("ok"), - "output": output, - "error": tool_result.get("error"), - "exit_code": tool_result.get("metadata", {}).get("exit_code"), - } - - return f"""{base_prompt} - -Step: {step.description} -Tool: {step.tool} -Args: {step.args} - -Result: -{json.dumps(compact_result, indent=2, ensure_ascii=False)} - -Evaluate and respond with JSON: -{{"correctness": 0.0-1.0, "usefulness": 0.0-1.0, "safety": 0.0-1.0, "memory_store": true|false, "weight": 0.0-1.0, "explanation": "..."}}""" - - def _parse_critic_score(self, output: str) -> CriticScore | None: - try: - json_start = output.find("{") - json_end = output.rfind("}") + 1 - if json_start < 0: - return None - - json_str = output[json_start:json_end] - data = json.loads(json_str) - - return CriticScore( - correctness=data.get("correctness", 0.5), - usefulness=data.get("usefulness", 0.5), - safety=data.get("safety", 1.0), - memory_store=data.get("memory_store", False), - weight=data.get("weight", 0.5), - explanation=data.get("explanation", ""), - ) - - except (json.JSONDecodeError, ValueError, TypeError) as e: - logger.warning(f"Critic score parsing failed: {e}") - return None - - def _execute_coder( - self, - task: UserTask, - directive: ExecutionDirective, - ) -> dict[str, Any]: - if not self._coder: - return {"status": "failed", "result": {"error": "Coder model not available"}} - - coder_task = directive.payload.get("task", "") - if not coder_task: - return {"status": "failed", "result": {"error": "Missing task for coder"}} - - try: - output = asyncio.run(self._coder.generate(coder_task)) - - return { - "status": "completed", - "result": {"code": output}, - } - except Exception as e: - logger.warning(f"Coder execution failed: {e}") - return {"status": "failed", "result": {"error": str(e)}} - - def _execute_tool( - self, - task: UserTask, - directive: ExecutionDirective, - permission_override: 
PermissionDecision | None = None, - secret_override: str | None = None, - password_override: str | None = None, - ) -> dict[str, Any]: - tool_name = str(directive.payload.get("tool", "")).strip() - tool_args = dict(directive.payload.get("args", {})) - - if password_override: - tool_args["password"] = password_override - - if not tool_name: - return {"status": "failed", "result": {"error": "Missing tool name"}} - - # Tool-first: validate tool exists in registry - available_tools = self._tool_registry.list_names() - if tool_name not in available_tools: - return {"status": "failed", "result": {"error": f"Unknown tool: {tool_name}. Available tools: {available_tools}"}} - - permission_result = None - - # If permission_override is provided, skip permission check - if permission_override is not None: - permission_result = { - "decision": permission_override.decision, - "command": tool_args.get("command", ""), - "cached": True, - } - # Check permission for shell_exec and file_write - elif tool_name == "shell_exec": - permission_result = self._permission_service.check_shell_command( - task_id=task.task_id, - session_id=task.session_id, - command=str(tool_args.get("command", "")), - ) - elif tool_name == "file_write": - # Allow writing to runtime data directory without permission check - write_path = str(tool_args.get("path", "")) - if "allowed_commands.json" in write_path or "/data/runtime" in write_path: - # Internal system write - allow without permission - permission_result = {"decision": "allowed", "path": write_path} - else: - permission_result = self._permission_service.check_write_path( - task_id=task.task_id, - session_id=task.session_id, - path=write_path, - ) - - # Handle permission result - if permission_result: - decision = permission_result.get("decision", "unknown") - - # Hard stop - deny execution - if decision == "hard_stop": - self._publish(task, PERMISSION_REQUESTED, permission_result) - return { - "status": "failed", - "result": { - "error": f"Command 
blocked: {permission_result.get('reason', 'Hard stop command')}", - "command": permission_result.get("command", ""), - }, - } - - # Cached - already allowed - if decision in ("allowed_always", "allowed") or permission_result.get("cached"): - self._publish(task, PERMISSION_RESOLVED, permission_result) - - # Need user confirmation - return immediately, don't continue execution - elif decision == "prompt": - self._publish(task, PERMISSION_REQUESTED, permission_result) - return { - "status": "awaiting_permission", - "result": { - "error": "Permission required before execution.", - "permission_request": permission_result, - }, - } - - # Hard stop - return immediately - elif decision == "deny": - self._publish(task, PERMISSION_RESOLVED, permission_result) - return { - "status": "failed", - "result": { - "error": "Permission denied", - "command": permission_result.get("command", ""), - }, - } - - # Deny - elif decision == "deny": - self._publish(task, PERMISSION_RESOLVED, permission_result) - return { - "status": "failed", - "result": { - "error": "Permission denied", - "command": permission_result.get("command", ""), - }, - } - - if tool_name == "shell_exec": - command = str(tool_args.get("command", "")) - - # Determine if sudo password is needed: - # 1. Command explicitly starts with "sudo" - # 2. Command is a known sudo-requiring command (apt, systemctl, etc.) 
— flagged by permission service - needs_password = command.startswith("sudo ") or (permission_result is not None and permission_result.get("requires_sudo", False)) - - if needs_password and secret_override is None: - secret_request = SecretRequest( - task_id=task.task_id, - session_id=task.session_id, - kind="sudo_password", - prompt="Sudo password required", - command=command, - ) - self._publish(task, SECRET_REQUESTED, secret_request.model_dump(mode="json")) - return { - "status": "awaiting_input", - "result": { - "error": "Secret required", - "secret_request": secret_request.model_dump(mode="json"), - }, - } - if needs_password and secret_override is not None: - # Inject sudo -S for explicit sudo commands, or prepend sudo -S for implicit ones - if command.startswith("sudo "): - tool_args["command"] = f"sudo -S -p '' {command[len('sudo '):]}" - else: - tool_args["command"] = f"sudo -S -p '' {command}" - tool_args["stdin_secret"] = f"{secret_override}\n" - - tool_call = ToolCall( - tool=tool_name, - args=tool_args, - task_id=task.task_id, - step_id="step-1", - ) - self._publish(task, TOOL_CALLED, tool_call.model_dump(mode="json")) - if tool_name == "shell_exec": - tool_args["__output_callback"] = lambda stream, chunk: self._publish( - task, - TOOL_OUTPUT_CHUNK, - { - "tool": tool_name, - "step_id": "step-1", - "stream": stream, - "chunk": chunk, - }, - ) - tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args) - self._publish(task, TOOL_COMPLETED, tool_result.model_dump(mode="json")) - - metadata = tool_result.metadata or {} - needs_sudo = metadata.get("needs_sudo", False) - sudo_auth_failed = metadata.get("sudo_auth_failed", False) or self._looks_like_sudo_auth_failure(tool_result) - - if tool_name == "shell_exec" and not tool_result.ok and sudo_auth_failed: - original_command = str(directive.payload.get("args", {}).get("command", tool_args.get("command", ""))) - secret_request = SecretRequest( - task_id=task.task_id, - 
session_id=task.session_id, - kind="sudo_password", - prompt="Sudo password incorrect. Try again", - command=original_command, - ) - self._publish(task, SECRET_REQUESTED, secret_request.model_dump(mode="json")) - return { - "status": "awaiting_input", - "result": { - "error": "Sudo password failed", - "secret_request": secret_request.model_dump(mode="json"), - "attempt_failed": True, - "tool_result": tool_result.model_dump(mode="json"), - }, - } - - if not tool_result.ok and needs_sudo: - return { - "status": "awaiting_password", - "result": { - "task_id": task.task_id, - "needs_sudo": True, - "command": tool_args.get("command", ""), - "error": tool_result.error or "Permission denied", - "tool_result": tool_result.model_dump(mode="json"), - }, - } - - if tool_name == "shell_exec" and not tool_result.ok and self._command_analyzer: - original_command = str(directive.payload.get("args", {}).get("command", tool_args.get("command", ""))) - diagnosis = self._command_analyzer.analyze( - command=original_command, - task_id=task.task_id, - session_id=task.session_id, - ) - if diagnosis.get("type") != "ok": - return { - "status": "awaiting_review", - "result": { - "error": "Tool action requires review before replanning", - "review": { - "step_id": "step-1", - "tool": tool_name, - "command": original_command, - "diagnosis": diagnosis, - "critic_assessment": { - "classification": "model_planning_error", - "needs_replan": True, - "explanation": "Structured command analysis found a model action error before recovery.", - }, - }, - "tool_result": tool_result.model_dump(mode="json"), - }, - } - - return { - "status": "completed" if tool_result.ok else "failed", - "result": tool_result.model_dump(mode="json"), - } - - def _looks_like_sudo_auth_failure(self, tool_result: ToolResult) -> bool: - output = f"{tool_result.output or ''}\n{tool_result.error or ''}".lower() - return any( - marker in output - for marker in ( - "incorrect password", - "incorrect password attempt", - "sudo: no 
password was provided", - "sorry, try again", - "authentication failure", - ) - ) - - def _publish(self, task: UserTask, event_type: str, payload: dict[str, Any]) -> None: - if not self._event_bus: - return - event = RuntimeEvent( - task_id=task.task_id, - session_id=task.session_id, - sequence=self._event_bus.next_sequence(task.task_id), - type=event_type, - payload=payload, - ) - self._event_bus.publish(event) diff --git a/app/core/execution_scheduler.py b/app/core/execution_scheduler.py deleted file mode 100644 index fe2e19e..0000000 --- a/app/core/execution_scheduler.py +++ /dev/null @@ -1,212 +0,0 @@ -from __future__ import annotations - -import json -import logging -from collections import deque -from typing import Any - -from app.core.contracts import ExecutionDirective, PlanStep - -logger = logging.getLogger(__name__) - - -class ExecutionScheduler: - def __init__(self, retry_limit: int = 2) -> None: - self._retry_limit = retry_limit - - def parse_plan_steps( - self, - json_str: str, - task_id: str | None = None, - ) -> list[PlanStep]: - try: - json_start = json_str.find("{") - json_end = json_str.rfind("}") + 1 - if json_start < 0: - return [] - - json_str = json_str[json_start:json_end] - data = json.loads(json_str) - - # Unified format: {"type": "plan", "payload": {"steps": [...]}} - # or direct: {"type": "step", "payload": {"tool": "...", "args": {...}}} - if isinstance(data, dict): - msg_type = data.get("type", "") - - # Single step format: {"type": "step", "payload": {"tool": ..., "args": ...}} - if msg_type == "step": - payload = data.get("payload", {}) - step = { - "id": "step-0", - "kind": "tool", - "tool": payload.get("tool"), - "args": payload.get("args", {}), - "description": payload.get("description", ""), - "depends_on": payload.get("depends_on", []), - } - data = [step] - - # Plan format: {"type": "plan", "payload": {"steps": [...]}} - elif msg_type == "plan": - payload = data.get("payload", {}) - steps_data = payload.get("steps", []) - - # 
Normalize steps: handle {"type": "step", "payload": {"tool": ...}} - normalized = [] - for step in steps_data: - if isinstance(step, dict) and step.get("type") == "step": - inner = step.get("payload", {}) - normalized.append({ - "tool": inner.get("tool"), - "args": inner.get("args", {}), - "description": inner.get("description", ""), - "depends_on": inner.get("depends_on", []), - }) - else: - normalized.append(step) - steps_data = normalized - - data = steps_data if steps_data else [] - - # Old format compatibility - elif "steps" in data: - data = data["steps"] - elif "plan" in data: - data = data["plan"] - else: - data = [data] - elif isinstance(data, str): - data = json.loads(data) - if isinstance(data, dict): - data = [data] - - steps = [] - for i, step_data in enumerate(data): - if isinstance(step_data, str): - step_data = {"id": f"step-{i}", "kind": "respond", "text": step_data} - - if not isinstance(step_data, dict): - continue - - step_data.setdefault("id", f"step-{i}") - - # Tool-first: scheduler получает tool напрямую, без трансформаций - # kind определяется по наличию tool name - # args передаются напрямую - if step_data.get("tool"): - step_data["kind"] = "tool" - - step_data.setdefault("kind", step_data.get("kind", "respond")) - step_data.setdefault("tool", step_data.get("tool")) - step_data.setdefault("args", step_data.get("args", {})) - step_data.setdefault("description", step_data.get("description", "")) - step_data.setdefault("requires_confirmation", False) - step_data.setdefault("depends_on", []) - - if "description" not in step_data: - step_data["description"] = f"Step {i}" - - steps.append(PlanStep(**step_data)) - - return steps - - except (json.JSONDecodeError, ValueError, TypeError) as e: - logger.warning(f"Plan parsing failed: {e}") - return [] - - def validate_no_cycles(self, steps: list[PlanStep]) -> bool: - if not steps: - return True - - graph: dict[str, set[str]] = {} - for step in steps: - graph[step.id] = set(step.depends_on) - - 
visited: set[str] = set() - rec_stack: set[str] = set() - - def has_cycle(node: str) -> bool: - if node in rec_stack: - return True - if node in visited: - return False - - visited.add(node) - rec_stack.add(node) - - for dep in graph.get(node, []): - if has_cycle(dep): - return True - - rec_stack.remove(node) - return False - - for step in steps: - if step.id not in visited: - if has_cycle(step.id): - logger.warning(f"Cycle detected in plan: {step.id}") - return False - - return True - - def build_task_graph( - self, - steps: list[PlanStep], - ) -> dict[str, Any]: - if not steps: - return {"nodes": [], "edges": []} - - if not self.validate_no_cycles(steps): - return {"nodes": [], "edges": [], "error": "Cycle detected in plan"} - - nodes = [] - edges = [] - - step_map = {s.id: s for s in steps} - - for step in steps: - nodes.append({ - "id": step.id, - "kind": step.kind, - "tool": step.tool, - "args": step.args, - "ready": len(step.depends_on) == 0, - }) - - for dep_id in step.depends_on: - edges.append({ - "from": dep_id, - "to": step.id, - }) - - return {"nodes": nodes, "edges": edges, "step_map": step_map} - - def get_ready_steps( - self, - graph: dict[str, Any], - completed: set[str], - ) -> list[PlanStep]: - if not graph or not graph.get("nodes"): - return [] - - step_map: dict[str, PlanStep] = graph.get("step_map", {}) - ready = [] - - for node in graph["nodes"]: - node_id = node["id"] - if node_id in completed: - continue - - deps = node.get("depends_on", []) - if all(dep in completed for dep in deps): - step = step_map.get(node_id) - if step: - ready.append(step) - - return ready - - def next_directive( - self, - directive: ExecutionDirective, - ) -> ExecutionDirective: - return directive \ No newline at end of file diff --git a/app/core/intent_parser.py b/app/core/intent_parser.py deleted file mode 100644 index 61adda1..0000000 --- a/app/core/intent_parser.py +++ /dev/null @@ -1,104 +0,0 @@ -from __future__ import annotations - -import re -from typing 
import Any - -from app.core.contracts import ExecutionDirective - -SHELL_PREFIXES = ( - "run ", - "execute ", - "launch ", - "запусти ", - "выполни ", - "выполнить ", -) - -MEMORY_STORE_PATTERNS = ( - r"запомни\s+(.+)", - r"сохрани\s+(.+)", - r"запиши\s+(.+)", - r"remember\s+(.+)", - r"save\s+(.+)", -) - -MEMORY_SEARCH_PATTERNS = ( - r"вспомни\s+(.+)", - r"search memory\s+(.+)", -) - - -class IntentParser: - """Extracts explicit tool intents from natural-language task text.""" - - def __init__(self) -> None: - self._store_patterns = [re.compile(p, re.IGNORECASE) for p in MEMORY_STORE_PATTERNS] - self._search_patterns = [re.compile(p, re.IGNORECASE) for p in MEMORY_SEARCH_PATTERNS] - - def parse(self, task_input: str) -> ExecutionDirective | None: - normalized = task_input.strip() - lowered = normalized.lower() - - if matched := self._match_patterns(self._store_patterns, normalized): - return ExecutionDirective( - type="tool", - payload={ - "tool": "memory_insert", - "args": { - "text": matched.group(1).strip(), - "kind": "fact", - "source": "user", - }, - }, - requires_permission=False, - confidence=0.85, - reason="User explicitly requested to store in memory.", - ) - - if matched := self._match_patterns(self._search_patterns, normalized): - return ExecutionDirective( - type="tool", - payload={ - "tool": "memory_search", - "args": {"query": matched.group(1).strip()}, - }, - requires_permission=False, - confidence=0.85, - reason="User explicitly requested to search memory.", - ) - - for prefix in SHELL_PREFIXES: - if lowered.startswith(prefix): - command = normalized[len(prefix) :].strip() - if command: - return ExecutionDirective( - type="tool", - payload={ - "tool": "shell_exec", - "args": {"command": command}, - }, - requires_permission=True, - confidence=0.92, - reason="Natural-language task explicitly requested shell execution.", - ) - - quoted = re.match(r"^`(.+)`$", normalized) - if quoted: - return ExecutionDirective( - type="tool", - payload={ - "tool": 
"shell_exec", - "args": {"command": quoted.group(1)}, - }, - requires_permission=True, - confidence=0.75, - reason="Backticked input treated as direct shell command.", - ) - - return None - - def _match_patterns(self, patterns: list[re.Pattern], text: str): - for pattern in patterns: - if match := pattern.match(text): - return match - return None diff --git a/app/core/permission_resolution.py b/app/core/permission_resolution.py deleted file mode 100644 index afd9d06..0000000 --- a/app/core/permission_resolution.py +++ /dev/null @@ -1,24 +0,0 @@ -from __future__ import annotations - -from pydantic import BaseModel - - -class PermissionResolutionRequest(BaseModel): - task_id: str - decision: str - - -class SecretResolutionRequest(BaseModel): - task_id: str - secret: str - - -class PasswordResolutionRequest(BaseModel): - task_id: str - password: str - - -class ReviewResolutionRequest(BaseModel): - task_id: str - decision: str - correction: str | None = None diff --git a/app/core/permission_service.py b/app/core/permission_service.py deleted file mode 100644 index dd0f852..0000000 --- a/app/core/permission_service.py +++ /dev/null @@ -1,370 +0,0 @@ -from __future__ import annotations - -import hashlib -import json -import logging -import os -import re -import shlex -from pathlib import Path -from typing import Any - -logger = logging.getLogger(__name__) - - -class PermissionService: - """Permission-first model - user is the authority.""" - - def __init__(self, config: dict[str, Any] | None = None, cache_file: Path | None = None): - self._config = config or self._load_config() - self._settings = self._config.get("settings", {}) - self._cache_file = cache_file - self._categories = self._config.get("command_categories", {}) - self._path_settings = self._config.get("path_settings", {}) - self._legacy_dangerous_commands = self._config.get("dangerous_commands", {}) - self._legacy_sensitive_paths = self._config.get("sensitive_paths", []) - - def _load_config(self) -> 
dict[str, Any]: - try: - config_path = Path(__file__).parents[2] / "config" / "permissions.json" - with open(config_path) as f: - return json.load(f) - except Exception as e: - logger.warning(f"Failed to load permissions config: {e}") - return {"settings": {}, "command_categories": {}} - - def _get_cache_file(self) -> Path: - if self._cache_file: - return self._cache_file - - base_dir = Path(__file__).parents[2] - cache_relative = self._settings.get("cache_file", "data/runtime/allowed_commands.json") - return base_dir / cache_relative - - def _load_cache(self) -> dict[str, Any]: - cache_file = self._get_cache_file() - try: - if cache_file.exists(): - with open(cache_file) as f: - return json.load(f) - except Exception as e: - logger.warning(f"Failed to load cache: {e}") - - return {"allowed_once": {}, "allowed_always": {}} - - def _save_cache(self, cache: dict[str, Any]) -> None: - cache_file = self._get_cache_file() - cache_file.parent.mkdir(parents=True, exist_ok=True) - with open(cache_file, "w") as f: - json.dump(cache, f, indent=2) - - def check_shell_command( - self, - task_id: str, - session_id: str, - command: str, - ) -> dict[str, Any]: - """Check if shell command requires permission.""" - normalized = self._normalize_command(command) - command_hash = self._hash_command(normalized) - - cache = self._load_cache() - - # Check cache first - if command_hash in cache.get("allowed_always", {}): - return { - "decision": "allowed_always", - "command": normalized, - "cached": True, - "requires_sudo": _requires_sudo(normalized), - } - - if command_hash in cache.get("allowed_once", {}): - cached = cache["allowed_once"][command_hash] - if cached.get("task_id") == task_id: - return { - "decision": "allowed_once", - "command": normalized, - "cached": True, - "requires_sudo": _requires_sudo(normalized), - } - - # Check hard stop - if self._is_hard_stop(normalized): - return { - "decision": "hard_stop", - "command": normalized, - "reason": "Hard stop command - execution 
denied", - } - - if not self._categories and self._legacy_dangerous_commands: - if self._matches_legacy_dangerous(normalized): - return { - "decision": "prompt", - "command": normalized, - "category": "legacy_dangerous", - "allow_always": False, - "task_id": task_id, - "session_id": session_id, - } - return { - "decision": "allowed", - "command": normalized, - "category": "legacy_safe", - "task_id": task_id, - "session_id": session_id, - } - - # Check no_always category - category = self._get_category(normalized) - can_always = self._categories.get(category, {}).get("allow_always", True) - - # Check if command requires sudo (e.g. apt, systemctl without explicit sudo prefix) - requires_sudo = _requires_sudo(normalized) - - # Need user confirmation - result = { - "decision": "prompt", - "command": normalized, - "category": category, - "allow_always": can_always, - "requires_sudo": requires_sudo, - "task_id": task_id, - "session_id": session_id, - } - return result - - def check_write_path( - self, - task_id: str, - session_id: str, - path: str, - ) -> dict[str, Any]: - """Check if write path requires permission.""" - if not self._path_settings and self._legacy_sensitive_paths: - if any(path.startswith(sensitive) for sensitive in self._legacy_sensitive_paths): - return { - "decision": "prompt", - "path": path, - "task_id": task_id, - "session_id": session_id, - } - return {"decision": "allowed", "path": path} - - allow_write_paths = self._path_settings.get("allow_write_paths", []) - - # Check if path is in allowed list - for allowed in allow_write_paths: - if path.startswith(allowed): - return {"decision": "allowed", "path": path} - - # Otherwise require permission - return { - "decision": "prompt", - "path": path, - "task_id": task_id, - "session_id": session_id, - } - - def resolve_permission( - self, - task_id: str, - session_id: str, - command: str, - decision: str, - ) -> dict[str, Any]: - """Resolve permission decision from user.""" - normalized = 
self._normalize_command(command) - command_hash = self._hash_command(normalized) - - cache = self._load_cache() - - if decision == "allow_once": - cache.setdefault("allowed_once", {})[command_hash] = { - "command": normalized, - "task_id": task_id, - "session_id": session_id, - } - self._save_cache(cache) - return {"status": "allowed_once", "command": normalized} - - elif decision == "allow_always": - cache.setdefault("allowed_always", {})[command_hash] = { - "command": normalized, - "task_id": task_id, - "session_id": session_id, - } - self._save_cache(cache) - return {"status": "allowed_always", "command": normalized} - - elif decision == "deny": - return {"status": "denied", "command": normalized} - - return {"status": "unknown", "decision": decision} - - def clear_cache(self) -> dict[str, Any]: - """Clear permission cache.""" - cache = {"allowed_once": {}, "allowed_always": {}} - self._save_cache(cache) - return {"status": "cache_cleared"} - - def _normalize_command(self, command: str) -> str: - """Normalize command for consistent hashing.""" - if not self._settings.get("normalize_commands", True): - return command.strip() - - normalized = command.strip() - - # Split chained commands if enabled - if self._settings.get("split_chained", True): - # Replace ; and || with && for splitting - normalized = normalized.replace(";", " && ") - normalized = normalized.replace("||", " && ") - - # Resolve environment variables - try: - normalized = os.path.expandvars(normalized) - except: - pass - - # Resolve home directory - normalized = normalized.replace("~", os.path.expanduser("~")) - - # Remove extra whitespace - normalized = " ".join(normalized.split()) - - return normalized - - def _hash_command(self, command: str) -> str: - """Generate hash for command.""" - return hashlib.sha256(command.encode()).hexdigest()[:16] - - def _matches_legacy_dangerous(self, command: str) -> bool: - cmd_lower = command.lower() - for pattern in self._legacy_dangerous_commands: - if 
pattern.lower() in cmd_lower: - return True - return False - - def _is_hard_stop(self, command: str) -> bool: - """Check if command is hard stop.""" - hard_stop_commands = self._categories.get("hard_stop", {}).get("commands", []) - - cmd_lower = command.lower().strip() - cmd_tokens = cmd_lower.split() - - for hs in hard_stop_commands: - hs_lower = hs.lower().strip() - # For "rm -rf /" and "rm -rf /*", only match exact command - # Don't match "rm -rf /tmp/nonexistent" as hard stop - if hs_lower in ("rm -rf /", "rm -rf /*"): - if cmd_lower == hs_lower: - return True - continue - # For other patterns, use substring match - if hs_lower in cmd_lower: - return True - - return False - - def _get_category(self, command: str) -> str: - """Get command category.""" - cmd_lower = command.lower().strip() - cmd_first_word = cmd_lower.split()[0] if cmd_lower.split() else "" - - # Check no_always category — match by first word or known multi-word prefixes - no_always = self._categories.get("no_always", {}).get("commands", []) - for pattern in no_always: - pat_lower = pattern.lower().strip() - # Match if first word matches (e.g. "apt" matches "apt list --upgradable") - # or if command starts with the pattern (e.g. 
"systemctl stop" matches "systemctl stop nginx") - if cmd_first_word == pat_lower or cmd_lower.startswith(pat_lower + " "): - return "no_always" - - # Check hard_stop by first word - hard_stop = self._categories.get("hard_stop", {}).get("commands", []) - for pattern in hard_stop: - pat_lower = pattern.lower().strip() - if cmd_first_word == pat_lower or cmd_lower.startswith(pat_lower + " "): - return "hard_stop" - - # Default to normal - return "normal" - - -SUDO_COMMANDS = { - "sudo", - "apt", "apt-get", "dpkg", "yum", "dnf", "pacman", "zypper", - "systemctl", "service", "mount", "umount", - "shutdown", "reboot", "halt", "poweroff", - "useradd", "usermod", "userdel", "groupadd", "groupmod", - "chmod", "chown", "chgrp", - "iptables", "ufw", - "kill", "killall", "pkill", -} - - -def _requires_sudo(command: str) -> bool: - """Check if command requires sudo.""" - if not command: - return False - cmd_lower = command.lower().strip() - first_word = cmd_lower.split()[0] if cmd_lower.split() else "" - return first_word in SUDO_COMMANDS - - -class PermissionRequest: - """Permission request to user.""" - - def __init__( - self, - task_id: str, - session_id: str, - command: str, - category: str = "normal", - allow_always: bool = True, - ) -> None: - self.task_id = task_id - self.session_id = session_id - self.command = command - self.category = category - self.allow_always = allow_always - self.requires_password = _requires_sudo(command) - - def to_dict(self) -> dict[str, Any]: - return { - "task_id": self.task_id, - "session_id": self.session_id, - "command": self.command, - "category": self.category, - "allow_always": self.allow_always, - "requires_password": self.requires_password, - "buttons": self._get_buttons(), - } - - def _get_buttons(self) -> list[dict[str, str]]: - buttons = [{"action": "deny", "label": "Запретить"}] - - if self.allow_always: - buttons.insert(0, {"action": "allow_always", "label": "Разрешить навсегда"}) - - if self.requires_password: - 
buttons.insert(0, {"action": "allow_with_password", "label": "Разрешить с паролем"}) - else: - buttons.insert(0, {"action": "allow_once", "label": "Разрешить"}) - - return buttons - - -class PermissionDecision: - """Permission decision.""" - - def __init__( - self, - decision: str, - command: str | None = None, - cached: bool = False, - ) -> None: - self.decision = decision - self.command = command - self.cached = cached diff --git a/app/events/__init__.py b/app/events/__init__.py deleted file mode 100644 index b89dc5d..0000000 --- a/app/events/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -"""Event bus and event store.""" - diff --git a/app/events/event_bus.py b/app/events/event_bus.py deleted file mode 100644 index fee048f..0000000 --- a/app/events/event_bus.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import annotations - -from typing import Callable - -from app.core.contracts import RuntimeEvent -from app.events.event_store import SQLiteEventStore - - -Subscriber = Callable[[RuntimeEvent], None] - - -class EventBus: - """Per-task ordered event publishing with durable storage.""" - - def __init__(self, event_store: SQLiteEventStore) -> None: - self._store = event_store - self._subscribers: list[Subscriber] = [] - - def next_sequence(self, task_id: str) -> int: - return self._store.get_latest_sequence(task_id) + 1 - - def publish(self, event: RuntimeEvent) -> RuntimeEvent: - self._store.append(event) - for subscriber in self._subscribers: - subscriber(event) - return event - - def subscribe(self, subscriber: Subscriber) -> None: - self._subscribers.append(subscriber) - - def list_for_task(self, task_id: str) -> list[RuntimeEvent]: - return self._store.list_for_task(task_id) - - def list_recent(self, limit: int = 500) -> list[RuntimeEvent]: - return self._store.list_recent(limit=limit) diff --git a/app/events/event_store.py b/app/events/event_store.py deleted file mode 100644 index 111d373..0000000 --- a/app/events/event_store.py +++ /dev/null @@ -1,122 +0,0 @@ 
-from __future__ import annotations - -import json -import sqlite3 -from pathlib import Path - -from app.core.contracts import RuntimeEvent - - -class SQLiteEventStore: - """Append-only event store with per-task ordered history.""" - - def __init__(self, db_path: str | Path) -> None: - self._db_path = Path(db_path) - self._db_path.parent.mkdir(parents=True, exist_ok=True) - self._initialize() - - def append(self, event: RuntimeEvent) -> None: - with sqlite3.connect(self._db_path) as conn: - conn.execute( - """ - INSERT INTO events ( - event_id, task_id, session_id, sequence, type, timestamp, - payload_json, causation_id, correlation_id - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) - """, - ( - event.event_id, - event.task_id, - event.session_id, - event.sequence, - event.type, - event.timestamp.isoformat(), - json.dumps(event.payload), - event.causation_id, - event.correlation_id, - ), - ) - conn.commit() - - def list_for_task(self, task_id: str) -> list[RuntimeEvent]: - with sqlite3.connect(self._db_path) as conn: - rows = conn.execute( - """ - SELECT event_id, task_id, session_id, sequence, type, timestamp, - payload_json, causation_id, correlation_id - FROM events - WHERE task_id = ? - ORDER BY sequence ASC - """, - (task_id,), - ).fetchall() - return [ - RuntimeEvent( - event_id=row[0], - task_id=row[1], - session_id=row[2], - sequence=row[3], - type=row[4], - timestamp=row[5], - payload=json.loads(row[6]), - causation_id=row[7], - correlation_id=row[8], - ) - for row in rows - ] - - def list_recent(self, limit: int = 500) -> list[RuntimeEvent]: - with sqlite3.connect(self._db_path) as conn: - rows = conn.execute( - """ - SELECT event_id, task_id, session_id, sequence, type, timestamp, - payload_json, causation_id, correlation_id - FROM events - ORDER BY timestamp DESC, task_id DESC, sequence DESC - LIMIT ? 
- """, - (limit,), - ).fetchall() - events = [ - RuntimeEvent( - event_id=row[0], - task_id=row[1], - session_id=row[2], - sequence=row[3], - type=row[4], - timestamp=row[5], - payload=json.loads(row[6]), - causation_id=row[7], - correlation_id=row[8], - ) - for row in rows - ] - return list(reversed(events)) - - def get_latest_sequence(self, task_id: str) -> int: - with sqlite3.connect(self._db_path) as conn: - row = conn.execute( - "SELECT COALESCE(MAX(sequence), 0) FROM events WHERE task_id = ?", - (task_id,), - ).fetchone() - return int(row[0]) if row else 0 - - def _initialize(self) -> None: - with sqlite3.connect(self._db_path) as conn: - conn.execute( - """ - CREATE TABLE IF NOT EXISTS events ( - event_id TEXT PRIMARY KEY, - task_id TEXT NOT NULL, - session_id TEXT NOT NULL, - sequence INTEGER NOT NULL, - type TEXT NOT NULL, - timestamp TEXT NOT NULL, - payload_json TEXT NOT NULL, - causation_id TEXT, - correlation_id TEXT NOT NULL, - UNIQUE(task_id, sequence) - ) - """ - ) - conn.commit() diff --git a/app/events/event_types.py b/app/events/event_types.py deleted file mode 100644 index 7ab7e91..0000000 --- a/app/events/event_types.py +++ /dev/null @@ -1,35 +0,0 @@ -TASK_RECEIVED = "task_received" -CONTEXT_BUILT = "context_built" -STEP_STARTED = "step_started" -TOOL_CALLED = "tool_called" -TOOL_OUTPUT_CHUNK = "tool_output_chunk" -TOOL_COMPLETED = "tool_completed" -PERMISSION_REQUESTED = "permission_requested" -PERMISSION_RESOLVED = "permission_resolved" -TASK_AWAITING_PERMISSION = "task_awaiting_permission" -SECRET_REQUESTED = "secret_requested" -TASK_AWAITING_INPUT = "task_awaiting_input" -TASK_AWAITING_REVIEW = "task_awaiting_review" -REVIEW_RESOLVED = "review_resolved" -CHECKPOINT_SAVED = "checkpoint_saved" -TASK_COMPLETED = "task_completed" -TASK_FAILED = "task_failed" -ORCHESTRATOR_CALLED = "orchestrator_called" -ORCHESTRATOR_RESULT = "orchestrator_result" -ORCHESTRATOR_UNAVAILABLE = "orchestrator_unavailable" -ORCHESTRATOR_FALLBACK_USED = 
"orchestrator_fallback_used" -ORCHESTRATOR_RETRY = "orchestrator_retry" -PLANNER_CALLED = "planner_called" -PLANNER_RETRY = "planner_retry" -CRITIC_CALLED = "critic_called" -CRITIC_RESULT = "critic_result" -MEMORY_WRITE_DECIDED = "memory_write_decided" -PLAN_STARTED = "plan_started" -PLAN_FAILED = "plan_failed" -PLAN_COMPLETED = "plan_completed" -STEPPED_COMPLETED = "step_completed" -THINKER_CALLED = "thinker_called" -THINKER_RESULT = "thinker_result" -JSON_COMPILER_CALLED = "json_compiler_called" -JSON_COMPILER_RESULT = "json_compiler_result" -MEMORY_RECALL_USED = "memory_recall_used" diff --git a/app/memory/__init__.py b/app/memory/__init__.py deleted file mode 100644 index f912364..0000000 --- a/app/memory/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -MEMORY_AVAILABLE = False -VECTOR_AVAILABLE = False - -try: - from app.memory.store import MemoryStore - from app.memory.vector_index import VectorIndex - from app.memory.interface import MemoryInterface - from app.memory.write_policy import MemoryWritePolicy - MEMORY_AVAILABLE = True - VECTOR_AVAILABLE = True -except ImportError: - MemoryStore = None - VectorIndex = None - MemoryInterface = None - MemoryWritePolicy = None - -__all__ = [ - "MemoryStore", - "VectorIndex", - "MemoryInterface", - "MemoryWritePolicy", - "MEMORY_AVAILABLE", - "VECTOR_AVAILABLE", -] \ No newline at end of file diff --git a/app/memory/interface.py b/app/memory/interface.py deleted file mode 100644 index 8ab756c..0000000 --- a/app/memory/interface.py +++ /dev/null @@ -1,155 +0,0 @@ -from __future__ import annotations - -import json -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Literal - -import numpy as np - -from app.core.contracts import MemoryEntry -from app.memory.store import MemoryStore -from app.memory.vector_index import VectorIndex -from app.models.embeddings import EmbeddingsAdapter - - -class MemoryInterface: - def __init__( - self, - store: MemoryStore, - vector_index: VectorIndex, - 
embeddings: EmbeddingsAdapter, - ) -> None: - self._store = store - self._vector_index = vector_index - self._embeddings = embeddings - - def insert( - self, - text: str, - kind: Literal["tool_result", "plan", "critique", "fact", "summary", "user_preference"], - source: Literal["tool", "critic", "user", "system"], - task_id: str | None = None, - session_id: str | None = None, - weight: float = 0.5, - metadata: dict[str, Any] | None = None, - ) -> MemoryEntry: - entry = MemoryEntry( - text=text, - kind=kind, - source=source, - weight=weight, - task_id=task_id, - session_id=session_id, - metadata=metadata or {}, - embedding_model=self._embeddings.__class__.__name__, - embedding_dim=self._embeddings.embedding_dim, - ) - - embedding = self._embeddings.encode(text) - embedding_bytes = embedding.astype("float32").tobytes() - - self._store.insert(entry, embedding_bytes) - self._vector_index.insert(entry.id, embedding) - self._vector_index.save() - - self.cleanup() - - return entry - - def search( - self, - query: str, - top_k: int = 5, - kind: str | None = None, - session_id: str | None = None, - ) -> list[tuple[MemoryEntry, float]]: - query_embedding = self._embeddings.encode(query) - memory_ids, scores = self._vector_index.search(query_embedding, k=top_k) - - results: list[tuple[MemoryEntry, float]] = [] - for memory_id, score in zip(memory_ids, scores): - entry = self._store.get(memory_id) - if entry: - if kind and entry.kind != kind: - continue - if session_id and entry.session_id != session_id: - continue - results.append((entry, score)) - - return results[:top_k] - - def get(self, memory_id: str) -> MemoryEntry | None: - return self._store.get(memory_id) - - def delete(self, memory_id: str) -> bool: - entry = self._store.get(memory_id) - if entry: - self._vector_index.delete(memory_id) - return self._store.delete(memory_id) - return False - - def get_by_task(self, task_id: str) -> list[MemoryEntry]: - return self._store.get_by_task(task_id) - - def 
get_by_session(self, session_id: str, limit: int = 100) -> list[MemoryEntry]: - return self._store.get_by_session(session_id, limit) - - def get_recent(self, limit: int = 10) -> list[MemoryEntry]: - return self._store.get_all(limit) - - def count(self) -> int: - return self._store.count() - - def reindex(self) -> int: - """Rebuild vector index from all entries in memory store. - Returns number of indexed entries.""" - entries = self._store.get_all(limit=10000) - # Delete old index file and re-initialize from scratch - import os - if self._vector_index._index_path and self._vector_index._index_path.exists(): - self._vector_index._index_path.unlink() - self._vector_index._index = None - self._vector_index._init_index() - count = 0 - for entry in entries: - text = entry.text - embedding = self._embeddings.encode(text) - self._vector_index.insert(entry.id, embedding) - count += 1 - self._vector_index.save() - return count - - def close(self) -> None: - self._store.close() - - def cleanup(self, max_items: int = 750, decay_factor: float = 0.95) -> int: - """Remove low-weight entries when exceeding max_items limit. - - Applies weight decay based on freshness before cleanup. - Returns number of removed entries. 
- """ - current_count = self._store.count() - if current_count <= max_items: - return 0 - - removed = 0 - entries_to_remove = current_count - max_items - - all_entries = self._store.get_all(limit=current_count) - - def effective_weight(entry: MemoryEntry) -> float: - entry_weight = entry.weight - if entry.created_at: - age_days = (datetime.now(timezone.utc) - entry.created_at).total_seconds() / 86400 - freshness_factor = max(0.1, decay_factor ** age_days) - return entry_weight * freshness_factor - return entry_weight - - sorted_entries = sorted(all_entries, key=effective_weight) - - for entry in sorted_entries[:entries_to_remove]: - self._store.delete(entry.id) - removed += 1 - - return removed \ No newline at end of file diff --git a/app/memory/recall.py b/app/memory/recall.py deleted file mode 100644 index e0e847a..0000000 --- a/app/memory/recall.py +++ /dev/null @@ -1,205 +0,0 @@ -from __future__ import annotations - -import json -import logging -from typing import Any - -from app.core.contracts import MemoryEntry -from app.memory.interface import MemoryInterface -from app.models.async_adapters import AsyncOrchestratorAdapter - -logger = logging.getLogger(__name__) - -RECALL_PROMPT_TEMPLATE = """Определи, нужно ли искать в долговременной памяти для ответа на этот запрос. 
- -Запрос: "{task_input}" - -ИСКАТЬ в памяти если запрос: -- Содержит вопрос о пользователе (имя, предпочтения, история) -- Содержит отсылки к прошлым разговорам или действиям -- Содержит местоимения без контекста ("он", "это", "тот файл") -- Просит вспомнить, повторить, рассказать о прошлом -- Спрашивает "что ты помнишь", "как меня зовут", "что я говорил" - -НЕ ИСКАТЬ если: -- Приветствие или прощание -- Простая команда (ls, pwd, echo) -- Общий вопрос не связанный с прошлым - -Ответь ТОЛЬКО JSON: -{{"should_recall": true, "search_query": "поисковый запрос"}} -или -{{"should_recall": false, "reason": "краткая причина"}}""" - - -class MemoryRecallService: - """Активное воспоминание: система сама решает, что и когда искать в памяти.""" - - def __init__( - self, - memory_interface: MemoryInterface | None, - recall_model: AsyncOrchestratorAdapter | None, - ) -> None: - self._memory = memory_interface - self._model = recall_model - - async def recall( - self, - task_input: str, - top_k: int = 5, - ) -> dict[str, Any]: - """ - Определяет необходимость воспоминания и выполняет поиск. - - Возвращает: - { - "should_recall": bool, - "reason": str, - "query": str, - "results": list[MemoryEntry], - "summary": str, # краткая сводка для оркестратора - } - """ - if not self._memory or not self._model: - with open("/tmp/recall_debug.log", "a") as f: - f.write(f"SKIP: memory={self._memory is not None}, model={self._model is not None}\n") - return self._empty_result("memory_or_model_unavailable") - - # 1. 
LLM решает, нужно ли искать - decision = await self._classify(task_input) - with open("/tmp/recall_debug.log", "a") as f: - f.write(f"DECISION type={type(decision)} value={decision}\n") - if not isinstance(decision, dict): - return self._empty_result("invalid_decision_type") - if not decision.get("should_recall"): - return self._empty_result(decision.get("reason", "not_needed")) - - search_query = decision.get("search_query", task_input) - logger.info(f"Memory recall: query='{search_query}', reason='{decision.get('reason')}'") - - # 2. Векторный поиск - try: - raw_results = self._memory.search(query=search_query, top_k=top_k) - except Exception as e: - logger.warning(f"Memory search failed: {e}") - return self._empty_result("search_failed") - - # 3. Фильтрация: убираем пустые и слишком нерелевантные - filtered = self._filter(raw_results) - - if not filtered: - return self._empty_result("no_relevant_results") - - # 4. Сводка для оркестратора - summary = self._summarize(filtered, search_query) - - return { - "should_recall": True, - "reason": decision.get("reason", ""), - "query": search_query, - "results": filtered, - "summary": summary, - } - - async def _classify(self, task_input: str) -> dict[str, Any]: - """LLM-классификация: нужно ли искать в памяти.""" - prompt = RECALL_PROMPT_TEMPLATE.format(task_input=task_input) - - try: - raw = await self._model.generate(prompt, max_tokens=512) - data = self._parse_json(raw) - if "should_recall" in data: - return data - logger.warning(f"Recall classification missing 'should_recall': {raw[:200]}") - return {"should_recall": False, "reason": "parse_error"} - except Exception as e: - logger.warning(f"Recall classification failed: {e}") - return {"should_recall": False, "reason": "classification_error"} - - def _filter( - self, - results: list[tuple[MemoryEntry, float]], - min_score: float = 0.3, - ) -> list[MemoryEntry]: - """Фильтрует результаты по score и убирает дубликаты.""" - seen_texts: set[str] = set() - filtered: 
list[MemoryEntry] = [] - - for entry, score in results: - if score < min_score: - continue - # Нормализуем текст для дедупликации - normalized = entry.text.strip().lower()[:100] - if normalized in seen_texts: - continue - seen_texts.add(normalized) - filtered.append(entry) - - return filtered - - def _summarize( - self, - results: list[MemoryEntry], - query: str, - ) -> str: - """Краткая сводка найденного для оркестратора.""" - parts = [f"По запросу '{query}' найдено {len(results)} записей:"] - for i, entry in enumerate(results[:5], 1): - text_preview = entry.text[:120].replace("\n", " ") - parts.append(f" {i}. [{entry.kind}] {text_preview}") - return "\n".join(parts) - - def _parse_json(self, raw: str) -> dict[str, Any]: - """Извлекает JSON из ответа модели, пропуская рассуждения перед ним.""" - try: - json_start = raw.find("{") - json_end = raw.rfind("}") + 1 - - if json_start < 0 or json_end <= 0: - return {} - - # Пробуем весь текст от первого { до последнего } - try: - data = json.loads(raw[json_start:json_end]) - if isinstance(data, dict): - return data - except json.JSONDecodeError: - pass - - # Ищем все возможные начала JSON - candidates = [] - pos = 0 - while True: - pos = raw.find("{", pos) - if pos < 0: - break - candidates.append(pos) - pos += 1 - - # Пробуем каждый candidate с конца - for start in reversed(candidates): - end = raw.rfind("}") + 1 - if end <= start: - continue - try: - data = json.loads(raw[start:end]) - if isinstance(data, dict): - return data - except json.JSONDecodeError: - continue - - return {} - except Exception as e: - with open("/tmp/recall_debug.log", "a") as f: - f.write(f"PARSE ERROR: {e}\n") - return {} - - @staticmethod - def _empty_result(reason: str) -> dict[str, Any]: - return { - "should_recall": False, - "reason": reason, - "query": "", - "results": [], - "summary": "", - } diff --git a/app/memory/store.py b/app/memory/store.py deleted file mode 100644 index dcf76ee..0000000 --- a/app/memory/store.py +++ /dev/null @@ 
-1,185 +0,0 @@ -from __future__ import annotations - -import json -import sqlite3 -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Sequence -from uuid import uuid4 - -from app.core.contracts import MemoryEntry - - -def utc_now() -> datetime: - return datetime.now(timezone.utc) - - -class MemoryStore: - def __init__(self, db_path: str | Path) -> None: - self._db_path = Path(db_path) - self._db_path.parent.mkdir(parents=True, exist_ok=True) - self._conn = sqlite3.connect(str(self._db_path), check_same_thread=False) - self._conn.row_factory = sqlite3.Row - self._init_tables() - - def _init_tables(self) -> None: - self._conn.executescript(""" - CREATE TABLE IF NOT EXISTS memory_items ( - id TEXT PRIMARY KEY, - text TEXT NOT NULL, - kind TEXT NOT NULL, - source TEXT NOT NULL, - weight REAL NOT NULL DEFAULT 0.5, - task_id TEXT, - session_id TEXT, - metadata_json TEXT, - created_at TEXT NOT NULL, - updated_at TEXT NOT NULL - ); - - CREATE TABLE IF NOT EXISTS memory_embeddings ( - memory_id TEXT PRIMARY KEY, - embedding BLOB NOT NULL, - embedding_model TEXT NOT NULL, - embedding_dim INTEGER NOT NULL, - created_at TEXT NOT NULL, - FOREIGN KEY (memory_id) REFERENCES memory_items(id) ON DELETE CASCADE - ); - - CREATE INDEX IF NOT EXISTS idx_memory_items_task ON memory_items(task_id); - CREATE INDEX IF NOT EXISTS idx_memory_items_session ON memory_items(session_id); - CREATE INDEX IF NOT EXISTS idx_memory_items_kind ON memory_items(kind); - CREATE INDEX IF NOT EXISTS idx_memory_embeddings_model ON memory_embeddings(embedding_model); - """) - self._conn.commit() - - def insert(self, entry: MemoryEntry, embedding: bytes) -> None: - cursor = self._conn.cursor() - cursor.execute( - """ - INSERT INTO memory_items (id, text, kind, source, weight, task_id, session_id, metadata_json, created_at, updated_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
- """, - ( - entry.id, - entry.text, - entry.kind, - entry.source, - entry.weight, - entry.task_id, - entry.session_id, - json.dumps(entry.metadata) if entry.metadata else None, - entry.created_at.isoformat(), - utc_now().isoformat(), - ), - ) - cursor.execute( - """ - INSERT INTO memory_embeddings (memory_id, embedding, embedding_model, embedding_dim, created_at) - VALUES (?, ?, ?, ?, ?) - """, - ( - entry.id, - embedding, - entry.embedding_model, - entry.embedding_dim, - utc_now().isoformat(), - ), - ) - self._conn.commit() - - def get(self, memory_id: str) -> MemoryEntry | None: - cursor = self._conn.cursor() - row = cursor.execute( - "SELECT * FROM memory_items WHERE id = ?", (memory_id,) - ).fetchone() - if not row: - return None - return self._row_to_entry(row) - - def get_embedding(self, memory_id: str) -> bytes | None: - cursor = self._conn.cursor() - row = cursor.execute( - "SELECT embedding FROM memory_embeddings WHERE memory_id = ?", (memory_id,) - ).fetchone() - return bytes(row["embedding"]) if row else None - - def get_all(self, limit: int = 1000) -> list[MemoryEntry]: - cursor = self._conn.cursor() - rows = cursor.execute( - "SELECT * FROM memory_items ORDER BY created_at DESC LIMIT ?", (limit,) - ).fetchall() - return [self._row_to_entry(row) for row in rows] - - def get_by_task(self, task_id: str) -> list[MemoryEntry]: - cursor = self._conn.cursor() - rows = cursor.execute( - "SELECT * FROM memory_items WHERE task_id = ? ORDER BY created_at DESC", (task_id,) - ).fetchall() - return [self._row_to_entry(row) for row in rows] - - def get_by_session(self, session_id: str, limit: int = 100) -> list[MemoryEntry]: - cursor = self._conn.cursor() - rows = cursor.execute( - "SELECT * FROM memory_items WHERE session_id = ? 
ORDER BY created_at DESC LIMIT ?", - (session_id, limit), - ).fetchall() - return [self._row_to_entry(row) for row in rows] - - def get_by_kind(self, kind: str, limit: int = 100) -> list[MemoryEntry]: - cursor = self._conn.cursor() - rows = cursor.execute( - "SELECT * FROM memory_items WHERE kind = ? ORDER BY created_at DESC LIMIT ?", (kind, limit) - ).fetchall() - return [self._row_to_entry(row) for row in rows] - - def delete(self, memory_id: str) -> bool: - cursor = self._conn.cursor() - cursor.execute("DELETE FROM memory_embeddings WHERE memory_id = ?", (memory_id,)) - cursor.execute("DELETE FROM memory_items WHERE id = ?", (memory_id,)) - self._conn.commit() - return cursor.rowcount > 0 - - def update_weight(self, memory_id: str, weight: float) -> bool: - cursor = self._conn.cursor() - cursor.execute( - "UPDATE memory_items SET weight = ?, updated_at = ? WHERE id = ?", - (weight, utc_now().isoformat(), memory_id), - ) - self._conn.commit() - return cursor.rowcount > 0 - - def search_text(self, query: str, limit: int = 10) -> list[MemoryEntry]: - cursor = self._conn.cursor() - rows = cursor.execute( - "SELECT * FROM memory_items WHERE text LIKE ? 
ORDER BY created_at DESC LIMIT ?", - (f"%{query}%", limit), - ).fetchall() - return [self._row_to_entry(row) for row in rows] - - def count(self) -> int: - cursor = self._conn.cursor() - row = cursor.execute("SELECT COUNT(*) FROM memory_items").fetchone() - return row[0] if row else 0 - - def close(self) -> None: - self._conn.close() - - def _row_to_entry(self, row: sqlite3.Row) -> MemoryEntry: - metadata = {} - if row["metadata_json"]: - import json - metadata = json.loads(row["metadata_json"]) - return MemoryEntry( - id=row["id"], - text=row["text"], - kind=row["kind"], - source=row["source"], - weight=row["weight"], - task_id=row["task_id"], - session_id=row["session_id"], - metadata=metadata, - created_at=datetime.fromisoformat(row["created_at"]), - embedding_model="", - embedding_dim=0, - ) \ No newline at end of file diff --git a/app/memory/vector_index.py b/app/memory/vector_index.py deleted file mode 100644 index fb24fcf..0000000 --- a/app/memory/vector_index.py +++ /dev/null @@ -1,149 +0,0 @@ -from __future__ import annotations - -import logging -import numpy as np -import hnswlib -from pathlib import Path -from typing import Any - -logger = logging.getLogger(__name__) - - -class VectorIndex: - def __init__( - self, - index_path: str | Path | None = None, - embedding_dim: int = 384, - max_elements: int = 10000, - ) -> None: - self._embedding_dim = embedding_dim - self._index_path = Path(index_path) if index_path else None - self._index: hnswlib.Index | None = None - self._max_elements = max_elements - self._loading = False # Prevent recursion - - self._init_index() - - def _init_index(self) -> None: - if self._loading: - return - self._loading = True - try: - if self._index_path and self._index_path.exists(): - self._load() - else: - self._index = hnswlib.Index( - space="l2", - dim=self._embedding_dim, - ) - self._index.init_index( - max_elements=self._max_elements, - ef_construction=200, - M=16, - ) - except Exception as e: - logger.warning(f"VectorIndex 
init failed: {e}") - self._index = hnswlib.Index( - space="l2", - dim=self._embedding_dim, - ) - self._index.init_index( - max_elements=self._max_elements, - ef_construction=100, - M=16, - ) - finally: - self._loading = False - - def insert(self, memory_id: str, embedding: np.ndarray) -> None: - if self._index is None: - self._init_index() - if self._index is None: - return - - try: - vector = self._normalize(embedding) - internal_id = self._get_internal_id(memory_id) - self._index.add_items(vector, ids=np.array([internal_id])) - except Exception as e: - logger.warning(f"VectorIndex insert failed: {e}") - - def search( - self, - query_embedding: np.ndarray, - k: int = 5, - ) -> tuple[list[str], list[float]]: - if self._index is None: - return [], [] - - try: - if self._index.get_current_count() == 0: - return [], [] - - # Set ef to at least k for proper search - self._index.set_ef(max(k * 2, 50)) - - vector = self._normalize(query_embedding) - labels, distances = self._index.knn_query(vector, k=k) - - memory_ids = [self._get_memory_id(int(label)) for label in labels[0]] - scores = [1.0 - dist for dist in distances[0]] - return memory_ids, scores - except Exception as e: - logger.warning(f"VectorIndex search failed: {e}") - return [], [] - - def delete(self, memory_id: str) -> bool: - return False - - def get_items(self, memory_ids: list[str]) -> np.ndarray: - if self._index is None: - raise RuntimeError("Index not initialized") - internal_ids = [self._get_internal_id(mid) for mid in memory_ids] - return self._index.get_items(np.array(internal_ids)) - - def save(self) -> None: - if self._index and self._index_path: - try: - self._index_path.parent.mkdir(parents=True, exist_ok=True) - self._index.save_index(str(self._index_path)) - except Exception as e: - logger.warning(f"VectorIndex save failed: {e}") - - def _load(self) -> None: - if self._loading: - return - self._loading = True - try: - if self._index_path and self._index_path.exists(): - self._index = 
hnswlib.Index(space="l2", dim=self._embedding_dim) - self._index.load_index( - str(self._index_path), - max_elements=self._max_elements - ) - except Exception as e: - logger.warning(f"VectorIndex load failed: {e}") - self._init_index() - finally: - self._loading = False - - def _normalize(self, vector: np.ndarray) -> np.ndarray: - vec = vector.flatten() - norm = np.linalg.norm(vec) - if norm > 0: - vec = vec / norm - return vec.reshape(1, -1) - - def _get_internal_id(self, memory_id: str) -> int: - return hash(memory_id) % (2**31) - - def _get_memory_id(self, internal_id: int) -> str: - return str(internal_id) - - @property - def embedding_dim(self) -> int: - return self._embedding_dim - - @property - def element_count(self) -> int: - return self._index.get_current_count() if self._index else 0 \ No newline at end of file diff --git a/app/memory/write_policy.py b/app/memory/write_policy.py deleted file mode 100644 index 9ac6c85..0000000 --- a/app/memory/write_policy.py +++ /dev/null @@ -1,98 +0,0 @@ -from __future__ import annotations - -from typing import Any, Literal - -from app.core.contracts import CriticScore, MemoryEntry - - -class MemoryWritePolicy: - def __init__( - self, - store_threshold: float = 0.7, - min_usefulness: float = 0.3, - max_entries_per_session: int = 50, - ) -> None: - self._store_threshold = store_threshold - self._min_usefulness = min_usefulness - self._max_entries_per_session = max_entries_per_session - - def decide( - self, - critic_score: CriticScore, - memory_type: MemoryEntry.Kind, - session_id: str | None = None, - has_duplicate: bool = False, - current_session_count: int = 0, - ) -> Literal["store", "store_with_weight", "skip", "merge"]: - if critic_score.safety < 0.5: - return "skip" - - if has_duplicate: - return "merge" - - if not critic_score.memory_store: - return "skip" - - if critic_score.usefulness < self._min_usefulness: - return "skip" - - if session_id and current_session_count >= self._max_entries_per_session: - return 
"skip" - - base_decision = self._evaluate_scores(critic_score, memory_type) - - if base_decision == "store" and critic_score.weight < self._store_threshold: - adjusted_weight = self._adjust_weight(critic_score, memory_type) - if adjusted_weight >= self._store_threshold: - return "store_with_weight" - return base_decision - - return base_decision - - def _evaluate_scores( - self, - critic_score: CriticScore, - memory_type: MemoryEntry.Kind, - ) -> Literal["store", "store_with_weight", "skip", "merge"]: - avg_score = (critic_score.correctness + critic_score.usefulness + critic_score.safety) / 3.0 - - if memory_type in ("fact", "plan", "summary"): - if avg_score >= 0.8: - return "store" - elif avg_score >= 0.6: - return "store_with_weight" - - if memory_type in ("tool_result", "critique"): - if avg_score >= self._store_threshold: - return "store" - elif avg_score >= 0.5: - return "store_with_weight" - - if memory_type == "user_preference": - if avg_score >= 0.5: - return "store" - - return "skip" - - def _adjust_weight( - self, - critic_score: CriticScore, - memory_type: MemoryEntry.Kind, - ) -> float: - base_weight = critic_score.weight - - type_boost = { - "fact": 0.15, - "plan": 0.1, - "summary": 0.1, - "user_preference": 0.2, - "tool_result": 0.05, - "critique": 0.05, - }.get(memory_type, 0.0) - - safety_boost = 0.0 - if critic_score.safety >= 0.9: - safety_boost = 0.1 - - adjusted = base_weight + type_boost + safety_boost - return min(adjusted, 1.0) \ No newline at end of file diff --git a/app/models/__init__.py b/app/models/__init__.py deleted file mode 100644 index 3c4e242..0000000 --- a/app/models/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -LLM_AVAILABLE = False -EMBEDDINGS_AVAILABLE = False - -try: - from app.models.adapters import create_adapter, create_llama_adapter - from app.models.orchestrator import OrchestratorAdapter - from app.models.coder import CoderAdapter - from app.models.critic import CriticAdapter - LLM_AVAILABLE = True -except ImportError: - 
create_adapter = None - create_llama_adapter = None - OrchestratorAdapter = None - CoderAdapter = None - CriticAdapter = None - -try: - from app.models.embeddings import EmbeddingsAdapter - EMBEDDINGS_AVAILABLE = True -except ImportError: - EmbeddingsAdapter = None - -__all__ = [ - "create_adapter", - "create_llama_adapter", - "OrchestratorAdapter", - "CoderAdapter", - "CriticAdapter", - "EmbeddingsAdapter", - "LLM_AVAILABLE", - "EMBEDDINGS_AVAILABLE", -] \ No newline at end of file diff --git a/app/models/adapters.py b/app/models/adapters.py deleted file mode 100644 index ef78f0e..0000000 --- a/app/models/adapters.py +++ /dev/null @@ -1,72 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from typing import Any, Protocol, Iterator -import os - -try: - from llama_cpp import Llama - LLAMA_AVAILABLE = True -except ImportError: - Llama = None - LLAMA_AVAILABLE = False - - -class BaseModelAdapter(Protocol): - async def generate(self, prompt: str, **kwargs: Any) -> str: ... - def stream(self, prompt: str, **kwargs: Any) -> Iterator[str]: ... 
- - -def create_llama_adapter( - model_path: str, - backend: str = "cpu", - n_gpu_layers: int = 0, - max_tokens: int = 2048, - temperature: float = 0.2, - base_dir: Path | None = None, -) -> "Llama": - if not LLAMA_AVAILABLE: - raise RuntimeError("llama-cpp-python not installed") - - if base_dir: - model_path = str(base_dir / model_path) - else: - model_path = str(Path.cwd() / model_path) - - return Llama( - model_path=model_path, - n_gpu_layers=n_gpu_layers, - n_ctx=4096, - n_threads=int(os.environ.get("DUCKLM_N_THREADS", max(4, min((os.cpu_count() or 4) // 2, 20)))), - n_threads_batch=-1, - max_tokens=max_tokens, - temperature=temperature, - verbose=False, - ) - - -def create_adapter( - model_type: str, - config: dict[str, Any], - base_dir: Path | None = None, -) -> "Llama": - if not LLAMA_AVAILABLE: - raise RuntimeError("llama-cpp-python not installed") - - model_path = config.get("path", "") - backend = config.get("backend", "cpu") - n_gpu_layers = config.get("n_gpu_layers", 0) - max_tokens = config.get("max_tokens", 2048) - temperature = config.get("temperature", 0.2) - - if backend == "vulkan" and n_gpu_layers != 0: - n_gpu_layers = -1 - - return create_llama_adapter( - model_path=model_path, - backend=backend, - n_gpu_layers=n_gpu_layers, - max_tokens=max_tokens, - temperature=temperature, - base_dir=base_dir, - ) diff --git a/app/models/async_adapters.py b/app/models/async_adapters.py deleted file mode 100644 index ae23d55..0000000 --- a/app/models/async_adapters.py +++ /dev/null @@ -1,58 +0,0 @@ -from __future__ import annotations - -import asyncio -from typing import Any, AsyncIterator - -from app.models.orchestrator import OrchestratorAdapter as SyncOrchestrator - - -class AsyncOrchestratorAdapter: - """Async wrapper for orchestrator - runs in executor to avoid blocking event loop.""" - - def __init__(self, sync_adapter: SyncOrchestrator) -> None: - self._sync = sync_adapter - - async def generate(self, prompt: str, max_tokens: int | None = None) -> str: 
- loop = asyncio.get_event_loop() - return await loop.run_in_executor( - None, - lambda: self._sync.generate(prompt, max_tokens) - ) - - async def stream(self, prompt: str, max_tokens: int | None = None) -> AsyncIterator[str]: - loop = asyncio.get_event_loop() - - async def gen(): - return list(self._sync.stream(prompt, max_tokens)) - - result = await loop.run_in_executor(None, gen) - for chunk in result: - yield chunk - - -class AsyncCoderAdapter: - """Async wrapper for coder.""" - - def __init__(self, sync_adapter) -> None: - self._sync = sync_adapter - - async def generate(self, prompt: str, max_tokens: int | None = None) -> str: - loop = asyncio.get_event_loop() - return await loop.run_in_executor( - None, - lambda: self._sync.generate(prompt, max_tokens) - ) - - -class AsyncCriticAdapter: - """Async wrapper for critic.""" - - def __init__(self, sync_adapter) -> None: - self._sync = sync_adapter - - async def generate(self, prompt: str, max_tokens: int | None = None) -> str: - loop = asyncio.get_event_loop() - return await loop.run_in_executor( - None, - lambda: self._sync.generate(prompt, max_tokens) - ) \ No newline at end of file diff --git a/app/models/coder.py b/app/models/coder.py deleted file mode 100644 index 17af40c..0000000 --- a/app/models/coder.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import annotations - -from threading import RLock -from typing import Any, Iterator -from llama_cpp import Llama - - -class CoderAdapter: - def __init__(self, llm: Llama, system_prompt: str | None = None, lock: RLock | None = None) -> None: - self._llm = llm - self._lock = lock or RLock() - self._system_prompt = system_prompt or ( - "You are an expert code generation model." 
- ) - self._temperature = 0.2 - - def generate(self, prompt: str, max_tokens: int | None = None) -> str: - messages = [ - {"role": "system", "content": self._system_prompt}, - {"role": "user", "content": prompt}, - ] - with self._lock: - output = self._llm.create_chat_completion( - messages=messages, - max_tokens=max_tokens or 1024, - temperature=self._temperature, - ) - return output["choices"][0]["message"]["content"] - - def stream(self, prompt: str, max_tokens: int | None = None) -> Iterator[str]: - messages = [ - {"role": "system", "content": self._system_prompt}, - {"role": "user", "content": prompt}, - ] - with self._lock: - for chunk in self._llm.create_chat_completion( - messages=messages, - max_tokens=max_tokens or 1024, - temperature=self._temperature, - stream=True, - ): - content = chunk["choices"][0].get("delta", {}).get("content") - if content: - yield content diff --git a/app/models/critic.py b/app/models/critic.py deleted file mode 100644 index 94ff83c..0000000 --- a/app/models/critic.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import annotations - -from threading import RLock -from typing import Any, Iterator -from llama_cpp import Llama - - -class CriticAdapter: - def __init__(self, llm: Llama, system_prompt: str | None = None, lock: RLock | None = None) -> None: - self._llm = llm - self._lock = lock or RLock() - self._system_prompt = system_prompt or ( - "You are a critic model. Evaluate tool results and respond with JSON." 
- ) - self._temperature = 0.1 - - def generate(self, prompt: str, max_tokens: int | None = None) -> str: - messages = [ - {"role": "system", "content": self._system_prompt}, - {"role": "user", "content": prompt}, - ] - with self._lock: - output = self._llm.create_chat_completion( - messages=messages, - max_tokens=max_tokens or 512, - temperature=self._temperature, - ) - return output["choices"][0]["message"]["content"] - - def stream(self, prompt: str, max_tokens: int | None = None) -> Iterator[str]: - messages = [ - {"role": "system", "content": self._system_prompt}, - {"role": "user", "content": prompt}, - ] - with self._lock: - for chunk in self._llm.create_chat_completion( - messages=messages, - max_tokens=max_tokens or 512, - temperature=self._temperature, - stream=True, - ): - content = chunk["choices"][0].get("delta", {}).get("content") - if content: - yield content diff --git a/app/models/embeddings.py b/app/models/embeddings.py deleted file mode 100644 index ea3958d..0000000 --- a/app/models/embeddings.py +++ /dev/null @@ -1,37 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from typing import Any - -import numpy as np -from sentence_transformers import SentenceTransformer - - -class EmbeddingsAdapter: - def __init__( - self, - model_path: str | Path | None = None, - model_name: str = "sentence-transformers/all-MiniLM-L6-v2", - embedding_dim: int = 384, - ) -> None: - self._embedding_dim = embedding_dim - if model_path and Path(model_path).exists(): - self._model = SentenceTransformer(str(model_path)) - else: - self._model = SentenceTransformer(model_name) - - def encode(self, texts: str | list[str]) -> np.ndarray: - is_single = isinstance(texts, str) - if is_single: - texts = [texts] - embeddings = self._model.encode(texts, convert_to_numpy=True) - if is_single: - return embeddings[0] - return embeddings - - def encode_batch(self, texts: list[str], batch_size: int = 32) -> np.ndarray: - return self._model.encode(texts, 
batch_size=batch_size, convert_to_numpy=True) - - @property - def embedding_dim(self) -> int: - return self._embedding_dim \ No newline at end of file diff --git a/app/models/orchestrator.py b/app/models/orchestrator.py deleted file mode 100644 index 0a7482d..0000000 --- a/app/models/orchestrator.py +++ /dev/null @@ -1,45 +0,0 @@ -from __future__ import annotations - -from threading import RLock -from typing import Any, Iterator -from llama_cpp import Llama - - -class OrchestratorAdapter: - def __init__(self, llm: Llama, system_prompt: str | None = None, lock: RLock | None = None) -> None: - self._llm = llm - self._lock = lock or RLock() - self._system_prompt = system_prompt or ( - "You are an expert orchestrator for a local AI agent system. " - "Your role is to analyze the user's task, decide whether planning is needed." - ) - self._temperature = 0.2 - - def generate(self, prompt: str, max_tokens: int | None = None) -> str: - messages = [ - {"role": "system", "content": self._system_prompt}, - {"role": "user", "content": prompt}, - ] - with self._lock: - output = self._llm.create_chat_completion( - messages=messages, - max_tokens=max_tokens or 512, - temperature=self._temperature, - ) - return output["choices"][0]["message"]["content"] - - def stream(self, prompt: str, max_tokens: int | None = None) -> Iterator[str]: - messages = [ - {"role": "system", "content": self._system_prompt}, - {"role": "user", "content": prompt}, - ] - with self._lock: - for chunk in self._llm.create_chat_completion( - messages=messages, - max_tokens=max_tokens or 512, - temperature=self._temperature, - stream=True, - ): - content = chunk["choices"][0].get("delta", {}).get("content") - if content: - yield content diff --git a/app/permissions/__init__.py b/app/permissions/__init__.py deleted file mode 100644 index ad3e429..0000000 --- a/app/permissions/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -"""Permission and approval handling.""" - diff --git a/app/permissions/approval_store.py 
b/app/permissions/approval_store.py deleted file mode 100644 index 5b9ea42..0000000 --- a/app/permissions/approval_store.py +++ /dev/null @@ -1,67 +0,0 @@ -from __future__ import annotations - -import sqlite3 -from pathlib import Path - -from app.core.contracts import PermissionDecision - - -class SQLiteApprovalStore: - """Stores persistent user approval decisions.""" - - def __init__(self, db_path: str | Path) -> None: - self._db_path = Path(db_path) - self._db_path.parent.mkdir(parents=True, exist_ok=True) - self._initialize() - - def save(self, decision: PermissionDecision) -> PermissionDecision: - with sqlite3.connect(self._db_path) as conn: - conn.execute( - """ - INSERT OR REPLACE INTO approvals (action_type, pattern, decision, created_at) - VALUES (?, ?, ?, ?) - """, - ( - decision.action_type, - decision.pattern, - decision.decision, - decision.created_at.isoformat(), - ), - ) - conn.commit() - return decision - - def load(self, action_type: str, pattern: str) -> PermissionDecision | None: - with sqlite3.connect(self._db_path) as conn: - row = conn.execute( - """ - SELECT action_type, pattern, decision, created_at - FROM approvals - WHERE action_type = ? AND pattern = ? 
- """, - (action_type, pattern), - ).fetchone() - if not row: - return None - return PermissionDecision( - action_type=row[0], - pattern=row[1], - decision=row[2], - created_at=row[3], - ) - - def _initialize(self) -> None: - with sqlite3.connect(self._db_path) as conn: - conn.execute( - """ - CREATE TABLE IF NOT EXISTS approvals ( - action_type TEXT NOT NULL, - pattern TEXT NOT NULL, - decision TEXT NOT NULL, - created_at TEXT NOT NULL, - PRIMARY KEY (action_type, pattern) - ) - """ - ) - conn.commit() - diff --git a/app/runtime/__init__.py b/app/runtime/__init__.py deleted file mode 100644 index b2327dd..0000000 --- a/app/runtime/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -"""Runtime loop and execution coordination.""" - diff --git a/app/runtime/async_runtime_loop.py b/app/runtime/async_runtime_loop.py deleted file mode 100644 index 77196e8..0000000 --- a/app/runtime/async_runtime_loop.py +++ /dev/null @@ -1,148 +0,0 @@ -from __future__ import annotations - -import asyncio -from app.core.context_builder import ContextBuilder -from app.core.contracts import ExecutionDirective, PermissionDecision, PermissionRequest, RuntimeEvent, TaskCheckpoint, UserTask -from app.core.execution_engine import ExecutionEngine -from app.core.async_router import AsyncRouter -from app.events.event_bus import EventBus -from app.events.event_types import CHECKPOINT_SAVED, CONTEXT_BUILT, TASK_AWAITING_PERMISSION, TASK_COMPLETED, TASK_FAILED, TASK_RECEIVED -from app.core.permission_service import PermissionService -from app.state.checkpoint_store import SQLiteCheckpointStore -from app.state.task_state_store import SQLiteTaskStateStore - - -class AsyncRuntimeLoop: - """Async runtime loop using LLM orchestrator.""" - - def __init__( - self, - event_bus: EventBus, - task_state_store: SQLiteTaskStateStore, - checkpoint_store: SQLiteCheckpointStore, - context_builder: ContextBuilder, - router: AsyncRouter, - execution_engine: ExecutionEngine, - permission_service: PermissionService, - 
memory_interface=None, - ) -> None: - self._event_bus = event_bus - self._task_state_store = task_state_store - self._checkpoint_store = checkpoint_store - self._context_builder = context_builder - self._router = router - self._execution_engine = execution_engine - self._permission_service = permission_service - self._memory_interface = memory_interface - - async def run_task(self, task: UserTask) -> dict[str, object]: - state = self._task_state_store.create_task( - task.task_id, - { - "status": "received", - "session_id": task.session_id, - "plan": None, - "task_input": task.input, - "task_context": task.context, - }, - ) - self._publish(task, TASK_RECEIVED, {"status": "received"}) - - checkpoint = TaskCheckpoint(task_id=task.task_id, status="received") - self._checkpoint_store.save(checkpoint) - self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json")) - - context = self._context_builder.build(task=task, checkpoint=checkpoint) - self._publish(task, CONTEXT_BUILT, {"keys": sorted(context.keys())}) - - directive = await self._router.decide(state=state, context=context, task_id=task.task_id, session_id=task.session_id) - - execution_result = await asyncio.to_thread( - self._execution_engine.execute, - task=task, - directive=directive, - ) - - state_patch = {"status": execution_result["status"], "last_directive": directive.model_dump(mode="json")} - - if execution_result["status"] == "awaiting_permission": - state_patch["pending_permission_request"] = execution_result["result"].get("permission_request") - - self._task_state_store.update_task(task.task_id, state_patch) - - status = execution_result["status"] - - if status == "completed": - self._publish(task, TASK_COMPLETED, {"directive": directive.model_dump(mode="json"), "execution_result": execution_result["result"]}) - elif status == "failed": - self._publish(task, TASK_FAILED, {"error": execution_result.get("result", {}).get("error")}) - - checkpoint.status = status - 
self._checkpoint_store.save(checkpoint) - self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json")) - - # Save task and result to memory for session context - self._save_to_memory(task, execution_result, status) - - return { - "task_id": task.task_id, - "status": status, - "directive": directive.model_dump(mode="json"), - "result": execution_result.get("result"), - "events": list(self._event_bus.get_task_events(task.task_id)), - } - - def _publish(self, task: UserTask, event_type: str, payload: dict) -> None: - if not self._event_bus: - return - event = RuntimeEvent( - task_id=task.task_id, - session_id=task.session_id, - sequence=self._event_bus.next_sequence(task.task_id), - type=event_type, - payload=payload, - ) - self._event_bus.publish(event) - - def _save_to_memory(self, task: UserTask, execution_result: dict, status: str) -> None: - """Save task input and result to memory for session context.""" - if not self._memory_interface: - return - - try: - # Save task input as summary - self._memory_interface.insert( - text=f"User request: {task.input}", - kind="summary", - source="user", - task_id=task.task_id, - session_id=task.session_id, - weight=0.8, - metadata={"status": status}, - ) - - # Save execution result - result_text = "" - if status == "completed": - step_results = execution_result.get("result", {}).get("step_results", []) - if step_results: - for step in step_results: - tool_result = step.get("result", {}).get("result", {}) - if tool_result.get("output"): - result_text += f" | {step.get('step_id')}: {tool_result.get('output')[:200]}" - elif status == "failed": - result_text = f" | Error: {execution_result.get('result', {}).get('error', 'Unknown')}" - - if result_text: - self._memory_interface.insert( - text=f"Result: {status}{result_text}", - kind="tool_result", - source="system", - task_id=task.task_id, - session_id=task.session_id, - weight=0.7, - metadata={"status": status}, - ) - except Exception as e: - import logging - 
logging.getLogger(__name__).warning(f"Failed to save to memory: {e}") \ No newline at end of file diff --git a/app/runtime/runtime_controller.py b/app/runtime/runtime_controller.py deleted file mode 100644 index 47eb065..0000000 --- a/app/runtime/runtime_controller.py +++ /dev/null @@ -1,643 +0,0 @@ -from __future__ import annotations - -import json -from concurrent.futures import Future, ThreadPoolExecutor -from threading import RLock -from pathlib import Path - -from app.core.config import AppConfig, load_app_config -from app.core.context_builder import ContextBuilder -from app.core.command_analyzer import CommandAnalyzer -from app.core.contracts import UserTask -from app.core.execution_engine import ExecutionEngine -from app.core.execution_scheduler import ExecutionScheduler -from app.core.async_router import AsyncRouter -from app.events.event_bus import EventBus -from app.events.event_store import SQLiteEventStore -from app.memory import MemoryInterface, MemoryStore, VectorIndex -from app.memory.recall import MemoryRecallService -from app.memory.write_policy import MemoryWritePolicy -from app.models import ( - CoderAdapter, - CriticAdapter, - EmbeddingsAdapter, - OrchestratorAdapter, - create_adapter, -) -from app.models.async_adapters import AsyncOrchestratorAdapter, AsyncCriticAdapter, AsyncCoderAdapter -from app.permissions.approval_store import SQLiteApprovalStore -from app.core.permission_service import PermissionService -from app.runtime.runtime_loop import RuntimeLoop -from app.state.checkpoint_store import SQLiteCheckpointStore -from app.state.task_state_store import SQLiteTaskStateStore -from app.tools.file_read import FileReadTool -from app.tools.file_write import FileWriteTool -from app.tools.registry import ToolRegistry -from app.tools.sandbox import ToolSandbox -from app.tools.shell_exec import ShellExecTool -from app.tools.memory_tools import MemoryInsertTool, MemorySearchTool, MemoryListTool - - -class RuntimeController: - """Composition root for 
the ducklm runtime.""" - - def __init__(self, base_dir: str | Path | None = None) -> None: - self.base_dir = Path(base_dir or Path(__file__).resolve().parents[2]) - self.config: AppConfig = load_app_config(self.base_dir / "config") - - self.event_bus = EventBus( - SQLiteEventStore(self.base_dir / "data" / "events" / "events.sqlite3") - ) - self.task_state_store = SQLiteTaskStateStore( - self.base_dir / "data" / "state" / "task_state.sqlite3" - ) - self.checkpoint_store = SQLiteCheckpointStore( - self.base_dir / "data" / "state" / "checkpoints.sqlite3" - ) - self.approval_store = SQLiteApprovalStore( - self.base_dir / "data" / "permissions" / "approvals.sqlite3" - ) - - self._thinker: OrchestratorAdapter | None = None - self._json_compiler: OrchestratorAdapter | None = None - self._orchestrator: OrchestratorAdapter | None = None - self._coder: CoderAdapter | None = None - self._critic: CriticAdapter | None = None - self._sys_util: OrchestratorAdapter | None = None - self._model_cache: dict[tuple[object, ...], tuple[object, RLock]] = {} - self._memory_interface: MemoryInterface | None = None - self._memory_policy: MemoryWritePolicy | None = None - self._background_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ducklm-task") - self._background_tasks: dict[str, Future[dict[str, object]]] = {} - self.tool_registry = None - self.tool_sandbox = None - - self._init_models() - self._init_memory() - - runtime_config = self.config.runtime - - self.tool_sandbox = ToolSandbox( - allowed_root=self.base_dir, - timeout_ms=runtime_config.step_timeout_ms, - command_timeout_ms=runtime_config.shell_command_timeout_ms, - idle_timeout_ms=runtime_config.shell_idle_timeout_ms, - ) - - self.tool_registry = self._create_tool_registry() - - context_config = { - "max_context_tokens": runtime_config.max_context_tokens, - "context_budgets": runtime_config.context_budgets, - "reserve_for_generation_pct": runtime_config.reserve_for_generation_pct, - } - - self.context_builder = 
ContextBuilder( - memory_interface=self._memory_interface, - tool_registry=self.tool_registry, - config=context_config, - ) - - self._prompts = self._load_prompts() - # ensure sys_util prompt is present in prompts dict for router - # ensure sys_util prompt is available to router (prompts.json may have "sys_util" key) - if "sys_util" not in self._prompts and "prompts" in self.config: - self._prompts["sys_util"] = self.config.get("sys_util") - - self.context_builder = ContextBuilder( - memory_interface=self._memory_interface, - tool_registry=self.tool_registry, - config=context_config, - ) - - self.router = AsyncRouter( - thinker=None, - json_compiler=None, - intent_parser=None, - prompts=self._prompts, - event_bus=self.event_bus, - tool_registry=self.tool_registry, - retry_limit=runtime_config.orchestrator_retry_limit, - debug=runtime_config.debug if hasattr(runtime_config, 'debug') else False, - log_length=runtime_config.debug_orchestrator_log_length if hasattr(runtime_config, 'debug_orchestrator_log_length') else 500, - json_fix_retry_limit=runtime_config.json_fix_retry_limit if hasattr(runtime_config, 'json_fix_retry_limit') else 2, - json_fix_use_sys_util=runtime_config.json_fix_use_sys_util if hasattr(runtime_config, "json_fix_use_sys_util") else True, - intent_classifier=runtime_config.intent_classifier if hasattr(runtime_config, "intent_classifier") else "thinker", - ) - - self.permission_service = PermissionService( - config=self._load_permissions_config(), - ) - self.command_analyzer = CommandAnalyzer(self.permission_service) - - self.execution_engine = ExecutionEngine( - event_bus=self.event_bus, - tool_registry=self.tool_registry, - permission_service=self.permission_service, - scheduler=ExecutionScheduler( - retry_limit=runtime_config.planner_retry_limit - ), - critic=self._critic, - memory_policy=self._memory_policy, - memory_interface=self._memory_interface, - prompts=self._prompts, - recovery_limit=runtime_config.tool_retry_limit, - 
critic_retry_limit=runtime_config.critic_retry_limit, - command_analyzer=self.command_analyzer, - ) - - self.runtime_loop = RuntimeLoop( - event_bus=self.event_bus, - task_state_store=self.task_state_store, - checkpoint_store=self.checkpoint_store, - context_builder=self.context_builder, - router=self.router, - execution_engine=self.execution_engine, - permission_service=self.permission_service, - memory_interface=self._memory_interface, - ) - - def _load_prompts(self) -> dict[str, str]: - prompts_dir = self.base_dir / "config" / "prompts" - prompts = {} - - if prompts_dir.is_dir(): - for md_file in prompts_dir.glob("*.md"): - role = md_file.stem - prompts[role] = md_file.read_text(encoding="utf-8") - - if prompts: - return prompts - - prompts_file = self.base_dir / "config" / "prompts.json" - if prompts_file.exists(): - with open(prompts_file) as f: - return json.load(f) - return {} - - def _load_permissions_config(self) -> dict: - permissions_file = self.base_dir / "config" / "permissions.json" - if not permissions_file.exists(): - return {} - with permissions_file.open("r", encoding="utf-8") as handle: - return json.load(handle) - - def _init_models(self) -> None: - try: - memory_config = self.config.runtime.memory_thresholds or {} - if memory_config: - self._memory_policy = MemoryWritePolicy( - store_threshold=memory_config.get("default_store_weight", 0.8), - ) - print("Models policy ready") - except Exception as e: - print(f"Models init failed: {e}") - - def load_models_at_startup(self) -> None: - """Load all LLM models synchronously. 
Called from startup hook in executor.""" - import os - os.chdir(str(self.base_dir / "models")) - - try: - print("Loading thinker model...") - thinker_config = self.config.models.thinker or {} - if thinker_config.get("path"): - llm, lock = self._get_or_create_llm("thinker", thinker_config) - self._thinker = OrchestratorAdapter(llm, system_prompt=self._prompts.get("thinker"), lock=lock) - print(f"Thinker loaded: {self._thinker} (model: {thinker_config.get('path')})") - - print("Loading json_compiler model...") - compiler_config = self.config.models.json_compiler or {} - if compiler_config.get("path"): - llm, lock = self._get_or_create_llm("json_compiler", compiler_config) - self._json_compiler = OrchestratorAdapter(llm, system_prompt=self._prompts.get("json_compiler"), lock=lock) - print(f"JSON Compiler loaded: {self._json_compiler} (model: {compiler_config.get('path')})") - - print("Loading coder model...") - coder_config = self.config.models.coder or {} - if coder_config.get("path"): - llm, lock = self._get_or_create_llm("coder", coder_config) - self._coder = CoderAdapter(llm, system_prompt=self._prompts.get("coder"), lock=lock) - print(f"Coder loaded: {self._coder} (model: {coder_config.get('path')})") - - print("Loading critic model...") - critic_config = self.config.models.critic or {} - if critic_config.get("path"): - llm, lock = self._get_or_create_llm("critic", critic_config) - self._critic = CriticAdapter(llm, system_prompt=self._prompts.get("critic"), lock=lock) - print(f"Critic loaded: {self._critic} (model: {critic_config.get('path')})") - - print("Loading sys_util model...") - sys_util_config = self.config.models.sys_util or {} - if sys_util_config.get("path"): - llm, lock = self._get_or_create_llm("sys_util", sys_util_config) - self._sys_util = OrchestratorAdapter(llm, system_prompt=self._prompts.get("sys_util"), lock=lock) - print(f"Sys_util loaded: {self._sys_util} (model: {sys_util_config.get('path')})") - - print("All models loaded successfully") - 
- async_thinker = AsyncOrchestratorAdapter(self._thinker) if self._thinker else None - async_compiler = AsyncOrchestratorAdapter(self._json_compiler) if self._json_compiler else None - async_coder = AsyncCoderAdapter(self._coder) if self._coder else None - async_critic = AsyncCriticAdapter(self._critic) if self._critic else None - async_sys_util = AsyncOrchestratorAdapter(self._sys_util) if self._sys_util else None - - self.router.set_thinker(async_thinker) - self.router.set_json_compiler(async_compiler) - self.router.set_sys_util(async_sys_util) - self.router.set_tool_registry(self.tool_registry) - if async_critic: - self.execution_engine.set_critic(async_critic) - if async_coder: - self.execution_engine.set_coder(async_coder) - - # Create MemoryRecallService using the configured model (default: sys_util) - # Reuses already-loaded async adapter — no duplicate model loading - recall_model_name = self.config.runtime.recall_model - recall_async_model = { - "sys_util": async_sys_util, - "thinker": async_thinker, - "json_compiler": async_compiler, - "critic": async_critic, - "coder": async_coder, - }.get(recall_model_name, async_sys_util) - - self._recall_service = MemoryRecallService( - memory_interface=self._memory_interface, - recall_model=recall_async_model, - ) - self.runtime_loop.set_recall_service(self._recall_service) - print(f"MemoryRecallService initialized with model: {recall_model_name}") - - # Set memory policy in runtime loop - self.runtime_loop.set_memory_policy(self._memory_policy) - print(f"MemoryWritePolicy set: {self._memory_policy is not None}") - - except Exception as e: - print(f"Failed to load models at startup: {e}") - raise RuntimeError(f"Model loading failed: {e}") from e - - def _model_cache_key(self, model_config: dict) -> tuple[object, ...]: - path = str((self.base_dir / "models" / model_config.get("path", "")).resolve()) - return ( - path, - model_config.get("backend", "cpu"), - model_config.get("n_gpu_layers", 0), - 
model_config.get("n_ctx", 4096), - ) - - def _get_or_create_llm(self, model_type: str, model_config: dict): - key = self._model_cache_key(model_config) - cached = self._model_cache.get(key) - if cached: - print(f"Reusing model instance: {model_config.get('path')} for {model_type}") - return cached - - llm = create_adapter(model_type, model_config, self.base_dir / "models") - lock = RLock() - cached = (llm, lock) - self._model_cache[key] = cached - return cached - - def _init_memory(self) -> None: - try: - emb_config = self.config.models.embeddings or {} - model_path = self.base_dir / emb_config.get("path", "models/all-MiniLM-L6-v2") - if not model_path.exists() and not Path(emb_config.get("path", "")).is_absolute(): - model_path = self.base_dir / "models" / emb_config.get("path", "all-MiniLM-L6-v2") - if not model_path.exists(): - print(f"Memory init skipped: embeddings model not found at {model_path}") - self._memory_interface = None - return - embeddings = EmbeddingsAdapter( - model_path=model_path, - embedding_dim=emb_config.get("embedding_dim", 384), - ) - - store = MemoryStore( - self.base_dir / "data" / "memory" / "memory.sqlite3" - ) - vector_index = VectorIndex( - index_path=self.base_dir / "data" / "memory" / "index.bin", - embedding_dim=embeddings.embedding_dim, - ) - - self._memory_interface = MemoryInterface(store, vector_index, embeddings) - - except Exception as e: - print(f"Memory init failed: {e}") - self._memory_interface = None - - def _create_tool_registry(self) -> ToolRegistry: - from app.tools.registry import ToolRegistry - from app.tools.plugins.shell_exec import Tool as ShellExecTool - from app.tools.plugins.file_read import Tool as FileReadTool - from app.tools.plugins.file_write import Tool as FileWriteTool - from app.tools.plugins.memory_tools import Tool as MemoryTool - from app.tools.discover import ToolDiscovery - - registry = ToolRegistry() - - tool_init_map = { - "shell_exec": lambda m: ShellExecTool(self.tool_sandbox), - "file_read": 
lambda m: FileReadTool(self.tool_sandbox), - "file_write": lambda m: FileWriteTool(self.tool_sandbox), - "memory": lambda m: MemoryTool(self._memory_interface), - } - - discovery = ToolDiscovery() - discovered = discovery.discover() - - for name, data in discovered.items(): - init_fn = tool_init_map.get(name) - if init_fn: - tool = init_fn(data.get("manifest", {})) - registry.register(tool) - registry._schemas[name] = { - "description": data.get("manifest", {}).get("description", ""), - "args_schema": data.get("manifest", {}).get("args_schema", {}), - "requires_permission": data.get("manifest", {}).get("requires_permission", False), - } - print(f"Registered tool: {name}") - else: - print(f"No init mapping for tool: {name} - skipping") - - return registry - - @property - def orchestrator(self) -> OrchestratorAdapter | None: - return self._orchestrator - - @property - def coder(self) -> CoderAdapter | None: - return self._coder - - @property - def critic(self) -> CriticAdapter | None: - return self._critic - - @property - def memory_interface(self) -> MemoryInterface | None: - return self._memory_interface - - def _ensure_orchestrator(self) -> OrchestratorAdapter | None: - if self._orchestrator is not None: - return self._orchestrator - try: - orch_config = self.config.models.orchestrator or {} - if orch_config.get("path"): - llm, lock = self._get_or_create_llm("orchestrator", orch_config) - self._orchestrator = OrchestratorAdapter(llm, lock=lock) - except Exception as e: - print(f"Orchestrator load failed: {e}") - return self._orchestrator - - def _ensure_critic(self) -> CriticAdapter | None: - if self._critic is not None: - return self._critic - try: - critic_config = self.config.models.critic or {} - if critic_config.get("path"): - llm, lock = self._get_or_create_llm("critic", critic_config) - self._critic = CriticAdapter(llm, lock=lock) - except Exception as e: - print(f"Critic load failed: {e}") - return self._critic - - def handle_task(self, task: UserTask) -> 
dict[str, object]: - return self.runtime_loop.run_task(task) - - def submit_task(self, task: UserTask) -> dict[str, object]: - self._background_tasks[task.task_id] = self._background_executor.submit( - self.handle_task, - task, - ) - return {"task_id": task.task_id, "status": "accepted"} - - def resolve_permission(self, task_id: str, decision: str) -> dict[str, object]: - return self.runtime_loop.resolve_permission( - task_id=task_id, decision=decision - ) - - def submit_permission_resolution(self, task_id: str, decision: str) -> dict[str, object]: - if not self.task_state_store.get_task(task_id): - return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}} - self._background_tasks[task_id] = self._background_executor.submit( - self.resolve_permission, - task_id, - decision, - ) - return {"task_id": task_id, "status": "accepted"} - - def resolve_secret(self, task_id: str, secret: str) -> dict[str, object]: - return self.runtime_loop.resolve_secret( - task_id=task_id, secret=secret - ) - - def submit_secret_resolution(self, task_id: str, secret: str) -> dict[str, object]: - if not self.task_state_store.get_task(task_id): - return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}} - self._background_tasks[task_id] = self._background_executor.submit( - self.resolve_secret, - task_id, - secret, - ) - return {"task_id": task_id, "status": "accepted"} - - def resolve_password(self, task_id: str, password: str) -> dict[str, object]: - return self.runtime_loop.resolve_password( - task_id=task_id, password=password - ) - - def resolve_review(self, task_id: str, decision: str, correction: str | None = None) -> dict[str, object]: - return self.runtime_loop.resolve_review( - task_id=task_id, - decision=decision, - correction=correction, - ) - - def submit_review_resolution(self, task_id: str, decision: str, correction: str | None = None) -> dict[str, object]: - if not self.task_state_store.get_task(task_id): - return 
{"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}} - self._background_tasks[task_id] = self._background_executor.submit( - self.resolve_review, - task_id, - decision, - correction, - ) - return {"task_id": task_id, "status": "accepted"} - - def submit_password_resolution(self, task_id: str, password: str) -> dict[str, object]: - if not self.task_state_store.get_task(task_id): - return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}} - self._background_tasks[task_id] = self._background_executor.submit( - self.resolve_password, - task_id, - password, - ) - return {"task_id": task_id, "status": "accepted"} - - def handle_critic_feedback( - self, - feedback: str, - task_id: str | None = None, - session_id: str | None = None, - feedback_type: str | None = None, - severity: str | None = None, - correction: str | None = None, - remember: bool = True, - retry: bool = False, - assistant_answer: str | None = None, - correctness_override: float | None = None, - usefulness_override: float | None = None, - safety_override: float | None = None, - ) -> dict[str, object]: - target_task_id = task_id - target_session_id = session_id - - if not target_session_id and not target_task_id: - return { - "status": "error", - "message": "Either task_id or session_id must be provided", - } - - state = self.task_state_store.get_task(target_task_id) if target_task_id else None - if not target_session_id and state: - target_session_id = state.get("session_id") - - if not target_task_id and target_session_id: - recent_tasks = self.task_state_store.get_session_tasks(target_session_id, limit=1) - if recent_tasks: - target_task_id = recent_tasks[0]["task_id"] - - min_weight = 0.3 - max_weight = 0.95 - user_weight = 0.9 - - final_weight = max(min_weight, min(max_weight, user_weight)) - - task_input = state.get("task_input") if state else None - last_directive = state.get("last_directive") if state else None - feedback_type = feedback_type 
or "other" - severity = severity or "major" - - lesson = self._build_feedback_lesson( - feedback_type=feedback_type, - severity=severity, - feedback=feedback, - correction=correction, - task_input=task_input, - ) - - metadata = { - "feedback_text": feedback, - "feedback_type": feedback_type, - "severity": severity, - "correction": correction, - "assistant_answer": assistant_answer, - "task_input": task_input, - "last_directive": last_directive, - "overrides": { - "correctness": correctness_override, - "usefulness": usefulness_override, - "safety": safety_override, - }, - "source": "user", - } - - feedback_text = lesson - if correctness_override is not None: - feedback_text += f" | Correctness corrected to: {correctness_override}" - if usefulness_override is not None: - feedback_text += f" | Usefulness corrected to: {usefulness_override}" - if safety_override is not None: - feedback_text += f" | Safety corrected to: {safety_override}" - - retry_result = None - stored = False - store_error = None - try: - if remember and self._memory_interface: - self._memory_interface.insert( - text=feedback_text, - kind="critique", - source="user", - task_id=target_task_id, - session_id=target_session_id, - weight=final_weight, - metadata=metadata, - ) - stored = True - elif remember and not self._memory_interface: - store_error = "Memory not available" - except Exception as e: - store_error = str(e) - - if retry and task_input: - retry_input = self._build_retry_input( - task_input=task_input, - feedback=feedback, - feedback_type=feedback_type, - correction=correction, - ) - retry_task = UserTask( - session_id=target_session_id or "feedback-retry", - input=retry_input, - context={ - "feedback_retry": True, - "original_task_id": target_task_id, - "feedback_type": feedback_type, - "severity": severity, - "correction": correction, - }, - ) - retry_result = self.handle_task(retry_task) - - status = "ok" if stored or not remember else "error" - return { - "status": status, - "message": 
"Feedback saved" if stored else (store_error or "Feedback accepted"), - "stored": stored, - "task_id": target_task_id, - "session_id": target_session_id, - "lesson": lesson, - "retry_result": retry_result, - } - - def _build_feedback_lesson( - self, - feedback_type: str, - severity: str, - feedback: str, - correction: str | None, - task_input: str | None, - ) -> str: - parts = [ - "User critique lesson.", - f"Error type: {feedback_type}.", - f"Severity: {severity}.", - ] - if task_input: - parts.append(f"Original task: {task_input}") - if feedback: - parts.append(f"What was wrong: {feedback}") - if correction: - parts.append(f"Preferred correction: {correction}") - return " | ".join(parts) - - def _build_retry_input( - self, - task_input: str, - feedback: str, - feedback_type: str, - correction: str | None, - ) -> str: - retry_input = ( - f"Повтори задачу с учетом обратной связи.\n" - f"Исходная задача: {task_input}\n" - f"Тип ошибки: {feedback_type}\n" - f"Что было неверно: {feedback}\n" - ) - if correction: - retry_input += f"Как должно быть: {correction}\n" - return retry_input diff --git a/app/runtime/runtime_loop.py b/app/runtime/runtime_loop.py deleted file mode 100644 index 29d00c7..0000000 --- a/app/runtime/runtime_loop.py +++ /dev/null @@ -1,688 +0,0 @@ -from __future__ import annotations - -import asyncio - -from app.core.context_builder import ContextBuilder -from app.core.contracts import CriticScore, ExecutionDirective, PermissionDecision, PermissionRequest, RuntimeEvent, SecretRequest, TaskCheckpoint, UserTask -from app.core.execution_engine import ExecutionEngine -from app.core.async_router import AsyncRouter -from app.events.event_bus import EventBus -from app.events.event_types import CHECKPOINT_SAVED, CONTEXT_BUILT, MEMORY_RECALL_USED, MEMORY_WRITE_DECIDED, REVIEW_RESOLVED, TASK_AWAITING_INPUT, TASK_AWAITING_PERMISSION, TASK_AWAITING_REVIEW, TASK_COMPLETED, TASK_FAILED, TASK_RECEIVED -from app.core.permission_service import PermissionService -from 
app.memory.recall import MemoryRecallService -from app.memory.write_policy import MemoryWritePolicy -from app.state.checkpoint_store import SQLiteCheckpointStore -from app.state.task_state_store import SQLiteTaskStateStore - - -def _build_response_directive(execution_result: dict) -> dict | None: - """Build a response_directive from step_results or direct output for the client.""" - result = execution_result.get("result", {}) - - # Case 1: step_results from plan execution - step_results = result.get("step_results") - if step_results: - response_parts = [] - for step in step_results: - result_data = step.get("result", {}) - tool_result = result_data.get("result", result_data) - if tool_result.get("ok") and tool_result.get("output"): - response_parts.append(str(tool_result["output"])) - if response_parts: - response_text = "\n\n".join(response_parts) - return ExecutionDirective( - type="respond", payload={"text": response_text} - ).model_dump(mode="json") - - # Case 2: direct tool output (e.g. 
from resolve_secret -> execute_tool) - if result.get("ok") and result.get("output"): - return ExecutionDirective( - type="respond", payload={"text": str(result["output"])} - ).model_dump(mode="json") - - return None - - -class RuntimeLoop: - """Central control loop skeleton coordinating task state and events.""" - - def __init__( - self, - event_bus: EventBus, - task_state_store: SQLiteTaskStateStore, - checkpoint_store: SQLiteCheckpointStore, - context_builder: ContextBuilder, - router: AsyncRouter, - execution_engine: ExecutionEngine, - permission_service: PermissionService, - memory_interface=None, - recall_service: MemoryRecallService | None = None, - memory_policy: MemoryWritePolicy | None = None, - ) -> None: - self._event_bus = event_bus - self._task_state_store = task_state_store - self._checkpoint_store = checkpoint_store - self._context_builder = context_builder - self._router = router - self._execution_engine = execution_engine - self._permission_service = permission_service - self._memory_interface = memory_interface - self._recall_service = recall_service - self._memory_policy = memory_policy - - def set_recall_service(self, recall_service: MemoryRecallService) -> None: - self._recall_service = recall_service - - def set_memory_policy(self, policy: MemoryWritePolicy | None) -> None: - self._memory_policy = policy - - def run_task(self, task: UserTask) -> dict[str, object]: - # Check input for hard-stop commands BEFORE processing - hard_stop_check = self._permission_service.check_shell_command( - task_id=task.task_id, - session_id=task.session_id, - command=task.input, - ) - if hard_stop_check.get("decision") == "hard_stop": - # Immediately reject hard-stop commands - self._publish(task, TASK_RECEIVED, {"status": "received"}) - checkpoint = TaskCheckpoint(task_id=task.task_id, status="received") - self._checkpoint_store.save(checkpoint) - self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json")) - - error_msg = f"⚠️ BLOCKED: 
{hard_stop_check.get('reason', 'Hard stop command')}" - self._publish(task, TASK_FAILED, { - "directive": {}, - "execution_result": {"error": error_msg}, - }) - return { - "task_id": task.task_id, - "status": "failed", - "directive": {}, - "result": {"error": error_msg}, - "events": [e.model_dump(mode="json") for e in self._event_bus.list_for_task(task.task_id)], - } - - state = self._task_state_store.create_task( - task.task_id, - { - "status": "received", - "session_id": task.session_id, - "plan": None, - "task_input": task.input, - "task_context": task.context, - }, - ) - self._publish(task, TASK_RECEIVED, {"status": "received"}) - - checkpoint = TaskCheckpoint(task_id=task.task_id, status="received") - self._checkpoint_store.save(checkpoint) - self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json")) - - context = self._context_builder.build(task=task, checkpoint=checkpoint) - self._publish(task, CONTEXT_BUILT, {"keys": sorted(context.keys())}) - - # Active memory recall: system decides if it needs to search memory - recall_result = asyncio.run(self._run_recall(task)) - if recall_result["should_recall"]: - context["memory_recall"] = { - "query": recall_result["query"], - "summary": recall_result["summary"], - "entries": [ - {"text": e.text, "kind": e.kind, "weight": e.weight} - for e in recall_result["results"] - ], - } - self._publish(task, MEMORY_RECALL_USED, { - "query": recall_result["query"], - "results_count": len(recall_result["results"]), - "reason": recall_result["reason"], - }) - - directive = asyncio.run( - self._router.decide(state=state, context=context, task_id=task.task_id, session_id=task.session_id) - ) - execution_result = self._execution_engine.execute(task=task, directive=directive) - state_patch = {"status": execution_result["status"], "last_directive": directive.model_dump(mode="json")} - if execution_result["status"] == "awaiting_permission": - state_patch["pending_permission_request"] = 
execution_result["result"]["permission_request"] - state_patch["pending_secret_request"] = None - state_patch["resolved_permission_decision"] = None - elif execution_result["status"] == "awaiting_input": - state_patch["pending_permission_request"] = None - state_patch["pending_secret_request"] = execution_result["result"]["secret_request"] - state_patch["resolved_permission_decision"] = None - elif execution_result["status"] == "awaiting_password": - state_patch["pending_permission_request"] = None - state_patch["pending_secret_request"] = None - state_patch["resolved_permission_decision"] = None - state_patch["pending_password_request"] = { - "command": execution_result["result"].get("command", ""), - "reason": "Permission denied - требуется sudo пароль", - "attempts": 0, - } - elif execution_result["status"] == "awaiting_review": - state_patch["pending_permission_request"] = None - state_patch["pending_secret_request"] = None - state_patch["resolved_permission_decision"] = None - state_patch["pending_review"] = execution_result["result"]["review"] - else: - state_patch["pending_permission_request"] = None - state_patch["pending_secret_request"] = None - state_patch["resolved_permission_decision"] = None - state_patch["pending_review"] = None - self._task_state_store.update_task(task.task_id, state_patch) - final_status = str(execution_result["status"]) - - # For awaiting states - do NOT mark task as completed, keep it in pending state - if final_status in ("awaiting_permission", "awaiting_input", "awaiting_password", "awaiting_review"): - # Task stays in pending state, don't update to completed - pass - else: - self._task_state_store.update_task(task.task_id, {"status": final_status}) - - final_checkpoint = TaskCheckpoint( - task_id=task.task_id, - status=final_status, - context_snapshot=context, - ) - self._checkpoint_store.save(final_checkpoint) - - # Generate response for user - # Case 1: step_results from plan execution - if final_status == "completed" and 
execution_result.get("result", {}).get("step_results"): - step_results = execution_result["result"]["step_results"] - response_parts = [] - for step in step_results: - result_data = step.get("result", {}) - tool_result = result_data.get("result", result_data) - if tool_result.get("ok") and tool_result.get("output"): - response_parts.append(tool_result["output"]) - if response_parts: - response_text = "\n\n".join(response_parts) - execution_result["response_directive"] = ExecutionDirective( - type="respond", payload={"text": response_text} - ).model_dump(mode="json") - - # Case 2: respond directive from orchestrator (direct response, no steps) - if final_status == "completed" and not execution_result.get("response_directive"): - # Use the original directive from router.decide() - if hasattr(directive, "type") and directive.type == "respond": - if directive.payload.get("text"): - execution_result["response_directive"] = directive.model_dump(mode="json") - elif isinstance(directive, dict) and directive.get("type") == "respond": - if directive.get("payload", {}).get("text"): - execution_result["response_directive"] = directive - - # Map status to terminal event type - if final_status == "completed": - terminal_event_type = TASK_COMPLETED - elif final_status == "failed": - terminal_event_type = TASK_FAILED - elif final_status == "awaiting_permission": - terminal_event_type = TASK_AWAITING_PERMISSION - elif final_status == "awaiting_input": - terminal_event_type = TASK_AWAITING_INPUT - elif final_status == "awaiting_review": - terminal_event_type = TASK_AWAITING_REVIEW - elif final_status == "awaiting_password": - terminal_event_type = TASK_AWAITING_PERMISSION - else: - terminal_event_type = TASK_FAILED - self._publish( - task, - terminal_event_type, - { - "directive": directive.model_dump(mode="json"), - "execution_result": execution_result["result"], - }, - ) - - # Save task and result to memory for session context - self._save_to_memory(task, execution_result, 
final_status) - - return { - "task_id": task.task_id, - "status": final_status, - "directive": directive.model_dump(mode="json"), - "result": { - **execution_result["result"], - "response_directive": execution_result.get("response_directive"), - }, - "events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)], - } - - def resolve_permission(self, task_id: str, decision: str) -> dict[str, object]: - state = self._task_state_store.get_task(task_id) - if not state: - return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}} - - pending_request_payload = state.get("pending_permission_request") - last_directive_payload = state.get("last_directive") - if not pending_request_payload or not last_directive_payload: - return {"task_id": task_id, "status": "failed", "result": {"error": "No pending permission request"}} - - task = UserTask( - task_id=task_id, - session_id=state["session_id"], - input=state["task_input"], - context=state.get("task_context", {}), - ) - # Get command from pending request - command = pending_request_payload.get("command", "") - - # Resolve permission using new service - resolved = self._permission_service.resolve_permission( - task_id=task_id, - session_id=state["session_id"], - command=command, - decision=decision, - ) - - if decision == "deny": - execution_result = { - "status": "failed", - "result": { - "error": "Permission denied by user.", - "permission_decision": resolved, - }, - } - elif decision == "allow_with_password": - directive = ExecutionDirective.model_validate(last_directive_payload) - self._task_state_store.update_task( - task.task_id, - { - "status": "awaiting_password", - "pending_password_request": { - "command": command, - "reason": pending_request_payload.get("reason", "Требуется пароль для выполнения команды"), - "attempts": 0, - }, - "pending_permission_request": None, - }, - ) - self._publish(task, TASK_AWAITING_PERMISSION, { - "password_required": True, 
- "command": command, - }) - return { - "task_id": task_id, - "status": "awaiting_password", - "result": {"message": "Требуется ввод пароля"}, - } - else: - directive = ExecutionDirective.model_validate(last_directive_payload) - execution_result = self._execution_engine.execute( - task=task, - directive=directive, - ) - - final_status = str(execution_result["status"]) - if decision != "allow_with_password": - self._task_state_store.update_task( - task.task_id, - { - "status": final_status, - "pending_permission_request": None, - "pending_secret_request": execution_result["result"].get("secret_request") - if final_status == "awaiting_input" - else None, - "pending_review": execution_result["result"].get("review") - if final_status == "awaiting_review" - else None, - "resolved_permission_decision": resolved, - }, - ) - checkpoint = TaskCheckpoint(task_id=task.task_id, status=final_status) - self._checkpoint_store.save(checkpoint) - self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json")) - if final_status == "completed": - terminal_event_type = TASK_COMPLETED - elif final_status == "awaiting_input": - terminal_event_type = TASK_AWAITING_INPUT - elif final_status == "awaiting_permission": - terminal_event_type = TASK_AWAITING_PERMISSION - elif final_status == "awaiting_review": - terminal_event_type = TASK_AWAITING_REVIEW - else: - terminal_event_type = TASK_FAILED - self._publish( - task, - terminal_event_type, - { - "permission_resolution": resolved.model_dump(mode="json") if hasattr(resolved, 'model_dump') else resolved, - "execution_result": execution_result["result"], - }, - ) - - # Save to memory after permission resolution - self._save_to_memory(task, execution_result, final_status) - - return { - "task_id": task.task_id, - "status": final_status, - "result": { - **execution_result["result"], - "response_directive": _build_response_directive(execution_result), - }, - "events": [event.model_dump(mode="json") for event in 
self._event_bus.list_for_task(task.task_id)], - } - - def resolve_secret(self, task_id: str, secret: str) -> dict[str, object]: - state = self._task_state_store.get_task(task_id) - if not state: - return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}} - pending_secret_payload = state.get("pending_secret_request") - last_directive_payload = state.get("last_directive") - resolved_permission_payload = state.get("resolved_permission_decision") - if not pending_secret_payload or not last_directive_payload: - return {"task_id": task_id, "status": "failed", "result": {"error": "No pending secret request"}} - if not resolved_permission_payload: - return {"task_id": task_id, "status": "failed", "result": {"error": "No resolved permission available"}} - - task = UserTask( - task_id=task_id, - session_id=state["session_id"], - input=state["task_input"], - context=state.get("task_context", {}), - ) - _secret_request = SecretRequest.model_validate(pending_secret_payload) - directive = ExecutionDirective.model_validate(last_directive_payload) - execution_result = self._execution_engine.execute( - task=task, - directive=directive, - permission_override=None, - secret_override=secret, - ) - final_status = str(execution_result["status"]) - pending_review = execution_result["result"].get("review") if final_status == "awaiting_review" else None - pending_secret = execution_result["result"].get("secret_request") if final_status == "awaiting_input" else None - self._task_state_store.update_task( - task.task_id, - { - "status": final_status, - "pending_secret_request": pending_secret, - "resolved_permission_decision": resolved_permission_payload if final_status == "awaiting_input" else None, - "pending_review": pending_review, - }, - ) - checkpoint = TaskCheckpoint(task_id=task.task_id, status=final_status) - self._checkpoint_store.save(checkpoint) - self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json")) - if final_status == 
"completed": - terminal_event_type = TASK_COMPLETED - elif final_status == "awaiting_input": - terminal_event_type = TASK_AWAITING_INPUT - elif final_status == "awaiting_permission": - terminal_event_type = TASK_AWAITING_PERMISSION - elif final_status == "awaiting_review": - terminal_event_type = TASK_AWAITING_REVIEW - else: - terminal_event_type = TASK_FAILED - self._publish( - task, - terminal_event_type, - { - "secret_resolution": {"task_id": task_id}, - "execution_result": execution_result["result"], - }, - ) - return { - "task_id": task.task_id, - "status": final_status, - "result": { - **execution_result["result"], - "response_directive": _build_response_directive(execution_result), - }, - "events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)], - } - - def resolve_review(self, task_id: str, decision: str, correction: str | None = None) -> dict[str, object]: - state = self._task_state_store.get_task(task_id) - if not state: - return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}} - pending_review = state.get("pending_review") - if not pending_review: - return {"task_id": task_id, "status": "failed", "result": {"error": "No pending review"}} - - task = UserTask( - task_id=task_id, - session_id=state["session_id"], - input=state["task_input"], - context={ - **state.get("task_context", {}), - "previous_action_review": { - "decision": decision, - "correction": correction, - "review": pending_review, - }, - }, - ) - self._publish(task, REVIEW_RESOLVED, { - "decision": decision, - "correction": correction, - "review": pending_review, - }) - if self._memory_interface: - try: - self._memory_interface.insert( - text=f"User reviewed model action as {decision}. Correction: {correction or ''}. 
Review: {pending_review}", - kind="critique", - source="user", - task_id=task_id, - session_id=state["session_id"], - weight=0.9 if decision == "wrong_action" else 0.5, - metadata={"decision": decision, "review": pending_review}, - ) - except Exception: - pass - self._task_state_store.update_task(task_id, {"pending_review": None, "status": "replanning"}) - return self.run_task(task) - - def resolve_password(self, task_id: str, password: str) -> dict[str, object]: - state = self._task_state_store.get_task(task_id) - if not state: - return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}} - - pending_password_payload = state.get("pending_password_request") - last_directive_payload = state.get("last_directive") - if not pending_password_payload or not last_directive_payload: - return {"task_id": task_id, "status": "failed", "result": {"error": "No pending password request"}} - - current_attempt = pending_password_payload.get("attempts", 0) + 1 - - task = UserTask( - task_id=task_id, - session_id=state["session_id"], - input=state["task_input"], - context=state.get("task_context", {}), - ) - directive = ExecutionDirective.model_validate(last_directive_payload) - - execution_result = self._execution_engine.execute( - task=task, - directive=directive, - password_override=password, - ) - - final_status = str(execution_result["status"]) - - if final_status == "failed": - error_msg = execution_result.get("result", {}).get("error", "") - is_password_error = "permission denied" in error_msg.lower() or "incorrect password" in error_msg.lower() - - if is_password_error and current_attempt < 3: - self._task_state_store.update_task( - task.task_id, - { - "status": "awaiting_password", - "pending_password_request": { - "command": pending_password_payload.get("command"), - "reason": pending_password_payload.get("reason"), - "attempts": current_attempt, - }, - }, - ) - self._publish(task, TASK_AWAITING_PERMISSION, { - "password_attempt_failed": True, - 
"attempts": current_attempt, - "max_attempts": 3, - "message": "Неверный пароль. Попробуйте снова.", - }) - return { - "task_id": task_id, - "status": "awaiting_password", - "result": {"error": "Неверный пароль", "attempts": current_attempt, "max_attempts": 3}, - } - else: - self._task_state_store.update_task( - task.task_id, - { - "status": "failed", - "pending_password_request": None, - "password_attempts": current_attempt, - }, - ) - self._publish(task, TASK_FAILED, { - "password_failed": True, - "attempts": current_attempt, - "message": "Неверный пароль (3 попытки). Передаю решение модели.", - "execution_result": execution_result["result"], - }) - return { - "task_id": task_id, - "status": "failed", - "result": { - "error": "Password failed after 3 attempts", - "attempts": current_attempt, - "message": "Пользователь 3 раза ввёл неверный пароль. Решение за вами.", - }, - } - - self._task_state_store.update_task( - task.task_id, - { - "status": final_status, - "pending_password_request": None, - }, - ) - checkpoint = TaskCheckpoint(task_id=task.task_id, status=final_status) - self._checkpoint_store.save(checkpoint) - self._publish(task, TASK_COMPLETED, {"execution_result": execution_result["result"]}) - - # Save to memory after password resolution - self._save_to_memory(task, execution_result, final_status) - - return { - "task_id": task.task_id, - "status": final_status, - "result": { - **execution_result["result"], - "response_directive": _build_response_directive(execution_result), - }, - "events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)], - } - - def _publish(self, task: UserTask, event_type: str, payload: dict[str, object]) -> None: - event = RuntimeEvent( - task_id=task.task_id, - session_id=task.session_id, - sequence=self._event_bus.next_sequence(task.task_id), - type=event_type, - payload=payload, - ) - self._event_bus.publish(event) - - async def _run_recall(self, task: UserTask) -> dict: - """Run active 
memory recall before orchestration.""" - if not self._recall_service: - return {"should_recall": False, "reason": "no_recall_service", "query": "", "results": [], "summary": ""} - try: - return await self._recall_service.recall(task_input=task.input) - except Exception as e: - return {"should_recall": False, "reason": f"recall_error: {e}", "query": "", "results": [], "summary": ""} - - def _save_to_memory(self, task: UserTask, execution_result: dict, status: str) -> None: - """Save task input and result to memory for session context, using MemoryWritePolicy.""" - if not self._memory_interface: - return - - try: - # Build a synthetic critic_score for policy based on task status - # For summary/tool_result without real critic, we derive from execution outcome - if status == "completed": - synthetic_score = CriticScore( - correctness=0.9, usefulness=0.8, safety=0.95, - memory_store=True, weight=0.85, explanation="Task completed successfully" - ) - elif status == "failed": - synthetic_score = CriticScore( - correctness=0.2, usefulness=0.3, safety=0.7, - memory_store=True, weight=0.5, explanation="Task failed — store for learning" - ) - else: - synthetic_score = CriticScore( - correctness=0.5, usefulness=0.5, safety=0.8, - memory_store=False, weight=0.3, explanation=f"Status: {status}" - ) - - # Save task input as summary - decision = "store" - if self._memory_policy: - decision = self._memory_policy.decide( - critic_score=synthetic_score, - memory_type="summary", - session_id=task.session_id, - ) - if decision in ("store", "store_with_weight"): - weight = synthetic_score.weight if decision == "store_with_weight" else 0.8 - self._memory_interface.insert( - text=f"User request: {task.input}", - kind="summary", - source="user", - task_id=task.task_id, - session_id=task.session_id, - weight=weight, - metadata={"status": status, "policy_decision": decision}, - ) - self._publish(task, MEMORY_WRITE_DECIDED, { - "kind": "summary", "decision": decision, "text_preview": 
task.input[:80] - }) - - # Save execution result - result_text = "" - if status == "completed": - step_results = execution_result.get("result", {}).get("step_results", []) - if step_results: - for step in step_results: - tool_result = step.get("result", {}).get("result", {}) - if tool_result.get("output"): - result_text += f" | {step.get('step_id')}: {tool_result.get('output')[:200]}" - elif status == "failed": - result_text = f" | Error: {execution_result.get('result', {}).get('error', 'Unknown')}" - - if result_text: - decision = "store" - if self._memory_policy: - decision = self._memory_policy.decide( - critic_score=synthetic_score, - memory_type="tool_result", - session_id=task.session_id, - ) - if decision in ("store", "store_with_weight"): - weight = synthetic_score.weight if decision == "store_with_weight" else 0.7 - self._memory_interface.insert( - text=f"Result: {status}{result_text}", - kind="tool_result", - source="system", - task_id=task.task_id, - session_id=task.session_id, - weight=weight, - metadata={"status": status, "policy_decision": decision}, - ) - self._publish(task, MEMORY_WRITE_DECIDED, { - "kind": "tool_result", "decision": decision, "text_preview": result_text[:80] - }) - except Exception as e: - import logging - logging.getLogger(__name__).warning(f"Failed to save to memory: {e}") diff --git a/app/services/__init__.py b/app/services/__init__.py deleted file mode 100644 index 6f66849..0000000 --- a/app/services/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -"""Shared services.""" - diff --git a/app/state/__init__.py b/app/state/__init__.py deleted file mode 100644 index 5cc321c..0000000 --- a/app/state/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -"""Task state and checkpoints.""" - diff --git a/app/state/checkpoint_store.py b/app/state/checkpoint_store.py deleted file mode 100644 index 277d8b8..0000000 --- a/app/state/checkpoint_store.py +++ /dev/null @@ -1,75 +0,0 @@ -from __future__ import annotations - -import json -import sqlite3 -from 
pathlib import Path - -from app.core.contracts import TaskCheckpoint - - -class SQLiteCheckpointStore: - """Durable checkpoint store for resumable runtime state.""" - - def __init__(self, db_path: str | Path) -> None: - self._db_path = Path(db_path) - self._db_path.parent.mkdir(parents=True, exist_ok=True) - self._initialize() - - def save(self, checkpoint: TaskCheckpoint) -> TaskCheckpoint: - with sqlite3.connect(self._db_path) as conn: - conn.execute( - """ - INSERT OR REPLACE INTO checkpoints ( - task_id, status, active_step_id, plan_snapshot_json, - context_snapshot_json, updated_at - ) VALUES (?, ?, ?, ?, ?, ?) - """, - ( - checkpoint.task_id, - checkpoint.status, - checkpoint.active_step_id, - json.dumps(checkpoint.plan_snapshot, default=str), - json.dumps(checkpoint.context_snapshot, default=str), - checkpoint.updated_at.isoformat(), - ), - ) - conn.commit() - return checkpoint - - def load(self, task_id: str) -> TaskCheckpoint | None: - with sqlite3.connect(self._db_path) as conn: - row = conn.execute( - """ - SELECT task_id, status, active_step_id, plan_snapshot_json, - context_snapshot_json, updated_at - FROM checkpoints - WHERE task_id = ? 
- """, - (task_id,), - ).fetchone() - if not row: - return None - return TaskCheckpoint( - task_id=row[0], - status=row[1], - active_step_id=row[2], - plan_snapshot=json.loads(row[3]), - context_snapshot=json.loads(row[4]), - updated_at=row[5], - ) - - def _initialize(self) -> None: - with sqlite3.connect(self._db_path) as conn: - conn.execute( - """ - CREATE TABLE IF NOT EXISTS checkpoints ( - task_id TEXT PRIMARY KEY, - status TEXT NOT NULL, - active_step_id TEXT, - plan_snapshot_json TEXT NOT NULL, - context_snapshot_json TEXT NOT NULL, - updated_at TEXT NOT NULL - ) - """ - ) - conn.commit() diff --git a/app/state/task_state_store.py b/app/state/task_state_store.py deleted file mode 100644 index b6b7470..0000000 --- a/app/state/task_state_store.py +++ /dev/null @@ -1,77 +0,0 @@ -from __future__ import annotations - -import json -import sqlite3 -from pathlib import Path -from typing import Any - - -class SQLiteTaskStateStore: - """Durable task state store for runtime lifecycle state.""" - - def __init__(self, db_path: str | Path) -> None: - self._db_path = Path(db_path) - self._db_path.parent.mkdir(parents=True, exist_ok=True) - self._initialize() - - def create_task(self, task_id: str, initial_state: dict[str, Any]) -> dict[str, Any]: - state = dict(initial_state) - session_id = state.get("session_id") - with sqlite3.connect(self._db_path) as conn: - conn.execute( - """ - INSERT OR REPLACE INTO task_states (task_id, state_json, session_id) - VALUES (?, ?, ?) 
- """, - (task_id, json.dumps(state), session_id), - ) - conn.commit() - return state - - def get_task(self, task_id: str) -> dict[str, Any] | None: - with sqlite3.connect(self._db_path) as conn: - row = conn.execute( - "SELECT state_json FROM task_states WHERE task_id = ?", - (task_id,), - ).fetchone() - return json.loads(row[0]) if row else None - - def update_task(self, task_id: str, patch: dict[str, Any]) -> dict[str, Any]: - state = self.get_task(task_id) or {} - state.update(patch) - with sqlite3.connect(self._db_path) as conn: - conn.execute( - """ - INSERT OR REPLACE INTO task_states (task_id, state_json) - VALUES (?, ?) - """, - (task_id, json.dumps(state)), - ) - conn.commit() - return state - - def _initialize(self) -> None: - with sqlite3.connect(self._db_path) as conn: - conn.execute( - """ - CREATE TABLE IF NOT EXISTS task_states ( - task_id TEXT PRIMARY KEY, - state_json TEXT NOT NULL - ) - """ - ) - conn.commit() - try: - conn.execute("ALTER TABLE task_states ADD COLUMN session_id TEXT") - conn.commit() - except sqlite3.OperationalError: - pass - - def get_session_tasks(self, session_id: str, limit: int = 10) -> list[dict[str, Any]]: - with sqlite3.connect(self._db_path) as conn: - conn.row_factory = sqlite3.Row - rows = conn.execute( - "SELECT state_json FROM task_states WHERE session_id = ? 
ORDER BY rowid DESC LIMIT ?", - (session_id, limit), - ).fetchall() - return [json.loads(row[0]) for row in rows] diff --git a/app/streaming/__init__.py b/app/streaming/__init__.py deleted file mode 100644 index 24d18ec..0000000 --- a/app/streaming/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -"""Streaming projections.""" - diff --git a/app/streaming/manager.py b/app/streaming/manager.py deleted file mode 100644 index 64d0c49..0000000 --- a/app/streaming/manager.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import annotations - -import asyncio -from collections import defaultdict -from dataclasses import dataclass - -from app.core.contracts import RuntimeEvent -from app.events.event_bus import EventBus - - -class StreamingManager: - """Simple in-process projection from event bus to websocket consumers.""" - - def __init__(self, event_bus: EventBus) -> None: - self._event_bus = event_bus - self._subscribers: dict[str, list[StreamSubscriber]] = defaultdict(list) - self._event_bus.subscribe(self._on_event) - - def replay_events(self, task_id: str) -> list[RuntimeEvent]: - return self._event_bus.list_for_task(task_id) - - def subscribe(self, task_id: str) -> asyncio.Queue[RuntimeEvent]: - queue: asyncio.Queue[RuntimeEvent] = asyncio.Queue() - self._subscribers[task_id].append( - StreamSubscriber(loop=asyncio.get_running_loop(), queue=queue) - ) - return queue - - def unsubscribe(self, task_id: str, queue: asyncio.Queue[RuntimeEvent]) -> None: - listeners = self._subscribers.get(task_id, []) - for listener in list(listeners): - if listener.queue is queue: - listeners.remove(listener) - break - if not listeners and task_id in self._subscribers: - del self._subscribers[task_id] - - def _on_event(self, event: RuntimeEvent) -> None: - for listener in list(self._subscribers.get(event.task_id, [])): - listener.loop.call_soon_threadsafe(listener.queue.put_nowait, event) - - -@dataclass -class StreamSubscriber: - loop: asyncio.AbstractEventLoop - queue: 
asyncio.Queue[RuntimeEvent] diff --git a/app/tools/__init__.py b/app/tools/__init__.py deleted file mode 100644 index b8046a9..0000000 --- a/app/tools/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -"""Tool registry and tool adapters.""" - diff --git a/app/tools/base.py b/app/tools/base.py deleted file mode 100644 index 6601eba..0000000 --- a/app/tools/base.py +++ /dev/null @@ -1,24 +0,0 @@ -from __future__ import annotations - -from abc import ABC, abstractmethod -from typing import Any - -from app.core.contracts import ToolResult, UserTask - - -class BaseTool(ABC): - name: str = "" - description: str = "" - - @property - def name(self) -> str: - return getattr(self, '_name', self.__class__.__name__.replace('Tool', '').lower()) - - @property - def description(self) -> str: - return getattr(self, '_description', "") - - @abstractmethod - def execute(self, task: UserTask, args: dict[str, Any]) -> ToolResult: - raise NotImplementedError - diff --git a/app/tools/discover.py b/app/tools/discover.py deleted file mode 100644 index 421acce..0000000 --- a/app/tools/discover.py +++ /dev/null @@ -1,83 +0,0 @@ -from __future__ import annotations - -import importlib -import json -import logging -from pathlib import Path -from typing import Any - -logger = logging.getLogger(__name__) - -PLUGINS_DIR = Path(__file__).parent / "plugins" - - -class ToolDiscovery: - """Decentralized tool discovery system.""" - - def __init__(self, plugins_dir: Path | None = None) -> None: - self._plugins_dir = plugins_dir or PLUGINS_DIR - - def discover(self) -> dict[str, Any]: - """Discover all tools from plugins directory.""" - tools = {} - - if not self._plugins_dir.exists(): - logger.warning(f"Plugins directory not found: {self._plugins_dir}") - return tools - - for folder in self._plugins_dir.iterdir(): - if not folder.is_dir(): - continue - - manifest_file = folder / "manifest.json" - if not manifest_file.exists(): - logger.warning(f"Missing manifest.json in {folder.name}") - continue - - try: - 
manifest = self._load_manifest(manifest_file) - - tool_name = manifest.get("name", folder.name) - tools[tool_name] = { - "manifest": manifest, - "tool_class": folder.name, - } - logger.info(f"Discovered tool: {tool_name}") - - except Exception as e: - logger.error(f"Failed to load tool {folder.name}: {e}") - continue - - return tools - - def _load_manifest(self, manifest_file: Path) -> dict[str, Any]: - with open(manifest_file) as f: - return json.load(f) - - def _load_tool_class(self, tool_name: str, manifest: dict[str, Any]) -> Any: - entrypoint = manifest.get("entrypoint", "Tool") - module = importlib.import_module(f"app.tools.plugins.{tool_name}") - tool_class = getattr(module, entrypoint) - return tool_class - - def get_tool_schemas(self) -> list[dict[str, Any]]: - """Get schemas for all discovered tools.""" - tools = self.discover() - schemas = [] - - for name, data in tools.items(): - manifest = data.get("manifest", {}) - schemas.append({ - "name": name, - "description": manifest.get("description", ""), - "args_schema": manifest.get("args_schema", {}), - "requires_permission": manifest.get("requires_permission", False), - }) - - return schemas - - -def discover_tools() -> dict[str, Any]: - """Convenience function for quick tool discovery.""" - discovery = ToolDiscovery() - return discovery.discover() \ No newline at end of file diff --git a/app/tools/file_read.py b/app/tools/file_read.py deleted file mode 100644 index 6bba378..0000000 --- a/app/tools/file_read.py +++ /dev/null @@ -1,26 +0,0 @@ -from __future__ import annotations - -from app.core.contracts import ToolResult, UserTask -from app.tools.base import BaseTool -from app.tools.sandbox import ToolSandbox - - -class FileReadTool(BaseTool): - name = "file_read" - - def __init__(self, sandbox: ToolSandbox) -> None: - self._sandbox = sandbox - - def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult: - path = args.get("path") - if not path: - return ToolResult(tool=self.name, ok=False, 
error="Missing path") - resolved = self._sandbox.ensure_path_allowed(str(path)) - content = resolved.read_text(encoding="utf-8") - return ToolResult( - tool=self.name, - ok=True, - output=content, - metadata={"path": str(resolved), "size": len(content)}, - ) - diff --git a/app/tools/file_write.py b/app/tools/file_write.py deleted file mode 100644 index 0bf7708..0000000 --- a/app/tools/file_write.py +++ /dev/null @@ -1,27 +0,0 @@ -from __future__ import annotations - -from app.core.contracts import ToolResult, UserTask -from app.tools.base import BaseTool -from app.tools.sandbox import ToolSandbox - - -class FileWriteTool(BaseTool): - name = "file_write" - - def __init__(self, sandbox: ToolSandbox) -> None: - self._sandbox = sandbox - - def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult: - path = args.get("path") - content = str(args.get("content", "")) - if not path: - return ToolResult(tool=self.name, ok=False, error="Missing path") - resolved = self._sandbox.ensure_path_allowed(str(path)) - resolved.parent.mkdir(parents=True, exist_ok=True) - resolved.write_text(content, encoding="utf-8") - return ToolResult( - tool=self.name, - ok=True, - output=f"Wrote {len(content)} bytes", - metadata={"path": str(resolved), "size": len(content)}, - ) diff --git a/app/tools/memory_tools.py b/app/tools/memory_tools.py deleted file mode 100644 index 77bf03b..0000000 --- a/app/tools/memory_tools.py +++ /dev/null @@ -1,123 +0,0 @@ -from __future__ import annotations - -import logging -from typing import Any - -from app.tools.base import BaseTool -from app.core.contracts import ToolResult, UserTask -from app.tools.sandbox import ToolSandbox - -logger = logging.getLogger(__name__) - - -class MemoryInsertTool(BaseTool): - _name = "memory_insert" - _description = "Store information in memory" - - def __init__(self, sandbox: ToolSandbox, memory_interface=None) -> None: - super().__init__() - self._sandbox = sandbox - self._memory = memory_interface - - def 
execute(self, task: UserTask, args: dict[str, Any]) -> ToolResult: - text = args.get("text", "") - kind = args.get("kind", "fact") - source = args.get("source", "user") - weight = args.get("weight", 0.5) - - if not text: - return ToolResult(tool="memory_insert", ok=False, output="", error="text is required") - if not self._memory: - return ToolResult(tool="memory_insert", ok=False, output="", error="Memory not available") - - try: - entry = self._memory.insert( - text=text, - kind=kind, - source=source, - task_id=task.task_id, - session_id=task.session_id, - weight=weight, - ) - return ToolResult( - tool="memory_insert", - ok=True, - output=f"Stored: {entry.id}", - metadata={"entry_id": entry.id}, - ) - except Exception as e: - logger.warning(f"Memory insert failed: {e}") - return ToolResult(tool="memory_insert", ok=False, output="", error=str(e)) - - -class MemorySearchTool(BaseTool): - _name = "memory_search" - _description = "Search memory for information" - - def __init__(self, sandbox: ToolSandbox, memory_interface=None) -> None: - super().__init__() - self._sandbox = sandbox - self._memory = memory_interface - - def execute(self, task: UserTask, args: dict[str, Any]) -> ToolResult: - query = args.get("query", "") - top_k = args.get("top_k", 5) - - if not query: - return ToolResult(tool="memory_search", ok=False, output="", error="query is required") - if not self._memory: - return ToolResult(tool="memory_search", ok=False, output="", error="Memory not available") - - try: - results = self._memory.search(query, top_k=top_k) - if not results: - return ToolResult(tool="memory_search", ok=True, output="No results found", metadata={"count": 0}) - - output_lines = [] - for entry, score in results: - output_lines.append(f"[{score:.2f}] {entry.text[:100]}") - - return ToolResult( - tool="memory_search", - ok=True, - output="\n".join(output_lines), - metadata={"count": len(results)}, - ) - except Exception as e: - logger.warning(f"Memory search failed: {e}") - return 
ToolResult(tool="memory_search", ok=False, output="", error=str(e)) - - -class MemoryListTool(BaseTool): - _name = "memory_list" - _description = "List recent memories" - - def __init__(self, sandbox: ToolSandbox, memory_interface=None) -> None: - super().__init__() - self._sandbox = sandbox - self._memory = memory_interface - - def execute(self, task: UserTask, args: dict[str, Any]) -> ToolResult: - limit = args.get("limit", 10) - - if not self._memory: - return ToolResult(tool="memory_list", ok=False, output="", error="Memory not available") - - try: - entries = self._memory.get_recent(limit=limit) - if not entries: - return ToolResult(tool="memory_list", ok=True, output="No memories", metadata={"count": 0}) - - output_lines = [] - for entry in entries: - output_lines.append(f"{entry.kind}: {entry.text[:80]}") - - return ToolResult( - tool="memory_list", - ok=True, - output="\n".join(output_lines), - metadata={"count": len(entries)}, - ) - except Exception as e: - logger.warning(f"Memory list failed: {e}") - return ToolResult(tool="memory_list", ok=False, output="", error=str(e)) \ No newline at end of file diff --git a/app/tools/plugins/file_read/__init__.py b/app/tools/plugins/file_read/__init__.py deleted file mode 100644 index a06af7b..0000000 --- a/app/tools/plugins/file_read/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import annotations - -from app.core.contracts import ToolResult, UserTask -from app.tools.base import BaseTool -from app.tools.sandbox import ToolSandbox - - -class Tool(BaseTool): - name = "file_read" - description = "Read file contents" - - def __init__(self, sandbox: ToolSandbox) -> None: - self._sandbox = sandbox - - def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult: - path = args.get("path") - if not path: - return ToolResult(tool=self.name, ok=False, error="Missing path") - try: - resolved = self._sandbox.ensure_path_allowed(str(path)) - if not resolved.exists(): - return ToolResult(tool=self.name, 
ok=False, error=f"File not found: {path}") - content = resolved.read_text(encoding="utf-8") - return ToolResult( - tool=self.name, - ok=True, - output=content, - metadata={"path": str(resolved), "size": len(content)}, - ) - except PermissionError as e: - return ToolResult(tool=self.name, ok=False, error=f"Access denied: {e}") - except FileNotFoundError as e: - return ToolResult(tool=self.name, ok=False, error=f"File not found: {path}") - except Exception as e: - return ToolResult(tool=self.name, ok=False, error=f"Error: {e}") \ No newline at end of file diff --git a/app/tools/plugins/file_read/manifest.json b/app/tools/plugins/file_read/manifest.json deleted file mode 100644 index ec51f07..0000000 --- a/app/tools/plugins/file_read/manifest.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "name": "file_read", - "version": "1.0", - "entrypoint": "Tool", - "description": "Read file contents from allowed paths", - "args_schema": { - "path": {"type": "string", "required": true, "description": "File path to read"} - }, - "requires_permission": false -} \ No newline at end of file diff --git a/app/tools/plugins/file_write/__init__.py b/app/tools/plugins/file_write/__init__.py deleted file mode 100644 index 7cd8572..0000000 --- a/app/tools/plugins/file_write/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -from __future__ import annotations - -from app.core.contracts import ToolResult, UserTask -from app.tools.base import BaseTool -from app.tools.sandbox import ToolSandbox - - -class Tool(BaseTool): - name = "file_write" - description = "Write content to file" - - def __init__(self, sandbox: ToolSandbox) -> None: - self._sandbox = sandbox - - def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult: - path = args.get("path") - content = str(args.get("content", "")) - if not path: - return ToolResult(tool=self.name, ok=False, error="Missing path") - try: - resolved = self._sandbox.ensure_path_allowed(str(path)) - resolved.parent.mkdir(parents=True, exist_ok=True) - 
resolved.write_text(content, encoding="utf-8") - return ToolResult( - tool=self.name, - ok=True, - output=f"Wrote {len(content)} bytes", - metadata={"path": str(resolved), "size": len(content)}, - ) - except PermissionError as e: - return ToolResult(tool=self.name, ok=False, error=f"Access denied: {e}") - except Exception as e: - return ToolResult(tool=self.name, ok=False, error=f"Error: {e}") \ No newline at end of file diff --git a/app/tools/plugins/file_write/manifest.json b/app/tools/plugins/file_write/manifest.json deleted file mode 100644 index 742451a..0000000 --- a/app/tools/plugins/file_write/manifest.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "name": "file_write", - "version": "1.0", - "entrypoint": "Tool", - "description": "Write content to file", - "args_schema": { - "path": {"type": "string", "required": true, "description": "File path to write"}, - "content": {"type": "string", "required": true, "description": "Content to write"} - }, - "requires_permission": true -} \ No newline at end of file diff --git a/app/tools/plugins/memory_tools/__init__.py b/app/tools/plugins/memory_tools/__init__.py deleted file mode 100644 index ba60907..0000000 --- a/app/tools/plugins/memory_tools/__init__.py +++ /dev/null @@ -1,112 +0,0 @@ -from __future__ import annotations - -import logging -from typing import Any - -from app.core.contracts import ToolResult, UserTask -from app.tools.base import BaseTool - -logger = logging.getLogger(__name__) - - -class Tool(BaseTool): - name = "memory" - description = "Memory operations: insert, search, list" - - def __init__(self, memory_interface=None) -> None: - self._memory = memory_interface - - def execute(self, task: UserTask, args: dict[str, Any]) -> ToolResult: - action = args.get("action", "search") - - if action == "insert": - return self._insert(task, args) - elif action == "search": - return self._search(task, args) - elif action == "list": - return self._list(task, args) - else: - return ToolResult(tool=self.name, 
ok=False, error=f"Unknown action: {action}") - - def _insert(self, task: UserTask, args: dict[str, Any]) -> ToolResult: - text = args.get("text", "") - kind = args.get("kind", "fact") - source = args.get("source", "user") - weight = args.get("weight", 0.5) - - if not text: - return ToolResult(tool=self.name, ok=False, output="", error="text is required") - if not self._memory: - return ToolResult(tool=self.name, ok=False, output="", error="Memory not available") - - try: - entry = self._memory.insert( - text=text, - kind=kind, - source=source, - task_id=task.task_id, - session_id=task.session_id, - weight=weight, - ) - return ToolResult( - tool=self.name, - ok=True, - output=f"Stored: {entry.id}", - metadata={"entry_id": entry.id}, - ) - except Exception as e: - logger.warning(f"Memory insert failed: {e}") - return ToolResult(tool=self.name, ok=False, output="", error=str(e)) - - def _search(self, task: UserTask, args: dict[str, Any]) -> ToolResult: - query = args.get("query", "") - top_k = args.get("top_k", 5) - - if not query: - return ToolResult(tool=self.name, ok=False, output="", error="query is required") - if not self._memory: - return ToolResult(tool=self.name, ok=False, output="", error="Memory not available") - - try: - results = self._memory.search(query, top_k=top_k) - if not results: - return ToolResult(tool=self.name, ok=True, output="No results found", metadata={"count": 0}) - - output_lines = [] - for entry, score in results: - output_lines.append(f"[{score:.2f}] {entry.text[:100]}") - - return ToolResult( - tool=self.name, - ok=True, - output="\n".join(output_lines), - metadata={"count": len(results)}, - ) - except Exception as e: - logger.warning(f"Memory search failed: {e}") - return ToolResult(tool=self.name, ok=False, output="", error=str(e)) - - def _list(self, task: UserTask, args: dict[str, Any]) -> ToolResult: - limit = args.get("limit", 10) - - if not self._memory: - return ToolResult(tool=self.name, ok=False, output="", error="Memory not 
available") - - try: - entries = self._memory.get_recent(limit=limit) - if not entries: - return ToolResult(tool=self.name, ok=True, output="No memories", metadata={"count": 0}) - - output_lines = [] - for entry in entries: - output_lines.append(f"{entry.kind}: {entry.text[:80]}") - - return ToolResult( - tool=self.name, - ok=True, - output="\n".join(output_lines), - metadata={"count": len(entries)}, - ) - except Exception as e: - logger.warning(f"Memory list failed: {e}") - return ToolResult(tool=self.name, ok=False, output="", error=str(e)) \ No newline at end of file diff --git a/app/tools/plugins/memory_tools/manifest.json b/app/tools/plugins/memory_tools/manifest.json deleted file mode 100644 index ac23ef9..0000000 --- a/app/tools/plugins/memory_tools/manifest.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "name": "memory", - "version": "1.0", - "entrypoint": "Tool", - "description": "Memory operations: insert, search, list", - "args_schema": { - "action": { - "type": "string", - "required": true, - "description": "Action: insert, search, or list", - "enum": ["insert", "search", "list"] - }, - "text": {"type": "string", "required": false, "description": "Text to store (insert)"}, - "query": {"type": "string", "required": false, "description": "Query string (search)"}, - "kind": {"type": "string", "required": false, "description": "Memory kind: fact, command, etc"}, - "source": {"type": "string", "required": false, "description": "Source: user, system, etc"}, - "weight": {"type": "number", "required": false, "description": "Memory weight 0-1"}, - "top_k": {"type": "number", "required": false, "description": "Max results (search)"}, - "limit": {"type": "number", "required": false, "description": "Max entries (list)"} - }, - "requires_permission": false -} \ No newline at end of file diff --git a/app/tools/plugins/shell_exec/__init__.py b/app/tools/plugins/shell_exec/__init__.py deleted file mode 100644 index 83aecc6..0000000 --- 
a/app/tools/plugins/shell_exec/__init__.py +++ /dev/null @@ -1,55 +0,0 @@ -from __future__ import annotations - -from app.core.contracts import ToolResult, UserTask -from app.tools.base import BaseTool -from app.tools.sandbox import ToolSandbox - - -def _detect_sudo_auth_failure(output: str) -> bool: - normalized = output.lower() - return any( - marker in normalized - for marker in ( - "incorrect password", - "incorrect password attempt", - "sudo: no password was provided", - "sorry, try again", - "authentication failure", - ) - ) - - -class Tool(BaseTool): - name = "shell_exec" - description = "Execute shell commands" - - def __init__(self, sandbox: ToolSandbox) -> None: - self._sandbox = sandbox - - def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult: - command = str(args.get("command", "")).strip() - if not command: - return ToolResult(tool=self.name, ok=False, error="Missing command", metadata={"exit_code": -1}) - cwd = args.get("cwd") - stdin_secret = args.get("stdin_secret") - output_callback = args.get("__output_callback") - completed = self._sandbox.run_shell( - command=command, - cwd=str(cwd) if cwd else None, - stdin_data=str(stdin_secret) if stdin_secret is not None else None, - output_callback=output_callback if callable(output_callback) else None, - ) - output = completed.stdout if completed.returncode == 0 else completed.stderr or completed.stdout - sudo_auth_failed = completed.returncode != 0 and _detect_sudo_auth_failure( - f"{completed.stdout}\n{completed.stderr}" - ) - return ToolResult( - tool=self.name, - ok=completed.returncode == 0, - output=output, - error=None if completed.returncode == 0 else f"Command failed with exit code {completed.returncode}", - metadata={ - "exit_code": completed.returncode, - "sudo_auth_failed": sudo_auth_failed, - }, - ) diff --git a/app/tools/plugins/shell_exec/manifest.json b/app/tools/plugins/shell_exec/manifest.json deleted file mode 100644 index a797718..0000000 --- 
a/app/tools/plugins/shell_exec/manifest.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "name": "shell_exec", - "version": "1.0", - "entrypoint": "Tool", - "description": "Execute shell commands in sandboxed environment", - "args_schema": { - "command": {"type": "string", "required": true, "description": "Shell command to execute"}, - "cwd": {"type": "string", "required": false, "description": "Working directory"}, - "stdin_secret": {"type": "string", "required": false, "description": "Data to pass via stdin"} - }, - "requires_permission": true -} \ No newline at end of file diff --git a/app/tools/registry.py b/app/tools/registry.py deleted file mode 100644 index 1bcc296..0000000 --- a/app/tools/registry.py +++ /dev/null @@ -1,61 +0,0 @@ -from __future__ import annotations - -import logging -from typing import Any, Callable - -from app.tools.base import BaseTool -from app.tools.discover import ToolDiscovery - -logger = logging.getLogger(__name__) - - -class ToolRegistry: - def __init__(self) -> None: - self._tools: dict[str, BaseTool] = {} - self._schemas: dict[str, dict[str, Any]] = {} - - def register(self, tool: BaseTool) -> None: - self._tools[tool.name] = tool - - def discover_and_init( - self, - init_factory: Callable[[dict], BaseTool] | None = None, - ) -> None: - """Discover tools from plugins and initialize them.""" - discovery = ToolDiscovery() - discovered = discovery.discover() - - for name, data in discovered.items(): - manifest = data.get("manifest", {}) - - if init_factory: - tool = init_factory({"name": name, "manifest": manifest}) - else: - tool_instance = data.get("instance") - if tool_instance: - self._tools[name] = tool_instance - self._schemas[name] = { - "description": manifest.get("description", ""), - "args_schema": manifest.get("args_schema", {}), - "requires_permission": manifest.get("requires_permission", False), - } - logger.info(f"Registered tool: {name}") - logger.warning(f"No init_factory provided for {name}") - - def get(self, name: str) -> 
BaseTool: - if name not in self._tools: - raise KeyError(f"Tool {name} is not registered") - return self._tools[name] - - def list_names(self) -> list[str]: - return list(self._tools.keys()) - - def get_schema(self, name: str) -> dict[str, Any]: - return self._schemas.get(name, {}) - - def list_schemas(self) -> list[dict[str, Any]]: - return [ - {"name": name, **schema} - for name, schema in self._schemas.items() - ] - diff --git a/app/tools/sandbox.py b/app/tools/sandbox.py deleted file mode 100644 index 48de9ec..0000000 --- a/app/tools/sandbox.py +++ /dev/null @@ -1,139 +0,0 @@ -from __future__ import annotations - -import os -import signal -import subprocess -import threading -import time -from pathlib import Path -from typing import Callable - - -class ToolSandbox: - """Applies simple working directory and timeout restrictions.""" - - def __init__( - self, - allowed_root: str | Path, - timeout_ms: int, - command_timeout_ms: int | None = None, - idle_timeout_ms: int | None = None, - ) -> None: - self._allowed_root = Path(allowed_root).resolve() - self._timeout_seconds = max(timeout_ms / 1000, 0.001) - self._command_timeout_seconds = max((command_timeout_ms or timeout_ms) / 1000, 0.001) - self._idle_timeout_seconds = max((idle_timeout_ms or timeout_ms) / 1000, 0.001) - - def ensure_path_allowed(self, path: str | Path) -> Path: - resolved = Path(path).expanduser().resolve() - # Permission-first model: path is allowed if it exists - # Permission service will handle write/shell restrictions - return resolved - - def run_shell( - self, - command: str, - cwd: str | Path | None = None, - stdin_data: str | None = None, - output_callback: Callable[[str, str], None] | None = None, - ) -> subprocess.CompletedProcess[str]: - working_directory = self.ensure_path_allowed(cwd or self._allowed_root) - env = {"PATH": os.environ.get("PATH", "")} - if output_callback is None: - return subprocess.run( - command, - shell=True, - cwd=str(working_directory), - env=env, - text=True, - 
capture_output=True, - input=stdin_data, - timeout=self._command_timeout_seconds, - check=False, - ) - - process = subprocess.Popen( - command, - shell=True, - cwd=str(working_directory), - env=env, - text=True, - stdin=subprocess.PIPE if stdin_data is not None else None, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - start_new_session=True, - ) - - stdout_chunks: list[str] = [] - stderr_chunks: list[str] = [] - output_lock = threading.Lock() - last_output_at = time.monotonic() - - if stdin_data is not None and process.stdin is not None: - process.stdin.write(stdin_data) - process.stdin.close() - - def read_stream(stream_name: str) -> None: - stream = process.stdout if stream_name == "stdout" else process.stderr - if stream is None: - return - chunks = stdout_chunks if stream_name == "stdout" else stderr_chunks - try: - for line in iter(stream.readline, ""): - if not line: - break - chunks.append(line) - nonlocal last_output_at - with output_lock: - last_output_at = time.monotonic() - output_callback(stream_name, line) - finally: - stream.close() - - stdout_thread = threading.Thread(target=read_stream, args=("stdout",), daemon=True) - stderr_thread = threading.Thread(target=read_stream, args=("stderr",), daemon=True) - stdout_thread.start() - stderr_thread.start() - - timed_out = False - timeout_reason: str | None = None - started_at = time.monotonic() - return_code: int | None = None - while return_code is None: - return_code = process.poll() - if return_code is not None: - break - - now = time.monotonic() - with output_lock: - idle_for = now - last_output_at - if now - started_at > self._command_timeout_seconds: - timed_out = True - timeout_reason = f"Command timed out after {self._command_timeout_seconds:.0f}s" - break - if idle_for > self._idle_timeout_seconds: - timed_out = True - timeout_reason = f"Command produced no output for {self._idle_timeout_seconds:.0f}s" - break - time.sleep(0.1) - - if timed_out: - try: - os.killpg(process.pid, 
signal.SIGKILL) - except ProcessLookupError: - pass - except PermissionError: - process.kill() - return_code = process.wait() - timeout_message = f"{timeout_reason}\n" - stderr_chunks.append(timeout_message) - output_callback("stderr", timeout_message) - - stdout_thread.join(timeout=1) - stderr_thread.join(timeout=1) - return subprocess.CompletedProcess( - args=command, - returncode=return_code if not timed_out else -9, - stdout="".join(stdout_chunks), - stderr="".join(stderr_chunks), - ) diff --git a/app/tools/shell_exec.py b/app/tools/shell_exec.py deleted file mode 100644 index ea572c2..0000000 --- a/app/tools/shell_exec.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import annotations - -from app.core.contracts import ToolResult, UserTask -from app.tools.base import BaseTool -from app.tools.sandbox import ToolSandbox - - -def _detect_sudo_auth_failure(output: str) -> bool: - normalized = output.lower() - return any( - marker in normalized - for marker in ( - "incorrect password", - "incorrect password attempt", - "sudo: no password was provided", - "sudo: password incorrect", - "sorry, try again", - "authentication failure", - "wrong password", - ) - ) - - -class ShellExecTool(BaseTool): - name = "shell_exec" - - def __init__(self, sandbox: ToolSandbox) -> None: - self._sandbox = sandbox - - def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult: - command = str(args.get("command", "")).strip() - if not command: - return ToolResult(tool=self.name, ok=False, error="Missing command", metadata={"exit_code": -1}) - cwd = args.get("cwd") - stdin_secret = args.get("stdin_secret") - password = args.get("password") - output_callback = args.get("__output_callback") - - if password: - command = f'echo "{password}" | sudo -S {command}' - - completed = self._sandbox.run_shell( - command=command, - cwd=str(cwd) if cwd else None, - stdin_data=str(stdin_secret) if stdin_secret is not None else None, - output_callback=output_callback if 
callable(output_callback) else None, - ) - output = completed.stdout if completed.returncode == 0 else completed.stderr or completed.stdout - error_output = completed.stderr or completed.stdout - sudo_auth_failed = completed.returncode != 0 and _detect_sudo_auth_failure( - f"{completed.stdout}\n{completed.stderr}" - ) - needs_sudo = completed.returncode != 0 and "permission denied" in error_output.lower() and not sudo_auth_failed - - return ToolResult( - tool=self.name, - ok=completed.returncode == 0, - output=output, - error=None if completed.returncode == 0 else f"Command failed with exit code {completed.returncode}", - metadata={ - "exit_code": completed.returncode, - "needs_sudo": needs_sudo, - "sudo_auth_failed": sudo_auth_failed, - }, - ) diff --git a/config/models.json b/config/models.json deleted file mode 100644 index 0f221d6..0000000 --- a/config/models.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "thinker": { - "path": "Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf", - "backend": "vulkan", - "n_gpu_layers": -1, - "max_tokens": 2048, - "temperature": 0.3 - }, - "json_compiler": { - "path": "gemma-4-E4B-it-Q4_K_M.gguf", - "backend": "cpu", - "n_gpu_layers": 0, - "max_tokens": 1024, - "temperature": 0.1 - }, - "coder": { - "path": "X-Coder-SFT-Qwen3-8B.Q6_K.gguf", - "backend": "cpu", - "n_gpu_layers": 0, - "max_tokens": 2048, - "temperature": 0.2 - }, - "critic": { - "path": "gemma-4-E4B-it-Q4_K_M.gguf", - "backend": "cpu", - "n_gpu_layers": 0, - "max_tokens": 1024, - "temperature": 0.1 - }, - "sys_util": { - "path": "Menlo_Lucy-Q4_K_M.gguf", - "backend": "cpu", - "n_gpu_layers": 0, - "max_tokens": 1024, - "temperature": 0.1 - }, - "embeddings": { - "path": "all-MiniLM-L6-v2", - "model_name": "sentence-transformers/all-MiniLM-L6-v2", - "embedding_dim": 384 - } -} \ No newline at end of file diff --git a/config/models.json.backup b/config/models.json.backup deleted file mode 100644 index 0f221d6..0000000 --- a/config/models.json.backup +++ /dev/null @@ -1,42 +0,0 @@ 
-{ - "thinker": { - "path": "Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf", - "backend": "vulkan", - "n_gpu_layers": -1, - "max_tokens": 2048, - "temperature": 0.3 - }, - "json_compiler": { - "path": "gemma-4-E4B-it-Q4_K_M.gguf", - "backend": "cpu", - "n_gpu_layers": 0, - "max_tokens": 1024, - "temperature": 0.1 - }, - "coder": { - "path": "X-Coder-SFT-Qwen3-8B.Q6_K.gguf", - "backend": "cpu", - "n_gpu_layers": 0, - "max_tokens": 2048, - "temperature": 0.2 - }, - "critic": { - "path": "gemma-4-E4B-it-Q4_K_M.gguf", - "backend": "cpu", - "n_gpu_layers": 0, - "max_tokens": 1024, - "temperature": 0.1 - }, - "sys_util": { - "path": "Menlo_Lucy-Q4_K_M.gguf", - "backend": "cpu", - "n_gpu_layers": 0, - "max_tokens": 1024, - "temperature": 0.1 - }, - "embeddings": { - "path": "all-MiniLM-L6-v2", - "model_name": "sentence-transformers/all-MiniLM-L6-v2", - "embedding_dim": 384 - } -} \ No newline at end of file diff --git a/config/models.json.test b/config/models.json.test deleted file mode 100644 index 0f221d6..0000000 --- a/config/models.json.test +++ /dev/null @@ -1,42 +0,0 @@ -{ - "thinker": { - "path": "Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf", - "backend": "vulkan", - "n_gpu_layers": -1, - "max_tokens": 2048, - "temperature": 0.3 - }, - "json_compiler": { - "path": "gemma-4-E4B-it-Q4_K_M.gguf", - "backend": "cpu", - "n_gpu_layers": 0, - "max_tokens": 1024, - "temperature": 0.1 - }, - "coder": { - "path": "X-Coder-SFT-Qwen3-8B.Q6_K.gguf", - "backend": "cpu", - "n_gpu_layers": 0, - "max_tokens": 2048, - "temperature": 0.2 - }, - "critic": { - "path": "gemma-4-E4B-it-Q4_K_M.gguf", - "backend": "cpu", - "n_gpu_layers": 0, - "max_tokens": 1024, - "temperature": 0.1 - }, - "sys_util": { - "path": "Menlo_Lucy-Q4_K_M.gguf", - "backend": "cpu", - "n_gpu_layers": 0, - "max_tokens": 1024, - "temperature": 0.1 - }, - "embeddings": { - "path": "all-MiniLM-L6-v2", - "model_name": "sentence-transformers/all-MiniLM-L6-v2", - "embedding_dim": 384 - } -} \ No newline at end of file diff 
--git a/config/models.yaml b/config/models.yaml new file mode 100644 index 0000000..4449fc4 --- /dev/null +++ b/config/models.yaml @@ -0,0 +1,53 @@ +default_provider: llama_server + +models: + thinker: + provider: llama_server + base_url: http://127.0.0.1:8081/v1 + model: local-main + purpose: free_cognition + structured_output: false + temperature: 0.4 + max_output_tokens: 8192 + system_prompt: prompts/roles/thinker.md + + critic: + provider: llama_server + base_url: http://127.0.0.1:8081/v1 + model: local-main + purpose: reflection + structured_output: false + temperature: 0.1 + max_output_tokens: 4096 + system_prompt: prompts/roles/critic.md + + coder: + provider: llama_server + base_url: http://127.0.0.1:8081/v1 + model: local-main + purpose: code_generation + structured_output: false + temperature: 0.2 + max_output_tokens: 16384 + system_prompt: prompts/roles/coder.md + + action: + provider: llama_server + base_url: http://127.0.0.1:8081/v1 + model: local-main + purpose: action_directive + structured_output: true + temperature: 0.0 + max_output_tokens: 2048 + system_prompt: prompts/roles/action.md + response_schema: duck_core/schemas/action_directive.schema.json + + summary: + provider: llama_server + base_url: http://127.0.0.1:8081/v1 + model: local-main + purpose: context_summary + structured_output: false + temperature: 0.1 + max_output_tokens: 4096 + system_prompt: prompts/roles/summary.md diff --git a/config/permissions.json b/config/permissions.json deleted file mode 100644 index cca6b1b..0000000 --- a/config/permissions.json +++ /dev/null @@ -1,94 +0,0 @@ -{ - "description": "Permission-first model configuration", - "settings": { - "allow_caching": true, - "cache_file": "data/runtime/allowed_commands.json", - "normalize_commands": true, - "split_chained": true - }, - "command_categories": { - "hard_stop": { - "description": "Commands that are never executed - hard stop", - "allow_once": false, - "allow_always": false, - "commands": [ - "rm -rf /", - "rm 
-rf /*", - "dd if=/dev/zero of=/dev/sd*", - "dd if=/dev/zero of=/dev/hd*", - "mkfs", - "> /dev/sd*", - "> /dev/hd*" - ] - }, - "no_always": { - "description": "Dangerous commands - allow once only", - "allow_once": true, - "allow_always": false, - "commands": [ - "rm -rf *", - "rm -rf .*", - "curl |", - "wget -O- |", - ":(){:|:&};:", - "fork", - "chmod -R 000", - "chmod -R 777", - "chown -R", - "apt", - "apt-get", - "dpkg", - "yum", - "dnf", - "pacman", - "shutdown", - "reboot", - "halt", - "init 0", - "init 6", - "telinit", - "systemctl stop", - "systemctl start", - "systemctl restart", - "service stop", - "service start", - "kill -9 -1", - "killall", - "pkill -9", - "reboot -f", - "shutdown -h now", - "poweroff", - "echo .* > /proc/", - "echo .* > /sys/" - ] - }, - "normal": { - "description": "Normal commands - allow once or always", - "allow_once": true, - "allow_always": true, - "commands": [ - "shell_exec", - "file_write" - ], - "file_extensions": [ - ".py", - ".txt", - ".json", - ".md", - ".yaml", - ".yml", - ".sh", - ".bash" - ] - } - }, - "path_settings": { - "allow_read_outside": true, - "allow_write_paths": [ - "/home/mirivlad/git/ducklm", - "/tmp" - ], - "require_confirmation_for_write": true, - "require_confirmation_for_shell": true - } -} \ No newline at end of file diff --git a/config/prompts.json b/config/prompts.json deleted file mode 100644 index 4b39235..0000000 --- a/config/prompts.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "thinker": "You are the orchestrator of a local AI agent runtime. Your job is to analyze the user's task and decide how to execute it.\n\n## Decision Types\n\n1. **Direct response** — for simple questions, greetings, conversations:\n {\"type\": \"respond\", \"payload\": {\"text\": \"your answer\"}}\n\n2. **Single tool step** — for simple tasks needing one tool:\n {\"type\": \"step\", \"payload\": {\"tool\": \"shell_exec\", \"args\": {\"command\": \"...\"}}}\n\n3. 
**Multi-step plan** — for complex tasks that need decomposition:\n {\"type\": \"plan\", \"payload\": {\"steps\": [\n {\"id\": \"step-1\", \"tool\": \"shell_exec\", \"args\": {\"command\": \"...\"}, \"description\": \"...\", \"depends_on\": []},\n {\"id\": \"step-2\", \"tool\": \"file_read\", \"args\": {\"path\": \"...\"}, \"description\": \"...\", \"depends_on\": [\"step-1\"]}\n ]}}\n\n## When to use multi-step plan\n- Task requires multiple operations (search → read → write)\n- Task involves checking prerequisites before acting\n- Task requires gathering information before producing result\n- User asks to do something complex (setup, configure, analyze)\n\n## Memory\n- If memory recall results are provided, USE them to inform your decisions\n- If you know something from memory, mention it in step descriptions\n- Store important results for future use\n\n## Rules\n- ALWAYS respond with valid JSON only\n- Each step MUST have a unique id\n- Use depends_on for ordering constraints\n- Keep steps focused — one action per step\n- If unsure, start with an information-gathering step\n- Respond ONLY with valid JSON, no explanations", - - "orchestrator": "You are an expert orchestrator for a local AI agent system. 
Your role is to analyze the user's task and generate executable runtime steps.\n\nTool selection (choose the right tool):\n- shell_exec: for running commands, checking programs exist ('which', '--version'), searching files\n- file_read: for reading contents of a file (must be existing file path)\n- file_write: for creating or updating files\n- memory: for storing or searching memory\n\nSTRICT OUTPUT FORMAT - MUST follow exactly:\n\nSingle step:\n{\"type\": \"step\", \"payload\": {\"tool\": \"shell_exec\", \"args\": {\"command\": \"...\"}}}\n{\"type\": \"step\", \"payload\": {\"tool\": \"file_read\", \"args\": {\"path\": \"...\"}}}\n{\"type\": \"step\", \"payload\": {\"tool\": \"file_write\", \"args\": {\"path\": \"...\", \"content\": \"...\"}}}\n\nMulti-step plan:\n{\"type\": \"plan\", \"payload\": {\"steps\": [{\"tool\": \"file_read\", \"args\": {\"path\": \"...\"}, \"description\": \"...\", \"depends_on\": []}]}}\n\nDirect response:\n{\"type\": \"respond\", \"payload\": {\"text\": \"...\"}}\n\nIMPORTANT:\n- Use exactly {\"type\": \"step|plan|respond\", \"payload\": {...}} format\n- Do NOT output array alone\n- Do NOT use \"kind\" - use \"type\"\n- Respond ONLY with valid JSON\n- Your response MUST be complete valid JSON - the closing brace } MUST be present\n- Do NOT truncate your response - if you cannot fit all steps, use a single step\n\nTool selection:\n- For checking if a program/command exists: use shell_exec with 'which ' or ' --version'\n- For reading file contents: use file_read with path to file (NOT command)\n- For executing any command: use shell_exec\n- Previous experience (from memory) may help - consider it but YOU decide how to proceed", - - "planning": "You are a planning specialist. 
Generate execution plans.\n\nOutput MUST be:\n{\"type\": \"plan\", \"version\": \"1.0\", \"payload\": {\"steps\": [{\"tool\": \"\", \"args\": {}, \"description\": \"...\", \"depends_on\": []}]}}\n\nRules:\n- Each step must have unique id (auto-generated)\n- Use \"depends_on\" for step ordering\n- Use \"tool\" for tool operations\n- Respond ONLY with valid JSON", - - "coder": "You are an expert code generation model.\n\nOutput format:\n{\"type\": \"code\", \"payload\": {\"language\": \"python\", \"content\": \"...\"}}\n\nOR for completion:\n{\"type\": \"respond\", \"payload\": {\"text\": \"...\"}}\n\nGenerate clean, working code. Respond ONLY with valid JSON.", - - "critic": "You are a critic model. Evaluate tool execution results.\n\nScoring criteria:\n- correctness: 0-1 (does result accomplish task?)\n- usefulness: 0-1 (is result useful?)\n- safety: 0-1 (is result safe?)\n- suggest_memory: boolean (should this be stored in memory?)\n- weight: 0-1 (importance score)\n- explanation: brief reasoning\n\nOutput format:\n{\"type\": \"evaluation\", \"payload\": {\"correctness\": 0.0-1.0, \"usefulness\": 0.0-1.0, \"safety\": 0.0-1.0, \"suggest_memory\": true|false, \"weight\": 0.0-1.0, \"explanation\": \"...\"}}\n\nRespond ONLY with valid JSON.", - - "system": "You are ducklm, a local AI agent runtime.\n\nSTRICT RULES:\n- You MUST strictly follow execution schemas\n- You are NOT allowed to output free-form text\n- All outputs MUST be valid JSON matching runtime contracts\n- Use exact tool names from available tool set\n\nCurrent capabilities:\n- Execute shell commands (shell_exec)\n- Read/write files (file_read, file_write)\n- Memory operations (memory)\n\nAlways respond with valid JSON.", - - "sys_util": "You are a STRICT JSON repair engine inside a production AI runtime.\nYour job is ONLY to fix invalid JSON syntax.\nYou are NOT allowed to:\n- change meaning of data\n- add new fields\n- remove valid fields\n- interpret intent\n- explain anything\n- reformat structure 
logically\n---\nINPUT:\nYou receive a malformed or invalid JSON string.\n---\nOUTPUT RULES:\n- Output ONLY valid JSON\n- No markdown\n- No comments\n- No explanations\n- No extra text\n---\nREPAIR RULES (STRICT):\nFix ONLY syntax issues:\n- missing or extra commas\n- missing quotes\n- incorrect brackets\n- trailing commas\n- invalid escaping\n- broken strings\n- unbalanced braces\nDO NOT:\n- rename keys\n- reorder fields intentionally\n- guess missing semantic data\n- \"improve\" structure\n---\nIMPORTANT:\nIf multiple valid repairs exist:\n→ choose the minimal change that makes JSON valid\n---\nOUTPUT MUST BE VALID JSON OR NOTHING ELSE\nInvalid JSON:" -} diff --git a/config/prompts/coder.md b/config/prompts/coder.md deleted file mode 100644 index 4dde8b6..0000000 --- a/config/prompts/coder.md +++ /dev/null @@ -1,9 +0,0 @@ -You are an expert code generation model. - -Output format: -{"type": "code", "payload": {"language": "python", "content": "..."}} - -OR for completion: -{"type": "respond", "payload": {"text": "..."}} - -Generate clean, working code. Respond ONLY with valid JSON. \ No newline at end of file diff --git a/config/prompts/critic.md b/config/prompts/critic.md deleted file mode 100644 index f3b986e..0000000 --- a/config/prompts/critic.md +++ /dev/null @@ -1,14 +0,0 @@ -You are a critic model. Evaluate tool execution results. - -Scoring criteria: -- correctness: 0-1 (does result accomplish task?) -- usefulness: 0-1 (is result useful?) -- safety: 0-1 (is result safe?) -- suggest_memory: boolean (should this be stored in memory?) -- weight: 0-1 (importance score) -- explanation: brief reasoning - -Output format: -{"type": "evaluation", "payload": {"correctness": 0.0-1.0, "usefulness": 0.0-1.0, "safety": 0.0-1.0, "suggest_memory": true|false, "weight": 0.0-1.0, "explanation": "..."}} - -Respond ONLY with valid JSON. 
\ No newline at end of file diff --git a/config/prompts/json_compiler.md b/config/prompts/json_compiler.md deleted file mode 100644 index b944a93..0000000 --- a/config/prompts/json_compiler.md +++ /dev/null @@ -1,25 +0,0 @@ -You are a JSON Compiler. Convert semantic plan to strict JSON. - -INPUT: Semantic plan from Thinker -OUTPUT: Valid JSON only - -RULES: -- Convert ONLY, do not make decisions -- Do not invent tools -- Do not modify plan logic -- Do not skip steps -- Output ONLY valid JSON - -AVAILABLE TOOLS: -- file_write (requires permission) -- shell_exec (execute shell commands, requires permission) -- memory (no permission needed) -- file_read (no permission needed) -- respond (just return text to user, no execution) - -IMPORTANT: Use exactly "shell_exec" (not "shell") for shell commands! - -OUTPUT FORMAT: -{"type": "plan", "payload": {"steps": [{"id": "1", "tool": "shell_exec", "args": {"command": "..."}, "depends_on": []}]}} -OR -{"type": "respond", "payload": {"text": "..."}} diff --git a/config/prompts/orchestrator.md b/config/prompts/orchestrator.md deleted file mode 100644 index e8eeb95..0000000 --- a/config/prompts/orchestrator.md +++ /dev/null @@ -1,34 +0,0 @@ -You are an expert orchestrator for a local AI agent system. Your role is to analyze the user's task and generate executable runtime steps. 
- -Tool selection (choose the right tool): -- shell_exec: for running commands, checking programs exist ('which', '--version'), searching files -- file_read: for reading contents of a file (must be existing file path) -- file_write: for creating or updating files -- memory: for storing or searching memory - -STRICT OUTPUT FORMAT - MUST follow exactly: - -Single step: -{"type": "step", "payload": {"tool": "shell_exec", "args": {"command": "..."}}} -{"type": "step", "payload": {"tool": "file_read", "args": {"path": "..."}}} -{"type": "step", "payload": {"tool": "file_write", "args": {"path": "...", "content": "..."}}} - -Multi-step plan: -{"type": "plan", "payload": {"steps": [{"tool": "file_read", "args": {"path": "..."}, "description": "...", "depends_on": []}]}} - -Direct response: -{"type": "respond", "payload": {"text": "..."}} - -IMPORTANT: -- Use exactly {"type": "step|plan|respond", "payload": {...}} format -- Do NOT output array alone -- Do NOT use "kind" - use "type" -- Respond ONLY with valid JSON -- Your response MUST be complete valid JSON - the closing brace } MUST be present -- Do NOT truncate your response - if you cannot fit all steps, use a single step - -Tool selection: -- For checking if a program/command exists: use shell_exec with 'which ' or ' --version' -- For reading file contents: use file_read with path to file (NOT command) -- For executing any command: use shell_exec -- Previous experience (from memory) may help - consider it but YOU decide how to proceed \ No newline at end of file diff --git a/config/prompts/planning.md b/config/prompts/planning.md deleted file mode 100644 index 186cdf6..0000000 --- a/config/prompts/planning.md +++ /dev/null @@ -1,10 +0,0 @@ -You are a planning specialist. Generate execution plans. 
- -Output MUST be: -{"type": "plan", "version": "1.0", "payload": {"steps": [{"tool": "", "args": {}, "description": "...", "depends_on": []}]}} - -Rules: -- Each step must have unique id (auto-generated) -- Use "depends_on" for step ordering -- Use "tool" for tool operations -- Respond ONLY with valid JSON \ No newline at end of file diff --git a/config/prompts/sys_util.md b/config/prompts/sys_util.md deleted file mode 100644 index daeefb2..0000000 --- a/config/prompts/sys_util.md +++ /dev/null @@ -1,41 +0,0 @@ -You are a STRICT JSON repair engine inside a production AI runtime. -Your job is ONLY to fix invalid JSON syntax. -You are NOT allowed to: -- change meaning of data -- add new fields -- remove valid fields -- interpret intent -- explain anything -- reformat structure logically ---- -INPUT: -You receive a malformed or invalid JSON string. ---- -OUTPUT RULES: -- Output ONLY valid JSON -- No markdown -- No comments -- No explanations -- No extra text ---- -REPAIR RULES (STRICT): -Fix ONLY syntax issues: -- missing or extra commas -- missing quotes -- incorrect brackets -- trailing commas -- invalid escaping -- broken strings -- unbalanced braces -DO NOT: -- rename keys -- reorder fields intentionally -- guess missing semantic data -- "improve" structure ---- -IMPORTANT: -If multiple valid repairs exist: -→ choose the minimal change that makes JSON valid ---- -OUTPUT MUST BE VALID JSON OR NOTHING ELSE -Invalid JSON: \ No newline at end of file diff --git a/config/prompts/system.md b/config/prompts/system.md deleted file mode 100644 index 6e5c00f..0000000 --- a/config/prompts/system.md +++ /dev/null @@ -1,14 +0,0 @@ -You are ducklm, a local AI agent runtime. 
- -STRICT RULES: -- You MUST strictly follow execution schemas -- You are NOT allowed to output free-form text -- All outputs MUST be valid JSON matching runtime contracts -- Use exact tool names from available tool set - -Current capabilities: -- Execute shell commands (shell_exec) -- Read/write files (file_read, file_write) -- Memory operations (memory) - -Always respond with valid JSON. \ No newline at end of file diff --git a/config/prompts/thinker.md b/config/prompts/thinker.md deleted file mode 100644 index 679d89d..0000000 --- a/config/prompts/thinker.md +++ /dev/null @@ -1,36 +0,0 @@ -You are a Thinker. Analyze user task and create execution plan. - -CONTEXT: -{task_summary} -{memory_context} - -AVAILABLE TOOLS (injected at runtime): -{tools_json} - -INSTRUCTIONS: -1. Understand what user wants -2. Create step-by-step plan in natural language -3. Choose appropriate tools from available -4. If the user asks about the current local machine, filesystem, processes, - packages, logs, runtime state, or anything that must be observed rather than - answered from general knowledge, use an appropriate tool. -5. For exploratory tasks, prefer one robust inspection command over many brittle - dependent checks. Missing optional files should be treated as information, not - as a fatal failure. 
- -MODE: {mode_hint} -- If mode is "execution": create a plan with TOOL STEPS (shell_exec, file_write, etc) -- If mode is "conversation": just respond with text, NO tool execution -- If mode is "clarification_needed": ask user for clarification - -OUTPUT FORMAT (SEMANTIC PLAN - NOT JSON): -For execution mode: -ПЛАН: -Шаг 1: [use tool - e.g., shell_exec] -Шаг 2: [use tool] - -For conversation mode: -ОТВЕТ: [just text, no tools needed] - -For clarification: -ОТВЕТ: [вопрос пользователю для уточнения] diff --git a/config/runtime.json b/config/runtime.json deleted file mode 100644 index 1cf4efb..0000000 --- a/config/runtime.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "step_timeout_ms": 30000, - "task_timeout_ms": 300000, - "shell_command_timeout_ms": 3600000, - "shell_idle_timeout_ms": 600000, - "planner_retry_limit": 2, - "tool_retry_limit": 1, - "replan_limit": 1, - "max_execution_steps": 20, - "retrieval_top_k": 5, - "max_context_tokens": 8192, - "context_budgets": { - "system": 512, - "task": 512, - "memory": 2048, - "execution": 2048, - "tools": 1024, - "safety": 512 - }, - "reserve_for_generation_pct": 25, - "orchestrator_retry_limit": 2, - "memory_thresholds": { - "default_store_weight": 0.8 - }, - "critic_fallback_policy": "continue_without_critic", - "checkpoint_policy": { - "save_on_transition": true - }, - "event_retention_policy": { - "keep_all": true - }, - "streaming_settings": { - "enabled": true - }, - "debug": true, - "debug_orchestrator_log_length": 500, - "json_fix_retry_limit": 2, - "json_fix_use_sys_util": true, - "intent_classifier": "thinker", - "recall_model": "json_compiler", - "critic_retry_limit": 2 -} diff --git a/docker-compose.memory.yml b/docker-compose.memory.yml new file mode 100644 index 0000000..0040f47 --- /dev/null +++ b/docker-compose.memory.yml @@ -0,0 +1,11 @@ +services: + qdrant: + image: qdrant/qdrant:latest + ports: + - "6333:6333" + - "6334:6334" + volumes: + - qdrant_storage:/qdrant/storage + +volumes: + qdrant_storage: diff 
--git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..1862a09 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,5 @@ +# Architecture + +DuckLM is organized as WebChat and FastAPI over Duck Core. Duck Core owns task state, context building, model calls, events, tools, approvals, skills, experience, and memory adapters. + +The first vertical slice is WebChat -> FastAPI -> RuntimeLoop -> ModelClient -> llama-server -> SQLite event timeline. diff --git a/docs/experience_learning.md b/docs/experience_learning.md new file mode 100644 index 0000000..a5ad5a4 --- /dev/null +++ b/docs/experience_learning.md @@ -0,0 +1,9 @@ +# Experience Learning + +Experience records are stored in SQLite. Suggested skill updates are written to `skills/_proposals/` and are not applied automatically. + +Use: + +```bash +curl http://127.0.0.1:8000/v1/experience +``` diff --git a/docs/how_to_run.md b/docs/how_to_run.md new file mode 100644 index 0000000..b185a8f --- /dev/null +++ b/docs/how_to_run.md @@ -0,0 +1,71 @@ +# How To Run + +1. Install dependencies: + +```bash +python3 -m venv .venv +. .venv/bin/activate +python -m pip install -e ".[dev]" +``` + +2. Configure: + +```bash +cp .env.example .env +``` + +The default `DUCK_MAIN_MODEL_PATH` points to `./models/Qwen3.6/nonMTP/Qwen3.6-35B-A3B-UD-Q4_K_M.gguf`. + +3. Start `llama-server`: + +```bash +bash scripts/llama/start_main.sh start +``` + +Useful process commands: + +```bash +bash scripts/llama/start_main.sh status +bash scripts/llama/start_main.sh logs --follow +bash scripts/llama/start_main.sh restart +bash scripts/llama/start_main.sh stop +``` + +4. Start DuckLM API: + +```bash +python -m duck_core.api +``` + +5. Open WebChat: + +```text +http://127.0.0.1:8000/ +``` + +6. Send a task: + +```bash +curl -X POST http://127.0.0.1:8000/v1/chat \ + -H "Content-Type: application/json" \ + -d '{"message":"Скажи коротко, что ты DuckLM","workspace":"./workspace","debug":true}' +``` + +7. 
Inspect events: + +```bash +curl http://127.0.0.1:8000/v1/tasks/<task_id>/events +``` + +8. Approvals: + +```bash +curl http://127.0.0.1:8000/v1/approvals/pending +``` + +9. Stop services: + +```bash +bash scripts/llama/start_main.sh stop +docker compose -f docker-compose.memory.yml down +``` diff --git a/docs/how_to_test.md b/docs/how_to_test.md new file mode 100644 index 0000000..7f86e0e --- /dev/null +++ b/docs/how_to_test.md @@ -0,0 +1,15 @@ +# How To Test + +Run smoke tests: + +```bash +python -m pytest tests/smoke -v +``` + +Run verification scripts against a running API: + +```bash +bash scripts/verify/verify_basic_chat.sh +bash scripts/verify/verify_models_roles.sh +bash scripts/verify/verify_tool_blocking.sh +``` diff --git a/docs/local_llama_server.md b/docs/local_llama_server.md new file mode 100644 index 0000000..8b68e44 --- /dev/null +++ b/docs/local_llama_server.md @@ -0,0 +1,44 @@ +# Local Llama Server + +DuckLM expects an OpenAI-compatible `llama-server` at `http://127.0.0.1:8081/v1` by default. + +On the current Radeon RX580 system, `llama.cpp` is built locally with Vulkan: + +```bash +bash scripts/llama/build_vulkan.sh +``` + +The main model is Qwen3.6 35B A3B nonMTP: + +```text +models/Qwen3.6/nonMTP/Qwen3.6-35B-A3B-UD-Q4_K_M.gguf +``` + +Start it in the background with: + +```bash +bash scripts/llama/start_main.sh start +``` + +Manage the process: + +```bash +bash scripts/llama/start_main.sh status +bash scripts/llama/start_main.sh logs +bash scripts/llama/start_main.sh logs --follow +bash scripts/llama/start_main.sh restart +bash scripts/llama/start_main.sh stop +``` + +The local `.env` uses: + +```env +DUCK_LLAMA_SERVER_BIN=./vendor/llama.cpp/build/bin/llama-server +DUCK_CTX_SIZE=4096 +DUCK_N_GPU_LAYERS=20 +DUCK_PARALLEL=1 +DUCK_LLAMA_DEVICE=Vulkan0 +DUCK_LLAMA_EXTRA_ARGS="--reasoning off --cache-ram 0" +``` + +MTP is available only through `scripts/llama/start_thinker_mtp_experimental.sh` and is not used by the action JSON endpoint by default. 
diff --git a/docs/memory_architecture.md b/docs/memory_architecture.md new file mode 100644 index 0000000..4f40f7f --- /dev/null +++ b/docs/memory_architecture.md @@ -0,0 +1,5 @@ +# Memory Architecture + +Semantic memory uses Qdrant as the vector store. Embeddings come from `/v1/embeddings` when the model backend supports it. + +If embeddings are unavailable, `VectorMemory` fails explicitly with `EmbeddingsUnavailableError`; it does not invent a local embedding algorithm. diff --git a/docs/model_roles.md b/docs/model_roles.md new file mode 100644 index 0000000..be84223 --- /dev/null +++ b/docs/model_roles.md @@ -0,0 +1,7 @@ +# Model Roles + +Roles are logical, not physical. `thinker`, `critic`, `coder`, `action`, and `summary` may all point to the same model. + +Each role can differ by prompt, temperature, output limit, response format, schema, memory scope, and endpoint. Request-level parameters can change per call. Backend-level parameters such as GGUF path, context size, GPU offload, MTP, and server port require the backend to be started with the desired settings. + +See `config/models.yaml` for one model mapped to all roles. diff --git a/docs/performance_mtp.md b/docs/performance_mtp.md new file mode 100644 index 0000000..49e2c26 --- /dev/null +++ b/docs/performance_mtp.md @@ -0,0 +1,5 @@ +# Performance And MTP + +MTP/speculative decoding is an inference backend concern. DuckLM keeps action JSON on the normal endpoint by default. + +Use `scripts/llama/start_thinker_mtp_experimental.sh` only for experiments. Benchmark scaffolding is in `scripts/bench/bench_runtime.py`. diff --git a/docs/plans/ui-bootstrap-review-plan.md b/docs/plans/ui-bootstrap-review-plan.md deleted file mode 100644 index 7b58c27..0000000 --- a/docs/plans/ui-bootstrap-review-plan.md +++ /dev/null @@ -1,24 +0,0 @@ -# UI Bootstrap And Review Flow Plan - -## Goal - -Move the web chat UI to Bootstrap 5.3 with Bootswatch themes and improve review/password/terminal-output ergonomics. 
- -## Required Changes - -- Replace the current hand-written visual system in `app/api/static/index.html` with Bootstrap 5.3 layout/components. -- Add Bootswatch theme support with a visible theme selector and persistent localStorage choice. -- Password/secret input must submit on Enter as well as the "Отправить" button. -- Console/tool output must render inside a collapsed Bootstrap accordion item. -- The accordion body must contain terminal-style output inside `
`.
-- The terminal accordion must expand only when the user clicks it.
-- Review UI must show critic/system assessment and user voting buttons:
-  - `Ошибочное действие`
-  - `Всё верно`
-  - optional correction/comment text.
-
-## Notes
-
-- Keep runtime event handling WebSocket-driven.
-- Do not mix console output with assistant prose.
-- Keep raw tool output available for debugging, but collapsed by default.
diff --git a/docs/skills.md b/docs/skills.md
new file mode 100644
index 0000000..d2275e9
--- /dev/null
+++ b/docs/skills.md
@@ -0,0 +1,9 @@
+# Skills
+
+Skills are procedural memory, not hardcoded routing. The first skill is `analyze_project`, loaded from `skills/analyze_project/skill.yaml`.
+
+Use:
+
+```bash
+curl http://127.0.0.1:8000/v1/skills
+```
diff --git a/docs/superpowers/plans/2026-05-19-ducklm-runtime.md b/docs/superpowers/plans/2026-05-19-ducklm-runtime.md
new file mode 100644
index 0000000..97a10cd
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-19-ducklm-runtime.md
@@ -0,0 +1,83 @@
+# DuckLM Runtime Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Build the first runnable DuckLM local cognitive runtime from `Ducklm.md`.
+
+**Architecture:** FastAPI exposes WebChat and HTTP endpoints, RuntimeLoop coordinates tasks, ModelClient calls an OpenAI-compatible `llama-server`, and SQLite persists tasks/events/approvals/experience. Tools, skills, and memory are small adapters with clear boundaries so later stages can grow without turning the runtime into hardcoded workflow rules.
+
+**Tech Stack:** Python 3.11+, FastAPI, httpx, aiosqlite, Pydantic, Jinja2, PyYAML, jsonschema, Qdrant client.
+
+---
+
+### Task 1: Tests First
+
+**Files:**
+- Create: `tests/smoke/test_models_config.py`
+- Create: `tests/smoke/test_model_client.py`
+- Create: `tests/smoke/test_api_health.py`
+- Create: `tests/smoke/test_event_log.py`
+- Create: `tests/smoke/test_action_directive_schema.py`
+- Create: `tests/smoke/test_tool_gateway.py`
+- Create: `tests/smoke/test_approvals.py`
+- Create: `tests/smoke/test_skill_registry.py`
+- Create: `tests/smoke/test_experience_recorder.py`
+- Create: `tests/smoke/test_vector_memory.py`
+
+- [ ] Write smoke tests for config, model role mapping, stores, tools, approvals, skills, experience, memory, and API health.
+- [ ] Run `python -m pytest tests/smoke -v` and verify tests fail because implementation modules do not exist.
+
+### Task 2: Runtime Core
+
+**Files:**
+- Create: `pyproject.toml`
+- Create: `.env.example`
+- Create: `config/models.yaml`
+- Create: `duck_core/config.py`
+- Create: `duck_core/model_client.py`
+- Create: `duck_core/events/store.py`
+- Create: `duck_core/tasks/store.py`
+- Create: `duck_core/tasks/state.py`
+- Create: `duck_core/context_builder.py`
+- Create: `duck_core/runtime_loop.py`
+- Create: `duck_core/api.py`
+
+- [ ] Implement settings and model config loading.
+- [ ] Implement ModelClient role-based chat calls with latency and usage capture.
+- [ ] Implement SQLite task and event stores.
+- [ ] Implement RuntimeLoop for `POST /v1/chat`.
+- [ ] Implement FastAPI endpoints and WebChat rendering.
+
+### Task 3: Stage Adapters
+
+**Files:**
+- Create: `duck_core/tools/*`
+- Create: `duck_core/approvals/service.py`
+- Create: `duck_core/skills/registry.py`
+- Create: `duck_core/experience/recorder.py`
+- Create: `duck_core/reflection.py`
+- Create: `duck_core/memory/*`
+- Create: `duck_core/schemas/action_directive.schema.json`
+
+- [ ] Implement safe file read/write and allowlisted shell execution.
+- [ ] Implement exact-action approval records.
+- [ ] Implement filesystem SkillRegistry.
+- [ ] Implement experience recording and skill proposal writing.
+- [ ] Implement Qdrant memory adapter that fails explicitly with `EmbeddingsUnavailableError` when embeddings are unavailable.
+
+### Task 4: Project Surface
+
+**Files:**
+- Create: `scripts/llama/*`
+- Create: `scripts/verify/*`
+- Create: `scripts/bench/bench_runtime.py`
+- Create: `duck_core/web/templates/*`
+- Create: `duck_core/web/static/*`
+- Create: `skills/analyze_project/*`
+- Create: `docker-compose.memory.yml`
+- Create: `Makefile`
+- Create: `README.md`
+- Create: `docs/*.md`
+
+- [ ] Add llama-server scripts, verification scripts, benchmark, WebChat pages, starter skill, compose file, make targets, and docs.
+- [ ] Run smoke tests and syntax checks.
diff --git a/docs/tool_gateway.md b/docs/tool_gateway.md
new file mode 100644
index 0000000..e960d81
--- /dev/null
+++ b/docs/tool_gateway.md
@@ -0,0 +1,9 @@
+# Tool Gateway
+
+The model does not execute tools directly. It emits an action directive and `ToolGateway` validates the tool name and arguments before execution.
+
+Implemented tools:
+
+- `file_read`: reads inside workspace only.
+- `file_write`: writes inside workspace only and refuses overwrites unless `overwrite=true`.
+- `shell_exec_safe`: runs only allowlisted commands and blocks dangerous commands.
diff --git a/docs/web_api.md b/docs/web_api.md
new file mode 100644
index 0000000..edd83a5
--- /dev/null
+++ b/docs/web_api.md
@@ -0,0 +1,25 @@
+# Web API
+
+Endpoints:
+
+```text
+GET  /health
+GET  /v1/status
+GET  /v1/models/roles
+GET  /v1/models/ping
+POST /v1/chat
+POST /v1/tasks
+GET  /v1/tasks
+GET  /v1/tasks/{task_id}
+GET  /v1/tasks/{task_id}/events
+GET  /v1/tasks/{task_id}/stream
+GET  /v1/approvals/pending
+POST /v1/approvals/{approval_id}/allow_once
+POST /v1/approvals/{approval_id}/allow_forever
+POST /v1/approvals/{approval_id}/deny
+GET  /v1/skills
+GET  /v1/skills/{skill_id}
+GET  /v1/experience
+GET  /v1/experience/{id}
+GET  /v1/memory/search?q=...
+```
diff --git a/duck_core/__init__.py b/duck_core/__init__.py
new file mode 100644
index 0000000..a05eb9a
--- /dev/null
+++ b/duck_core/__init__.py
@@ -0,0 +1,3 @@
+__all__ = ["__version__"]
+
+__version__ = "0.1.0"
diff --git a/duck_core/api.py b/duck_core/api.py
new file mode 100644
index 0000000..1abad91
--- /dev/null
+++ b/duck_core/api.py
@@ -0,0 +1,348 @@
+import asyncio
+import json
+import logging
+from pathlib import Path
+from typing import Any
+
+import uvicorn
+from fastapi import FastAPI, HTTPException, Request
+from fastapi.responses import HTMLResponse, StreamingResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from pydantic import BaseModel
+
+from duck_core.approvals.service import ApprovalService
+from duck_core.config import get_settings
+from duck_core.events.store import EventStore
+from duck_core.experience.recorder import ExperienceRecorder
+from duck_core.memory.vector_memory import EmbeddingsUnavailableError, VectorMemory
+from duck_core.model_client import ModelClient
+from duck_core.runtime_loop import RuntimeLoop
+from duck_core.skills.registry import SkillRegistry
+from duck_core.tasks.store import TaskStore
+
+logger = logging.getLogger(__name__)
+
+
+class ChatRequest(BaseModel):  # JSON request body accepted by POST /v1/chat and POST /v1/chat/stream
+    message: str  # user message; passed as the first argument to runtime.run_chat / task_store.create_task
+    workspace: str | None = None  # optional override; the handlers use settings.workspace when this is falsy
+    debug: bool = False  # forwarded unchanged to runtime.run_chat / task_store.create_task
+
+
+def create_app() -> FastAPI:
+    settings = get_settings()
+    if settings.api_host == "0.0.0.0":
+        logger.warning(
+            "DuckLM API is listening on 0.0.0.0. This may expose local tool execution endpoints."
+        )
+    Path(settings.workspace).mkdir(parents=True, exist_ok=True)
+    Path(settings.db_path).parent.mkdir(parents=True, exist_ok=True)
+
+    app = FastAPI(title="DuckLM", version="0.1.0")
+    templates = Jinja2Templates(directory="duck_core/web/templates")
+    app.mount("/static", StaticFiles(directory="duck_core/web/static"), name="static")
+
+    task_store = TaskStore(settings.db_path)
+    event_store = EventStore(settings.db_path)
+    model_client = ModelClient()
+    approvals = ApprovalService(settings.db_path)
+    runtime = RuntimeLoop(task_store, event_store, model_client, approval_service=approvals)
+    skills = SkillRegistry("skills")
+    experience = ExperienceRecorder(settings.db_path)
+    memory = VectorMemory(settings.qdrant_url, embeddings_base_url=None)
+
+    @app.on_event("startup")
+    async def startup() -> None:
+        await task_store.init()
+        await event_store.init()
+        await approvals.init()
+        await experience.init()
+
+    @app.get("/", response_class=HTMLResponse)
+    async def index(request: Request) -> HTMLResponse:
+        return templates.TemplateResponse(request, "index.html")
+
+    @app.get("/approvals", response_class=HTMLResponse)
+    async def approvals_page(request: Request) -> HTMLResponse:
+        return templates.TemplateResponse(request, "approvals.html")
+
+    @app.get("/skills", response_class=HTMLResponse)
+    async def skills_page(request: Request) -> HTMLResponse:
+        return templates.TemplateResponse(request, "skills.html")
+
+    @app.get("/memory", response_class=HTMLResponse)
+    async def memory_page(request: Request) -> HTMLResponse:
+        return templates.TemplateResponse(request, "memory.html")
+
+    @app.get("/experience", response_class=HTMLResponse)
+    async def experience_page(request: Request) -> HTMLResponse:
+        return templates.TemplateResponse(request, "experience.html")
+
+    @app.get("/health")
+    async def health() -> dict[str, str]:
+        return {"status": "ok"}
+
+    @app.get("/v1/status")
+    async def status() -> dict[str, Any]:
+        return {
+            "name": "DuckLM",
+            "version": "0.1.0",
+            "api_host": settings.api_host,
+            "api_port": settings.api_port,
+            "workspace": settings.workspace,
+            "db_path": settings.db_path,
+        }
+
+    @app.get("/v1/models/roles")
+    async def roles() -> dict[str, Any]:
+        return model_client.list_roles()
+
+    @app.get("/v1/models/ping")
+    async def models_ping() -> dict[str, Any]:
+        return await model_client.ping()
+
+    @app.post("/v1/chat")
+    async def chat(body: ChatRequest) -> dict[str, Any]:
+        result = await runtime.run_chat(body.message, body.workspace or settings.workspace, body.debug)
+        return result.__dict__
+
+    def sse(event: str, payload: dict[str, Any]) -> str:  # format one Server-Sent Events frame: "event:" line + JSON "data:" line
+        return f"event: {event}\ndata: {json.dumps(payload, ensure_ascii=False)}\n\n"  # ensure_ascii=False keeps non-ASCII text unescaped; the trailing blank line terminates the frame
+
+    async def emit_tool_events(task_id: str, after_sequence: int):  # async generator yielding SSE frames for stored tool events
+        events = await event_store.list_events(task_id)  # load the events recorded for this task from the event store
+        visible_types = {  # only these event types are forwarded to the stream
+            "tool_call_started",
+            "tool_call_finished",
+            "tool_approval_requested",
+        }
+        for event in events:
+            if event.sequence > after_sequence and event.event_type in visible_types:  # skip events at or before the cursor sequence
+                yield sse(event.event_type, event.model_dump())  # frame name is the event type; payload is the dumped event model
+
+    @app.post("/v1/chat/stream")
+    async def chat_stream(body: ChatRequest) -> StreamingResponse:  # SSE endpoint: tools first, then the streamed "thinker" reply
+        async def generator():
+            task = await task_store.create_task(
+                body.message, body.workspace or settings.workspace, body.debug
+            )
+            task_event = await event_store.append(
+                task.task_id,
+                "task_created",
+                {
+                    "message": body.message,
+                    "workspace": body.workspace or settings.workspace,
+                    "debug": body.debug,
+                },
+            )
+            yield sse("task_created", task_event.model_dump())  # first frame carries the stored event
+
+            reasoning_parts: list[str] = []
+            content_parts: list[str] = []
+            try:
+                messages = runtime.context_builder.build_basic_messages(task)
+                tool_observations = await runtime._run_action_tools(  # NOTE(review): reaches into a private runtime method
+                    task.task_id, messages, body.workspace or settings.workspace
+                )
+                async for tool_event in emit_tool_events(task.task_id, task_event.sequence):  # tool events stored after task_created
+                    yield tool_event
+                if any(observation.get("requires_approval") for observation in tool_observations):  # one flagged observation pauses the task
+                    await task_store.waiting_for_approval(task.task_id)
+                    await event_store.append(
+                        task.task_id,
+                        "task_waiting_for_approval",
+                        {"observations": tool_observations},
+                    )
+                    yield sse(
+                        "done",
+                        {
+                            "task_id": task.task_id,
+                            "status": "waiting_for_approval",
+                            "final_response": "Waiting for approval.",
+                            "reasoning_content": None,
+                        },
+                    )
+                    return  # stream ends here; the model is not called
+                if tool_observations:  # tool results are handed to the model as an extra user message
+                    messages = [
+                        *messages,
+                        {
+                            "role": "user",
+                            "content": "tool_observations:\n"
+                            + json.dumps(tool_observations, ensure_ascii=False, indent=2),
+                        },
+                    ]
+                await event_store.append(task.task_id, "model_call_started", {"role": "thinker"})
+                async for chunk in model_client.stream_chat("thinker", messages):
+                    delta = str(chunk.get("delta") or "")  # a missing or None delta becomes ""
+                    if chunk.get("type") == "reasoning_delta":
+                        reasoning_parts.append(delta)
+                        yield sse(
+                            "reasoning_delta",
+                            {"task_id": task.task_id, "delta": delta},
+                        )
+                    elif chunk.get("type") == "content_delta":  # chunks of any other type are ignored
+                        content_parts.append(delta)
+                        yield sse(
+                            "content_delta",
+                            {"task_id": task.task_id, "delta": delta},
+                        )
+
+                content = "".join(content_parts)
+                reasoning_content = "".join(reasoning_parts) or None  # None when the joined reasoning text is empty
+                await event_store.append(
+                    task.task_id,
+                    "cognition_response",
+                    {
+                        "role": "thinker",
+                        "content": content,
+                        "reasoning_content": reasoning_content,
+                    },
+                )
+                await event_store.append(
+                    task.task_id,
+                    "model_call_finished",
+                    {
+                        "role": "thinker",
+                        "model": model_client.get_role_config("thinker").model,
+                    },
+                )
+                await task_store.complete_task(task.task_id, content)
+                await event_store.append(
+                    task.task_id,
+                    "task_completed",
+                    {
+                        "final_response": content,
+                        "reasoning_content": reasoning_content,
+                    },
+                )
+                yield sse(
+                    "done",
+                    {
+                        "task_id": task.task_id,
+                        "status": "completed",
+                        "final_response": content,
+                        "reasoning_content": reasoning_content,
+                    },
+                )
+            except Exception as exc:  # any failure marks the task failed and is reported as an "error" frame
+                await task_store.fail_task(task.task_id, str(exc))
+                await event_store.append(task.task_id, "task_failed", {"error": str(exc)})
+                yield sse(
+                    "error",
+                    {
+                        "task_id": task.task_id,
+                        "status": "failed",
+                        "error": str(exc),
+                    },
+                )
+
+        return StreamingResponse(generator(), media_type="text/event-stream")
+
+    @app.post("/v1/tasks")
+    async def create_task(body: ChatRequest) -> dict[str, Any]:
+        task = await task_store.create_task(body.message, body.workspace or settings.workspace, body.debug)
+        await event_store.append(task.task_id, "task_created", body.model_dump())
+        return task.model_dump()
+
+    @app.get("/v1/tasks")
+    async def list_tasks() -> list[dict[str, Any]]:
+        return [task.model_dump() for task in await task_store.list_tasks()]
+
+    @app.get("/v1/tasks/{task_id}")
+    async def get_task(task_id: str) -> dict[str, Any]:
+        task = await task_store.get_task(task_id)
+        if task is None:
+            raise HTTPException(status_code=404, detail="Task not found")
+        return task.model_dump()
+
+    @app.get("/v1/tasks/{task_id}/events")
+    async def get_events(task_id: str) -> list[dict[str, Any]]:
+        return [event.model_dump() for event in await event_store.list_events(task_id)]
+
+    @app.get("/v1/tasks/{task_id}/stream")
+    async def stream_events(task_id: str) -> StreamingResponse:  # SSE feed of a task's stored events
+        async def generator():  # polls once per second for 30 rounds, sending only events not yet sent
+            sent = 0
+            for _ in range(30):
+                events = await event_store.list_events(task_id)
+                for event in events[sent:]:
+                    # ensure_ascii=False keeps non-ASCII text readable, matching the sse() helper
+                    yield f"data: {json.dumps(event.model_dump(), ensure_ascii=False)}\n\n"
+                sent = len(events); await asyncio.sleep(1)
+
+        return StreamingResponse(generator(), media_type="text/event-stream")
+
+    @app.post("/v1/tasks/{task_id}/continue")
+    async def continue_task(task_id: str) -> dict[str, str]:
+        task = await task_store.get_task(task_id)
+        if task is None:
+            raise HTTPException(status_code=404, detail="Task not found")
+        await task_store.update_status(task_id, "running")
+        await event_store.append(task_id, "task_continued", {})
+        return {"status": "running"}
+
+    @app.post("/v1/tasks/{task_id}/cancel")
+    async def cancel_task(task_id: str) -> dict[str, str]:
+        await task_store.cancel_task(task_id)  # NOTE(review): no lookup/404 here, unlike continue_task
+        await event_store.append(task_id, "task_cancelled", {})  # event is recorded with an empty payload
+        return {"status": "cancelled"}
+
+    @app.get("/v1/approvals/pending")
+    async def pending_approvals() -> list[dict[str, Any]]:
+        return [approval.model_dump() for approval in await approvals.pending()]
+
+    @app.post("/v1/approvals/{approval_id}/allow_once")
+    async def allow_once(approval_id: str) -> dict[str, str]:
+        await approvals.allow_once(approval_id)
+        return {"status": "allowed_once"}
+
+    @app.post("/v1/approvals/{approval_id}/allow_forever")
+    async def allow_forever(approval_id: str) -> dict[str, str]:
+        await approvals.allow_forever(approval_id)
+        return {"status": "allowed_forever"}
+
+    @app.post("/v1/approvals/{approval_id}/deny")
+    async def deny(approval_id: str) -> dict[str, str]:
+        await approvals.deny(approval_id)
+        return {"status": "denied"}
+
+    @app.get("/v1/skills")
+    async def list_skills() -> list[dict[str, Any]]:
+        return [skill.model_dump() for skill in skills.load_skills()]
+
+    @app.get("/v1/skills/{skill_id}")
+    async def get_skill(skill_id: str) -> dict[str, Any]:
+        skill = skills.get_skill(skill_id)
+        if skill is None:
+            raise HTTPException(status_code=404, detail="Skill not found")
+        return skill.model_dump()
+
+    @app.get("/v1/experience")
+    async def list_experience() -> list[dict[str, Any]]:
+        return [record.model_dump() for record in await experience.list_records()]
+
+    @app.get("/v1/experience/{record_id}")
+    async def get_experience(record_id: int) -> dict[str, Any]:
+        record = await experience.get_record(record_id)
+        if record is None:
+            raise HTTPException(status_code=404, detail="Experience record not found")
+        return record.model_dump()
+
+    @app.get("/v1/memory/search")
+    async def search_memory(q: str) -> dict[str, Any]:
+        try:
+            return {"results": await memory.search_memory(q)}
+        except EmbeddingsUnavailableError as exc:
+            return {"results": [], "warning": str(exc)}
+
+    return app
+
+
+app = create_app()
+
+
+if __name__ == "__main__":
+    settings = get_settings()
+    uvicorn.run("duck_core.api:app", host=settings.api_host, port=settings.api_port, reload=False)
diff --git a/data/.gitkeep b/duck_core/approvals/__init__.py
similarity index 100%
rename from data/.gitkeep
rename to duck_core/approvals/__init__.py
diff --git a/duck_core/approvals/service.py b/duck_core/approvals/service.py
new file mode 100644
index 0000000..f62eecf
--- /dev/null
+++ b/duck_core/approvals/service.py
@@ -0,0 +1,143 @@
+import hashlib
+import json
+from pathlib import Path
+from typing import Any
+from uuid import uuid4
+
+import aiosqlite
+from pydantic import BaseModel
+
+from duck_core.tasks.store import utc_now
+
+
+class Approval(BaseModel):
+    id: int | None = None
+    approval_id: str
+    task_id: str
+    action_hash: str
+    normalized_action: dict[str, Any]
+    status: str
+    decision: str | None = None
+    created_at: str
+    updated_at: str
+
+
+def normalize_action(action: dict[str, Any]) -> str:
+    return json.dumps(action, sort_keys=True, separators=(",", ":"))
+
+
+def action_hash(action: dict[str, Any]) -> str:
+    return hashlib.sha256(normalize_action(action).encode()).hexdigest()
+
+
+class ApprovalService:
+    def __init__(self, db_path: str):
+        self.db_path = Path(db_path)
+
+    async def init(self) -> None:
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                """
+                create table if not exists approvals (
+                  id integer primary key autoincrement,
+                  approval_id text not null unique,
+                  task_id text not null,
+                  action_hash text not null,
+                  normalized_action_json text not null,
+                  status text not null,
+                  decision text,
+                  created_at text not null,
+                  updated_at text not null
+                )
+                """
+            )
+            await db.commit()
+
+    async def create_pending(self, task_id: str, action: dict[str, Any]) -> Approval:
+        await self.init()
+        now = utc_now()
+        approval_id = f"approval_{uuid4().hex[:12]}"
+        normalized = normalize_action(action)
+        digest = action_hash(action)
+        async with aiosqlite.connect(self.db_path) as db:
+            cursor = await db.execute(
+                """
+                insert into approvals(
+                  approval_id, task_id, action_hash, normalized_action_json,
+                  status, created_at, updated_at
+                ) values (?, ?, ?, ?, ?, ?, ?)
+                """,
+                (approval_id, task_id, digest, normalized, "pending", now, now),
+            )
+            await db.commit()
+            row_id = cursor.lastrowid
+        return Approval(
+            id=row_id,
+            approval_id=approval_id,
+            task_id=task_id,
+            action_hash=digest,
+            normalized_action=action,
+            status="pending",
+            created_at=now,
+            updated_at=now,
+        )
+
+    async def pending(self) -> list[Approval]:
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            cursor = await db.execute(
+                "select * from approvals where status = 'pending' order by created_at"
+            )
+            rows = await cursor.fetchall()
+        return [self._row_to_approval(row) for row in rows]
+
+    async def allow_once(self, approval_id: str) -> None:
+        await self._decide(approval_id, "resolved", "allow_once")
+
+    async def allow_forever(self, approval_id: str) -> None:
+        await self._decide(approval_id, "allowed_forever", "allow_forever")
+
+    async def deny(self, approval_id: str) -> None:
+        await self._decide(approval_id, "resolved", "deny")
+
+    async def is_allowed_forever(self, action: dict[str, Any]) -> bool:
+        await self.init()
+        digest = action_hash(action)
+        async with aiosqlite.connect(self.db_path) as db:
+            cursor = await db.execute(
+                """
+                select 1 from approvals
+                where action_hash = ? and status = 'allowed_forever'
+                limit 1
+                """,
+                (digest,),
+            )
+            row = await cursor.fetchone()
+        return row is not None
+
+    async def _decide(self, approval_id: str, status: str, decision: str) -> None:
+        await self.init()  # makes sure the approvals table exists before updating
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(  # matches on approval_id only, whatever the row's current status
+                """
+                update approvals set status = ?, decision = ?, updated_at = ?
+                where approval_id = ?
+                """,
+                (status, decision, utc_now(), approval_id),
+            )
+            await db.commit()  # NOTE(review): affected-row count is never checked, so an unknown id is a silent no-op
+
+    def _row_to_approval(self, row: aiosqlite.Row) -> Approval:
+        return Approval(
+            id=row["id"],
+            approval_id=row["approval_id"],
+            task_id=row["task_id"],
+            action_hash=row["action_hash"],
+            normalized_action=json.loads(row["normalized_action_json"]),
+            status=row["status"],
+            decision=row["decision"],
+            created_at=row["created_at"],
+            updated_at=row["updated_at"],
+        )
diff --git a/duck_core/config.py b/duck_core/config.py
new file mode 100644
index 0000000..0bb0e0e
--- /dev/null
+++ b/duck_core/config.py
@@ -0,0 +1,56 @@
+import os
+from dataclasses import dataclass
+from functools import lru_cache
+from pathlib import Path
+
+from dotenv import load_dotenv
+
+
+@dataclass(frozen=True)
+class Settings:
+    llama_server_bin: str = "llama-server"
+    main_model_path: str = "./models/Qwen3.6/nonMTP/Qwen3.6-35B-A3B-UD-Q4_K_M.gguf"
+    main_port: int = 8081
+    ctx_size: int = 65536
+    n_gpu_layers: str = "auto"
+    host: str = "127.0.0.1"
+    api_host: str = "127.0.0.1"
+    api_port: int = 8000
+    workspace: str = "./workspace"
+    db_path: str = "./data/duck.sqlite3"
+    max_input_tokens: int = 49152
+    max_recent_events_tokens: int = 12000
+    max_memory_tokens: int = 8000
+    max_skill_tokens: int = 6000
+    qdrant_url: str = "http://127.0.0.1:6333"
+    skip_live_llm_tests: int = 0
+
+    @property
+    def db_file(self) -> Path:
+        return Path(self.db_path)
+
+
+@lru_cache  # zero-argument cache: the first call builds Settings, later calls return that same object
+def get_settings() -> Settings:
+    load_dotenv()  # python-dotenv: pull variables from a .env file before the os.getenv reads below
+    return Settings(
+        llama_server_bin=os.getenv("DUCK_LLAMA_SERVER_BIN", "llama-server"),
+        main_model_path=os.getenv(
+            "DUCK_MAIN_MODEL_PATH",
+            "./models/Qwen3.6/nonMTP/Qwen3.6-35B-A3B-UD-Q4_K_M.gguf",
+        ),
+        main_port=int(os.getenv("DUCK_MAIN_PORT", "8081")),  # int() raises ValueError on a non-numeric value
+        ctx_size=int(os.getenv("DUCK_CTX_SIZE", "65536")),
+        n_gpu_layers=os.getenv("DUCK_N_GPU_LAYERS", "auto"),  # kept as a string, not parsed
+        host=os.getenv("DUCK_HOST", "127.0.0.1"),
+        api_host=os.getenv("DUCK_API_HOST", "127.0.0.1"),
+        api_port=int(os.getenv("DUCK_API_PORT", "8000")),
+        workspace=os.getenv("DUCK_WORKSPACE", "./workspace"),
+        db_path=os.getenv("DUCK_DB_PATH", "./data/duck.sqlite3"),
+        max_input_tokens=int(os.getenv("DUCK_MAX_INPUT_TOKENS", "49152")),
+        max_recent_events_tokens=int(os.getenv("DUCK_MAX_RECENT_EVENTS_TOKENS", "12000")),
+        max_memory_tokens=int(os.getenv("DUCK_MAX_MEMORY_TOKENS", "8000")),
+        max_skill_tokens=int(os.getenv("DUCK_MAX_SKILL_TOKENS", "6000")),
+        qdrant_url=os.getenv("QDRANT_URL", "http://127.0.0.1:6333"),  # the only variable without the DUCK_ prefix
+        skip_live_llm_tests=int(os.getenv("DUCK_SKIP_LIVE_LLM_TESTS", "0")),
+    )
diff --git a/duck_core/context_builder.py b/duck_core/context_builder.py
new file mode 100644
index 0000000..7f14ea7
--- /dev/null
+++ b/duck_core/context_builder.py
@@ -0,0 +1,11 @@
+from duck_core.tasks.state import TaskState
+
+
+class ContextBuilder:
+    """Assembles the chat messages sent to the model for a task."""
+
+    def build_basic_messages(self, task: TaskState) -> list[dict[str, str]]:
+        """Return a one-element list: a user message holding task.user_message."""
+        user_turn = {"role": "user", "content": task.user_message}
+        messages = [user_turn]
+        return messages
diff --git a/data/events/.gitkeep b/duck_core/events/__init__.py
similarity index 100%
rename from data/events/.gitkeep
rename to duck_core/events/__init__.py
diff --git a/duck_core/events/store.py b/duck_core/events/store.py
new file mode 100644
index 0000000..a9f8bb3
--- /dev/null
+++ b/duck_core/events/store.py
@@ -0,0 +1,92 @@
+import json
+from pathlib import Path
+from typing import Any
+
+import aiosqlite
+from pydantic import BaseModel
+
+from duck_core.tasks.store import utc_now
+
+
+class Event(BaseModel):
+    id: int
+    task_id: str
+    sequence: int
+    event_type: str
+    payload: dict[str, Any]
+    created_at: str
+
+
+class EventStore:
+    def __init__(self, db_path: str):
+        self.db_path = Path(db_path)
+
+    async def init(self) -> None:
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                """
+                create table if not exists events (
+                  id integer primary key autoincrement,
+                  task_id text not null,
+                  sequence integer not null,
+                  event_type text not null,
+                  payload_json text not null,
+                  created_at text not null
+                )
+                """
+            )
+            await db.execute(
+                """
+                create unique index if not exists idx_events_task_sequence
+                on events(task_id, sequence)
+                """
+            )
+            await db.commit()
+
+    async def append(self, task_id: str, event_type: str, payload: dict[str, Any]) -> Event:
+        """Store one event for task_id, numbered one past the task's highest sequence."""
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            # Lock before reading max(sequence) so concurrent appends cannot pick the same number.
+            await db.execute("begin immediate")
+            cursor = await db.execute(
+                "select coalesce(max(sequence), 0) + 1 from events where task_id = ?", (task_id,)
+            )
+            sequence = (await cursor.fetchone())[0]
+            created_at = utc_now()
+            cursor = await db.execute(
+                "insert into events(task_id, sequence, event_type, payload_json, created_at)"
+                " values (?, ?, ?, ?, ?)",
+                (task_id, sequence, event_type, json.dumps(payload), created_at),
+            )
+            await db.commit()
+            event_id = cursor.lastrowid
+        return Event(
+            id=event_id,
+            task_id=task_id,
+            sequence=sequence,
+            event_type=event_type,
+            payload=payload,
+            created_at=created_at,
+        )
+
+    async def list_events(self, task_id: str) -> list[Event]:
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            cursor = await db.execute(
+                "select * from events where task_id = ? order by sequence", (task_id,)
+            )
+            rows = await cursor.fetchall()
+        return [
+            Event(
+                id=row["id"],
+                task_id=row["task_id"],
+                sequence=row["sequence"],
+                event_type=row["event_type"],
+                payload=json.loads(row["payload_json"]),
+                created_at=row["created_at"],
+            )
+            for row in rows
+        ]
diff --git a/data/memory/.gitkeep b/duck_core/experience/__init__.py
similarity index 100%
rename from data/memory/.gitkeep
rename to duck_core/experience/__init__.py
diff --git a/duck_core/experience/recorder.py b/duck_core/experience/recorder.py
new file mode 100644
index 0000000..c78dea7
--- /dev/null
+++ b/duck_core/experience/recorder.py
@@ -0,0 +1,172 @@
+import json
+from pathlib import Path
+
+import aiosqlite
+from pydantic import BaseModel
+
+from duck_core.tasks.store import utc_now
+
+
+class ExperienceRecord(BaseModel):
+    id: int | None = None
+    task_id: str
+    skill_id: str | None = None
+    summary: str
+    result: str
+    what_worked: list[str] = []
+    what_failed: list[str] = []
+    reusable_lesson: str | None = None
+    suggested_skill_patch: str | None = None
+    confidence: float | None = None
+    created_at: str
+
+
+class ExperienceRecorder:
+    def __init__(self, db_path: str):
+        self.db_path = Path(db_path)
+
+    async def init(self) -> None:
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                """
+                create table if not exists experience_records (
+                  id integer primary key autoincrement,
+                  task_id text not null,
+                  skill_id text,
+                  summary text not null,
+                  result text not null,
+                  what_worked_json text,
+                  what_failed_json text,
+                  reusable_lesson text,
+                  suggested_skill_patch text,
+                  confidence real,
+                  created_at text not null
+                )
+                """
+            )
+            await db.commit()
+
+    async def record(
+        self,
+        task_id: str,
+        summary: str,
+        result: str,
+        skill_id: str | None = None,
+        what_worked: list[str] | None = None,
+        what_failed: list[str] | None = None,
+        reusable_lesson: str | None = None,
+        suggested_skill_patch: str | None = None,
+        confidence: float | None = None,
+    ) -> ExperienceRecord:
+        await self.init()
+        now = utc_now()
+        async with aiosqlite.connect(self.db_path) as db:
+            cursor = await db.execute(
+                """
+                insert into experience_records(
+                  task_id, skill_id, summary, result, what_worked_json,
+                  what_failed_json, reusable_lesson, suggested_skill_patch,
+                  confidence, created_at
+                ) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    task_id,
+                    skill_id,
+                    summary,
+                    result,
+                    json.dumps(what_worked or []),
+                    json.dumps(what_failed or []),
+                    reusable_lesson,
+                    suggested_skill_patch,
+                    confidence,
+                    now,
+                ),
+            )
+            await db.commit()
+            row_id = cursor.lastrowid
+        if suggested_skill_patch and skill_id:
+            self.write_skill_update_proposal(task_id, skill_id, suggested_skill_patch)
+        return ExperienceRecord(
+            id=row_id,
+            task_id=task_id,
+            skill_id=skill_id,
+            summary=summary,
+            result=result,
+            what_worked=what_worked or [],
+            what_failed=what_failed or [],
+            reusable_lesson=reusable_lesson,
+            suggested_skill_patch=suggested_skill_patch,
+            confidence=confidence,
+            created_at=now,
+        )
+
+    async def list_records(self) -> list[ExperienceRecord]:
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            cursor = await db.execute(
+                "select * from experience_records order by created_at desc"
+            )
+            rows = await cursor.fetchall()
+        return [self._row_to_record(row) for row in rows]
+
+    async def get_record(self, record_id: int) -> ExperienceRecord | None:
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            cursor = await db.execute(
+                "select * from experience_records where id = ?", (record_id,)
+            )
+            row = await cursor.fetchone()
+        return self._row_to_record(row) if row else None
+
+    def write_skill_update_proposal(self, task_id: str, skill_id: str, patch: str) -> Path:
+        """Write a proposal file for a skill patch under skills/_proposals and return its path."""
+        # Only the last path component of skill_id goes into the file name, so it cannot leave the directory.
+        safe_id = Path(skill_id).name
+        directory = Path("skills/_proposals")
+        directory.mkdir(parents=True, exist_ok=True)
+        path = directory / f"{utc_now().replace(':', '').replace('+', '_')}_{safe_id}.patch.md"
+        body = [
+            "# Skill update proposal",
+            "",
+            f"Skill: {skill_id}",
+            "",
+            "## Reason",
+            "",
+            "Reflection suggested a reusable skill improvement.",
+            "",
+            "## Proposed changes",
+            "",
+            patch,
+            "",
+            "## Evidence",
+            "",
+            f"Task id: {task_id}",
+            "",
+            "## Risk",
+            "",
+            "Low.",
+            "",
+            "## Requires human approval",
+            "",
+            "Yes.",
+        ]
+        path.write_text("\n".join(body), encoding="utf-8")  # explicit encoding: patch text may be non-ASCII
+        return path
+
+    def _row_to_record(self, row: aiosqlite.Row) -> ExperienceRecord:
+        return ExperienceRecord(
+            id=row["id"],
+            task_id=row["task_id"],
+            skill_id=row["skill_id"],
+            summary=row["summary"],
+            result=row["result"],
+            what_worked=json.loads(row["what_worked_json"] or "[]"),
+            what_failed=json.loads(row["what_failed_json"] or "[]"),
+            reusable_lesson=row["reusable_lesson"],
+            suggested_skill_patch=row["suggested_skill_patch"],
+            confidence=row["confidence"],
+            created_at=row["created_at"],
+        )
diff --git a/data/permissions/.gitkeep b/duck_core/memory/__init__.py
similarity index 100%
rename from data/permissions/.gitkeep
rename to duck_core/memory/__init__.py
diff --git a/duck_core/memory/policy.py b/duck_core/memory/policy.py
new file mode 100644
index 0000000..05c29ec
--- /dev/null
+++ b/duck_core/memory/policy.py
@@ -0,0 +1,20 @@
+from pydantic import BaseModel
+
+
+class MemoryDecision(BaseModel):
+    should_store: bool
+    memory_type: str
+    summary: str
+    importance: float
+    metadata: dict[str, str] = {}
+
+
+class MemoryPolicy:
+    async def classify(self, summary: str, task_id: str) -> MemoryDecision:  # stub: same verdict for any input
+        return MemoryDecision(
+            should_store=False,  # nothing is ever selected for storage
+            memory_type="event",
+            summary=summary,  # passed through unchanged
+            importance=0.0,
+            metadata={"task_id": task_id, "source": "stub_policy"},
+        )
diff --git a/duck_core/memory/vector_memory.py b/duck_core/memory/vector_memory.py
new file mode 100644
index 0000000..2dba15f
--- /dev/null
+++ b/duck_core/memory/vector_memory.py
@@ -0,0 +1,70 @@
+from typing import Any
+from uuid import uuid4
+
+import httpx
+
+
+class EmbeddingsUnavailableError(RuntimeError):
+    pass
+
+
+class VectorMemory:
+    def __init__(
+        self,
+        qdrant_url: str,
+        collection_name: str = "duck_memory",
+        embeddings_base_url: str | None = "http://127.0.0.1:8081/v1",
+    ):
+        self.qdrant_url = qdrant_url.rstrip("/")
+        self.collection_name = collection_name
+        self.embeddings_base_url = embeddings_base_url.rstrip("/") if embeddings_base_url else None
+
+    async def add_memory(self, text: str, metadata: dict[str, Any] | None = None) -> str:
+        vector = await self._embed(text)  # may raise EmbeddingsUnavailableError
+        point_id = str(uuid4())  # random id, returned to the caller
+        async with httpx.AsyncClient(timeout=20.0, trust_env=False) as client:
+            await client.put(  # PUT on the collection itself (presumably create-if-missing - confirm); response is not checked
+                f"{self.qdrant_url}/collections/{self.collection_name}",
+                json={"vectors": {"size": len(vector), "distance": "Cosine"}},
+            )
+            response = await client.put(
+                f"{self.qdrant_url}/collections/{self.collection_name}/points",
+                json={
+                    "points": [
+                        {
+                            "id": point_id,
+                            "vector": vector,
+                            "payload": {"text": text, **(metadata or {})},  # a "text" key in metadata overrides text
+                        }
+                    ]
+                },
+            )
+            response.raise_for_status()  # only the points request is validated
+        return point_id
+
+    async def search_memory(self, query: str, limit: int = 5) -> list[dict[str, Any]]:
+        vector = await self._embed(query)
+        async with httpx.AsyncClient(timeout=20.0, trust_env=False) as client:
+            response = await client.post(
+                f"{self.qdrant_url}/collections/{self.collection_name}/points/search",
+                json={"vector": vector, "limit": limit, "with_payload": True},
+            )
+            response.raise_for_status()
+        return response.json().get("result", [])
+
+    async def _embed(self, text: str) -> list[float]:  # text -> embedding, or EmbeddingsUnavailableError
+        if not self.embeddings_base_url:
+            raise EmbeddingsUnavailableError(
+                "Embeddings endpoint is not configured; vector memory is explicit stub."
+            )
+        url = f"{self.embeddings_base_url}/embeddings"
+        try:  # connection failures must surface as the same error type as HTTP failures
+            async with httpx.AsyncClient(timeout=20.0, trust_env=False) as client:
+                response = await client.post(url, json={"model": "local-main", "input": text})
+        except httpx.RequestError as exc:
+            raise EmbeddingsUnavailableError(f"Embeddings endpoint unavailable: {exc!r}") from exc
+        if response.status_code >= 400:
+            raise EmbeddingsUnavailableError(
+                f"Embeddings endpoint unavailable: HTTP {response.status_code}"
+            )
+        return [float(value) for value in response.json()["data"][0]["embedding"]]
diff --git a/duck_core/model_client.py b/duck_core/model_client.py
new file mode 100644
index 0000000..4c01d5d
--- /dev/null
+++ b/duck_core/model_client.py
@@ -0,0 +1,217 @@
+import json
+import logging
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+import httpx
+import yaml
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(frozen=True)
+class RoleConfig:
+    role: str
+    provider: str
+    base_url: str
+    model: str
+    purpose: str
+    structured_output: bool
+    temperature: float
+    max_output_tokens: int
+    system_prompt: str
+    response_schema: str | None = None
+
+
+@dataclass
+class ModelResponse:
+    role: str
+    model: str
+    content: str
+    reasoning_content: str | None
+    raw: dict[str, Any]
+    latency_ms: float
+    prompt_tokens: int | None = None
+    completion_tokens: int | None = None
+    total_tokens: int | None = None
+
+
+class ModelClient:
+    def __init__(self, config_path: str = "config/models.yaml", timeout: float = 120.0):
+        """Load the default provider and per-role settings from the YAML file at config_path."""
+        self.config_path = Path(config_path)
+        self.timeout = timeout
+        data = yaml.safe_load(self.config_path.read_text(encoding="utf-8"))  # not locale-dependent
+        self.default_provider = data["default_provider"]
+        self._roles = {
+            name: RoleConfig(role=name, **spec) for name, spec in data["models"].items()
+        }
+
+    def list_roles(self) -> dict[str, dict[str, Any]]:
+        return {
+            role: {
+                "provider": cfg.provider,
+                "base_url": cfg.base_url,
+                "model": cfg.model,
+                "purpose": cfg.purpose,
+                "structured_output": cfg.structured_output,
+                "temperature": cfg.temperature,
+                "max_output_tokens": cfg.max_output_tokens,
+                "system_prompt": cfg.system_prompt,
+                "response_schema": cfg.response_schema,
+            }
+            for role, cfg in self._roles.items()
+        }
+
+    def get_role_config(self, role: str) -> RoleConfig:
+        try:
+            return self._roles[role]
+        except KeyError as exc:
+            raise KeyError(f"Unknown model role: {role}") from exc
+
+    def _system_message(self, cfg: RoleConfig) -> dict[str, str] | None:
+        path = Path(cfg.system_prompt)
+        if not path.exists():
+            return None
+        return {"role": "system", "content": path.read_text()}
+
+    def _response_format(
+        self, cfg: RoleConfig, response_format: dict[str, Any] | None
+    ) -> dict[str, Any] | None:
+        if response_format is not None:
+            return response_format
+        if not cfg.structured_output:
+            return None
+        if cfg.response_schema and Path(cfg.response_schema).exists():
+            schema = json.loads(Path(cfg.response_schema).read_text())
+            return {
+                "type": "json_schema",
+                "json_schema": {"name": "action_directive", "schema": schema, "strict": True},
+            }
+        return {"type": "json_object"}
+
+    async def chat(
+        self,
+        role: str,
+        messages: list[dict[str, str]],
+        temperature: float | None = None,
+        max_output_tokens: int | None = None,
+        response_format: dict[str, Any] | None = None,
+    ) -> ModelResponse:
+        cfg = self.get_role_config(role)
+        outbound = list(messages)
+        system_message = self._system_message(cfg)
+        if system_message and not any(message["role"] == "system" for message in outbound):
+            outbound.insert(0, system_message)
+
+        payload: dict[str, Any] = {
+            "model": cfg.model,
+            "messages": outbound,
+            "temperature": cfg.temperature if temperature is None else temperature,
+            "max_tokens": cfg.max_output_tokens if max_output_tokens is None else max_output_tokens,
+        }
+        fmt = self._response_format(cfg, response_format)
+        if fmt is not None:
+            payload["response_format"] = fmt
+
+        start = time.perf_counter()
+        try:
+            async with httpx.AsyncClient(timeout=self.timeout, trust_env=False) as client:
+                response = await client.post(f"{cfg.base_url}/chat/completions", json=payload)
+                response.raise_for_status()
+                raw = response.json()
+        except httpx.HTTPError as exc:
+            raise ConnectionError(f"Model backend unavailable for role {role}: {exc}") from exc
+
+        latency_ms = (time.perf_counter() - start) * 1000
+        usage = raw.get("usage") or {}
+        message = raw.get("choices", [{}])[0].get("message", {})
+        content = message.get("content") or ""
+        reasoning_content = message.get("reasoning_content")
+        logger.info("model role=%s model=%s latency_ms=%.1f usage=%s", role, cfg.model, latency_ms, usage)
+        return ModelResponse(
+            role=role,
+            model=cfg.model,
+            content=content,
+            reasoning_content=reasoning_content,
+            raw=raw,
+            latency_ms=latency_ms,
+            prompt_tokens=usage.get("prompt_tokens"),
+            completion_tokens=usage.get("completion_tokens"),
+            total_tokens=usage.get("total_tokens"),
+        )
+
+    async def stream_chat(
+        self,
+        role: str,
+        messages: list[dict[str, str]],
+        temperature: float | None = None,
+        max_output_tokens: int | None = None,
+        response_format: dict[str, Any] | None = None,
+    ):
+        cfg = self.get_role_config(role)
+        outbound = list(messages)
+        system_message = self._system_message(cfg)
+        if system_message and not any(message["role"] == "system" for message in outbound):
+            outbound.insert(0, system_message)
+
+        payload: dict[str, Any] = {
+            "model": cfg.model,
+            "messages": outbound,
+            "temperature": cfg.temperature if temperature is None else temperature,
+            "max_tokens": cfg.max_output_tokens if max_output_tokens is None else max_output_tokens,
+            "stream": True,
+        }
+        fmt = self._response_format(cfg, response_format)
+        if fmt is not None:
+            payload["response_format"] = fmt
+
+        try:
+            async with httpx.AsyncClient(timeout=self.timeout, trust_env=False) as client:
+                async with client.stream(
+                    "POST", f"{cfg.base_url}/chat/completions", json=payload
+                ) as response:
+                    response.raise_for_status()
+                    async for line in response.aiter_lines():
+                        if not line.startswith("data: "):
+                            continue
+                        raw_data = line.removeprefix("data: ").strip()
+                        if raw_data == "[DONE]":
+                            break
+                        if not raw_data:
+                            continue
+                        chunk = json.loads(raw_data)
+                        delta = chunk.get("choices", [{}])[0].get("delta", {})
+                        reasoning_delta = delta.get("reasoning_content")
+                        content_delta = delta.get("content")
+                        if reasoning_delta:
+                            yield {"type": "reasoning_delta", "delta": reasoning_delta}
+                        if content_delta:
+                            yield {"type": "content_delta", "delta": content_delta}
+        except httpx.HTTPError as exc:
+            raise ConnectionError(f"Model backend unavailable for role {role}: {exc}") from exc
+
+    async def ping(self) -> dict[str, Any]:
+        results: dict[str, Any] = {}
+        async with httpx.AsyncClient(timeout=10.0, trust_env=False) as client:
+            for role, cfg in self._roles.items():
+                try:
+                    started = time.perf_counter()
+                    response = await client.get(f"{cfg.base_url}/models")
+                    response.raise_for_status()
+                    results[role] = {
+                        "ok": True,
+                        "base_url": cfg.base_url,
+                        "model": cfg.model,
+                        "latency_ms": round((time.perf_counter() - started) * 1000, 1),
+                    }
+                except httpx.HTTPError as exc:
+                    results[role] = {
+                        "ok": False,
+                        "base_url": cfg.base_url,
+                        "model": cfg.model,
+                        "error": str(exc),
+                    }
+        return results
diff --git a/duck_core/reflection.py b/duck_core/reflection.py
new file mode 100644
index 0000000..53cae06
--- /dev/null
+++ b/duck_core/reflection.py
@@ -0,0 +1,29 @@
+from duck_core.experience.recorder import ExperienceRecorder, ExperienceRecord
+from duck_core.model_client import ModelClient
+
+
+class Reflection:  # asks the "critic" model role to review a finished task and stores the answer via the recorder
+    def __init__(self, model_client: ModelClient, recorder: ExperienceRecorder):
+        self.model_client = model_client
+        self.recorder = recorder
+
+    async def reflect(self, task_id: str, transcript: str) -> ExperienceRecord:  # returns whatever recorder.record returns
+        response = await self.model_client.chat(
+            "critic",
+            [
+                {
+                    "role": "user",
+                    "content": (
+                        "Reflect on this DuckLM task. Cover outcome, waste, JSON/tool issues, "
+                        f"and reusable lesson.\n\n{transcript}"
+                    ),
+                }
+            ],
+        )
+        return await self.recorder.record(
+            task_id=task_id,
+            summary=response.content[:500],  # summary is the first 500 characters of the critic's reply
+            result="unknown",  # the outcome is not derived from the reply; a fixed placeholder is stored
+            reusable_lesson=response.content,  # the full reply is kept as the lesson text
+            confidence=0.5,  # fixed value, not computed from the reply
+        )
diff --git a/duck_core/runtime_loop.py b/duck_core/runtime_loop.py
new file mode 100644
index 0000000..ed6b22c
--- /dev/null
+++ b/duck_core/runtime_loop.py
@@ -0,0 +1,197 @@
+import json
+from dataclasses import dataclass
+from typing import Any
+
+from duck_core.approvals.service import ApprovalService
+from duck_core.context_builder import ContextBuilder
+from duck_core.events.store import EventStore
+from duck_core.model_client import ModelClient
+from duck_core.tasks.store import TaskStore
+from duck_core.tools.gateway import ToolGateway
+
+
+@dataclass
+class ChatResult:  # outcome of RuntimeLoop.run_chat
+    task_id: str  # id of the task created for this chat message
+    status: str  # run_chat sets "completed", "waiting_for_approval" or "failed"
+    final_response: str  # model answer, "Waiting for approval.", or the error text on failure
+    reasoning_content: str | None = None  # only populated on the "completed" path
+
+
+class RuntimeLoop:  # one chat turn: plan tools with the "action" role, run them, then answer with the "thinker" role
+    def __init__(
+        self,
+        task_store: TaskStore,
+        event_store: EventStore,
+        model_client: ModelClient | None = None,
+        context_builder: ContextBuilder | None = None,
+        approval_service: ApprovalService | None = None,
+    ):
+        self.task_store = task_store
+        self.event_store = event_store
+        self.model_client = model_client or ModelClient()  # default client reads config/models.yaml
+        self.context_builder = context_builder or ContextBuilder()
+        self.approval_service = approval_service  # optional; without it no approval record is created
+
+    async def run_chat(
+        self, message: str, workspace: str | None = None, debug: bool = False
+    ) -> ChatResult:  # never raises after task creation: failures are reported as status="failed"
+        task = await self.task_store.create_task(message, workspace, debug)
+        await self.event_store.append(
+            task.task_id,
+            "task_created",
+            {"message": message, "workspace": workspace, "debug": debug},
+        )
+        try:
+            messages = self.context_builder.build_basic_messages(task)
+            tool_observations = await self._run_action_tools(task.task_id, messages, workspace)
+            if any(observation.get("requires_approval") for observation in tool_observations):
+                # A tool asked for approval: park the task and return without calling the thinker.
+                await self.task_store.waiting_for_approval(task.task_id)
+                await self.event_store.append(
+                    task.task_id,
+                    "task_waiting_for_approval",
+                    {"observations": tool_observations},
+                )
+                return ChatResult(
+                    task_id=task.task_id,
+                    status="waiting_for_approval",
+                    final_response="Waiting for approval.",
+                    reasoning_content=None,
+                )
+            if tool_observations:
+                # Feed tool results to the thinker as an extra user message.
+                messages = [
+                    *messages,
+                    {
+                        "role": "user",
+                        "content": "tool_observations:\n"
+                        + json.dumps(tool_observations, ensure_ascii=False, indent=2),
+                    },
+                ]
+            await self.event_store.append(
+                task.task_id, "model_call_started", {"role": "thinker"}
+            )
+            response = await self.model_client.chat("thinker", messages)
+            await self.event_store.append(
+                task.task_id,
+                "cognition_response",
+                {
+                    "role": response.role,
+                    "content": response.content,
+                    "reasoning_content": response.reasoning_content,
+                },
+            )
+            await self.event_store.append(
+                task.task_id,
+                "model_call_finished",
+                {
+                    "role": response.role,
+                    "model": response.model,
+                    "latency_ms": response.latency_ms,
+                    "prompt_tokens": response.prompt_tokens,
+                    "completion_tokens": response.completion_tokens,
+                    "total_tokens": response.total_tokens,
+                },
+            )
+            await self.task_store.complete_task(task.task_id, response.content)
+            await self.event_store.append(
+                task.task_id,
+                "task_completed",
+                {
+                    "final_response": response.content,
+                    "reasoning_content": response.reasoning_content,
+                },
+            )
+            return ChatResult(
+                task_id=task.task_id,
+                status="completed",
+                final_response=response.content,
+                reasoning_content=response.reasoning_content,
+            )
+        except Exception as exc:  # top-level boundary: persist the failure and report it to the caller
+            await self.task_store.fail_task(task.task_id, str(exc))
+            await self.event_store.append(
+                task.task_id, "task_failed", {"error": str(exc)}
+            )
+            return ChatResult(
+                task_id=task.task_id,
+                status="failed",
+                final_response=str(exc),
+                reasoning_content=None,
+            )
+
+    async def _run_action_tools(
+        self, task_id: str, messages: list[dict[str, str]], workspace: str | None
+    ) -> list[dict[str, Any]]:  # best-effort: returns [] when no usable directive is produced
+        try:
+            await self.event_store.append(task_id, "model_call_started", {"role": "action"})
+            response = await self.model_client.chat("action", messages)
+            directive = json.loads(response.content)
+            if not isinstance(directive, dict):  # valid JSON that is not an object is unusable as a directive
+                raise ValueError("Action directive must be a JSON object")
+        except Exception as exc:
+            await self.event_store.append(
+                task_id, "action_directive_failed", {"error": str(exc)}
+            )
+            return []
+
+        await self.event_store.append(task_id, "action_directive", directive)
+        actions = directive.get("actions") or []
+        if not isinstance(actions, list) or not actions:
+            return []
+
+        gateway = ToolGateway.default(workspace or ".")  # no workspace given: use the current directory
+        observations: list[dict[str, Any]] = []
+        for index, action in enumerate(actions, start=1):  # observation indices are 1-based
+            if not isinstance(action, dict):
+                observations.append(
+                    {"index": index, "ok": False, "error": "Action must be an object"}
+                )
+                continue
+            tool_name = str(action.get("tool", ""))
+            await self.event_store.append(
+                task_id,
+                "tool_call_started",
+                {"index": index, "tool": tool_name, "args": action.get("args") or {}},
+            )
+            result = await gateway.run_action(action)
+            result_payload = result.model_dump()
+            if result.metadata.get("requires_approval"):
+                approval = None
+                if self.approval_service is not None:
+                    approval = await self.approval_service.create_pending(task_id, action)
+                await self.event_store.append(
+                    task_id,
+                    "tool_approval_requested",
+                    {
+                        "index": index,
+                        "tool": tool_name,
+                        "action": action,
+                        "approval_id": approval.approval_id if approval else None,
+                        "reason": result.error,
+                    },
+                )
+                observations.append(
+                    {
+                        "index": index,
+                        "tool": tool_name,
+                        "reason": action.get("reason"),
+                        "requires_approval": True,
+                        "approval_id": approval.approval_id if approval else None,
+                        "result": result_payload,
+                    }
+                )
+                break  # stop at the first action that needs approval; later actions are not run
+            await self.event_store.append(
+                task_id,
+                "tool_call_finished",
+                {"index": index, "tool": tool_name, "result": result_payload},
+            )
+            observations.append(
+                {
+                    "index": index,
+                    "tool": tool_name,
+                    "reason": action.get("reason"),
+                    "result": result_payload,
+                }
+            )
+        return observations
diff --git a/duck_core/schemas/action_directive.schema.json b/duck_core/schemas/action_directive.schema.json
new file mode 100644
index 0000000..12d2863
--- /dev/null
+++ b/duck_core/schemas/action_directive.schema.json
@@ -0,0 +1,55 @@
+{
+  "type": "object",
+  "required": ["kind", "intent", "risk_level", "actions"],
+  "additionalProperties": false,
+  "properties": {
+    "kind": {
+      "type": "string",
+      "enum": ["action_directive"]
+    },
+    "intent": {
+      "type": "string",
+      "minLength": 1
+    },
+    "risk_level": {
+      "type": "string",
+      "enum": ["none", "low", "medium", "high", "critical"]
+    },
+    "actions": {
+      "type": "array",
+      "minItems": 0,
+      "items": {
+        "type": "object",
+        "required": ["tool", "args"],
+        "additionalProperties": false,
+        "properties": {
+          "tool": {
+            "type": "string",
+            "minLength": 1
+          },
+          "args": {
+            "type": "object"
+          },
+          "reason": {
+            "type": "string"
+          }
+        }
+      }
+    },
+    "memory_hints": {
+      "type": "array",
+      "items": {
+        "type": "string"
+      }
+    },
+    "expected_observations": {
+      "type": "array",
+      "items": {
+        "type": "string"
+      }
+    },
+    "stop_reason": {
+      "type": "string"
+    }
+  }
+}
diff --git a/data/state/.gitkeep b/duck_core/skills/__init__.py
similarity index 100%
rename from data/state/.gitkeep
rename to duck_core/skills/__init__.py
diff --git a/duck_core/skills/registry.py b/duck_core/skills/registry.py
new file mode 100644
index 0000000..323bfd4
--- /dev/null
+++ b/duck_core/skills/registry.py
@@ -0,0 +1,68 @@
+from pathlib import Path
+
+import yaml
+from pydantic import BaseModel
+
+
+class Skill(BaseModel):  # one skill: metadata from skill.yaml plus text from optional sibling markdown files
+    id: str  # used as the registry key
+    title: str  # searched by keyword matching
+    description: str  # searched by keyword matching
+    version: int
+    tags: list[str] = []  # searched by keyword matching
+    required_tools: list[str] = []
+    risk_level: str = "low"
+    inputs: list[str] = []
+    outputs: list[str] = []
+    success_criteria: list[str] = []
+    procedure: str = ""  # contents of procedure.md, or "" when the file is missing
+    examples: str = ""  # contents of examples.md, or "" when the file is missing
+    notes: str = ""  # contents of notes.md, or "" when the file is missing
+
+
+class SkillCandidate(BaseModel):  # a skill matched against a user request
+    skill: Skill
+    score: float  # number of request terms found in the skill's title, description and tags
+    reason: str  # the registry always sets "keyword match"
+
+
+class SkillRegistry:  # loads skills from <skills_dir>/<name>/skill.yaml and ranks them by keyword overlap
+    def __init__(self, skills_dir: str = "skills"):
+        self.skills_dir = Path(skills_dir)
+        self._cache: dict[str, Skill] | None = None  # None until load_skills() has run once
+
+    def load_skills(self) -> list[Skill]:  # re-reads the directory on every call and refreshes the cache
+        skills: dict[str, Skill] = {}
+        if not self.skills_dir.exists():
+            self._cache = {}
+            return []
+        for path in sorted(self.skills_dir.glob("*/skill.yaml")):
+            data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}  # explicit encoding: do not depend on the locale
+            root = path.parent
+            data["procedure"] = self._read_optional(root / "procedure.md")
+            data["examples"] = self._read_optional(root / "examples.md")
+            data["notes"] = self._read_optional(root / "notes.md")
+            skill = Skill(**data)
+            skills[skill.id] = skill  # a later directory with the same id replaces the earlier one
+        self._cache = skills
+        return list(skills.values())
+
+    def get_skill(self, skill_id: str) -> Skill | None:  # loads lazily on first use; None for an unknown id
+        if self._cache is None:
+            self.load_skills()
+        return (self._cache or {}).get(skill_id)
+
+    async def find_candidate_skills(self, user_request: str, limit: int = 3) -> list[SkillCandidate]:
+        terms = set(user_request.lower().split())  # unique whitespace-separated, lower-cased terms
+        candidates: list[SkillCandidate] = []
+        for skill in self.load_skills():
+            haystack = " ".join([skill.title, skill.description, " ".join(skill.tags)]).lower()
+            score = sum(1 for term in terms if term in haystack)  # substring match, not whole-word match
+            if score:
+                candidates.append(
+                    SkillCandidate(skill=skill, score=float(score), reason="keyword match")
+                )
+        return sorted(candidates, key=lambda item: item.score, reverse=True)[:limit]  # best matches first
+
+    def _read_optional(self, path: Path) -> str:  # file text as UTF-8, or "" when the file does not exist
+        return path.read_text(encoding="utf-8") if path.exists() else ""
diff --git a/duck_core/tasks/__init__.py b/duck_core/tasks/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/duck_core/tasks/__init__.py
@@ -0,0 +1 @@
+
diff --git a/duck_core/tasks/state.py b/duck_core/tasks/state.py
new file mode 100644
index 0000000..19e8fd8
--- /dev/null
+++ b/duck_core/tasks/state.py
@@ -0,0 +1,12 @@
+from pydantic import BaseModel
+
+
+class TaskState(BaseModel):  # in-memory view of one row of the "tasks" table
+    task_id: str
+    status: str  # TaskStore writes "running", "completed", "failed", "cancelled" or "waiting_for_approval"
+    user_message: str
+    workspace: str | None = None
+    debug: bool = False
+    final_response: str | None = None  # model answer on completion; TaskStore.fail_task stores the error text here
+    created_at: str  # ISO-8601 UTC timestamp string
+    updated_at: str  # ISO-8601 UTC timestamp string
diff --git a/duck_core/tasks/store.py b/duck_core/tasks/store.py
new file mode 100644
index 0000000..f473dc2
--- /dev/null
+++ b/duck_core/tasks/store.py
@@ -0,0 +1,115 @@
+from datetime import UTC, datetime
+from pathlib import Path
+from uuid import uuid4
+
+import aiosqlite
+
+from duck_core.tasks.state import TaskState
+
+
+def utc_now() -> str:  # current time as a timezone-aware ISO-8601 string in UTC
+    return datetime.now(UTC).isoformat()
+
+
+class TaskStore:  # SQLite-backed task persistence; every operation opens its own aiosqlite connection
+    def __init__(self, db_path: str):
+        self.db_path = Path(db_path)
+
+    async def init(self) -> None:  # idempotent: creates the parent directory and the tasks table if missing
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                """
+                create table if not exists tasks (
+                  task_id text primary key,
+                  status text not null,
+                  user_message text not null,
+                  workspace text,
+                  debug integer not null default 0,
+                  final_response text,
+                  created_at text not null,
+                  updated_at text not null
+                )
+                """
+            )
+            await db.commit()
+
+    async def create_task(self, user_message: str, workspace: str | None, debug: bool) -> TaskState:
+        await self.init()  # every public method calls init() first, so no separate setup step is required
+        now = utc_now()
+        task_id = f"task_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}_{uuid4().hex[:8]}"  # UTC timestamp + 8 random hex chars
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                """
+                insert into tasks(task_id, status, user_message, workspace, debug, created_at, updated_at)
+                values (?, ?, ?, ?, ?, ?, ?)
+                """,
+                (task_id, "running", user_message, workspace, int(debug), now, now),  # new tasks start as "running"
+            )
+            await db.commit()
+        return TaskState(
+            task_id=task_id,
+            status="running",
+            user_message=user_message,
+            workspace=workspace,
+            debug=debug,
+            created_at=now,
+            updated_at=now,
+        )
+
+    async def update_status(
+        self, task_id: str, status: str, final_response: str | None = None
+    ) -> None:  # final_response=None keeps the stored value (coalesce in the SQL below)
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                """
+                update tasks
+                set status = ?, final_response = coalesce(?, final_response), updated_at = ?
+                where task_id = ?
+                """,
+                (status, final_response, utc_now(), task_id),
+            )
+            await db.commit()
+
+    async def complete_task(self, task_id: str, final_response: str) -> None:  # status "completed" + store the answer
+        await self.update_status(task_id, "completed", final_response)
+
+    async def fail_task(self, task_id: str, message: str) -> None:  # the error message is stored in final_response
+        await self.update_status(task_id, "failed", message)
+
+    async def cancel_task(self, task_id: str) -> None:  # status "cancelled"; final_response is left untouched
+        await self.update_status(task_id, "cancelled")
+
+    async def waiting_for_approval(self, task_id: str) -> None:  # status "waiting_for_approval"
+        await self.update_status(task_id, "waiting_for_approval")
+
+    async def get_task(self, task_id: str) -> TaskState | None:  # None when no row has this id
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row  # rows become addressable by column name
+            cursor = await db.execute("select * from tasks where task_id = ?", (task_id,))
+            row = await cursor.fetchone()
+        return self._row_to_task(row) if row else None
+
+    async def list_tasks(self, limit: int = 50) -> list[TaskState]:  # newest first, at most `limit` rows
+        await self.init()
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            cursor = await db.execute(
+                "select * from tasks order by created_at desc limit ?", (limit,)
+            )
+            rows = await cursor.fetchall()
+        return [self._row_to_task(row) for row in rows]
+
+    def _row_to_task(self, row: aiosqlite.Row) -> TaskState:  # maps a tasks row onto the pydantic model
+        return TaskState(
+            task_id=row["task_id"],
+            status=row["status"],
+            user_message=row["user_message"],
+            workspace=row["workspace"],
+            debug=bool(row["debug"]),  # stored as an integer column
+            final_response=row["final_response"],
+            created_at=row["created_at"],
+            updated_at=row["updated_at"],
+        )
diff --git a/duck_core/tools/__init__.py b/duck_core/tools/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/duck_core/tools/__init__.py
@@ -0,0 +1 @@
+
diff --git a/duck_core/tools/base.py b/duck_core/tools/base.py
new file mode 100644
index 0000000..abf6cbf
--- /dev/null
+++ b/duck_core/tools/base.py
@@ -0,0 +1,18 @@
+from typing import Any, Protocol
+
+from pydantic import BaseModel, Field
+
+
+class ToolResult(BaseModel):  # uniform return value of every tool
+    ok: bool  # True when the tool succeeded
+    output: str | None = None
+    error: str | None = None
+    metadata: dict[str, Any] = Field(default_factory=dict)  # tool-specific extras, e.g. "requires_approval", "path", "returncode"
+
+
+class Tool(Protocol):  # structural interface a tool must satisfy to be registered in ToolGateway
+    name: str  # registry key; matched against the "tool" field of an action
+    risk_level: str
+
+    async def run(self, args: dict[str, Any]) -> ToolResult:  # args is the action's "args" object
+        ...
diff --git a/duck_core/tools/file_read.py b/duck_core/tools/file_read.py
new file mode 100644
index 0000000..d2a879f
--- /dev/null
+++ b/duck_core/tools/file_read.py
@@ -0,0 +1,36 @@
+from pathlib import Path
+from typing import Any
+
+from duck_core.tools.base import ToolResult
+from duck_core.tools.paths import WorkspacePathError, resolve_workspace_path
+
+
+class FileReadTool:  # reads a text file located inside the configured workspace
+    name = "file_read"
+    risk_level = "low"
+
+    def __init__(self, workspace: str, max_bytes: int = 1_000_000):
+        self.workspace = workspace
+        self.max_bytes = max_bytes  # larger files are refused rather than truncated
+
+    async def run(self, args: dict[str, Any]) -> ToolResult:  # args: {"path": <path relative to the workspace>}
+        raw_path = str(args.get("path", ""))
+        try:
+            path = resolve_workspace_path(self.workspace, raw_path)
+        except WorkspacePathError as exc:
+            return ToolResult(ok=False, error=str(exc))
+        if self._requires_approval(path):
+            # Flag the result the way ShellExecSafeTool does, so RuntimeLoop's requires_approval check sees it.
+            reason = f"Reading {raw_path} requires explicit approval"
+            return ToolResult(ok=False, error=reason, metadata={"requires_approval": True})
+        if not path.is_file():
+            return ToolResult(ok=False, error=f"File not found: {raw_path}")
+        if path.stat().st_size > self.max_bytes:
+            return ToolResult(ok=False, error=f"File exceeds max size: {self.max_bytes}")
+        text = path.read_text(encoding="utf-8", errors="replace")  # undecodable bytes are replaced, never raised
+        metadata = {"path": str(path), "bytes_read": path.stat().st_size}
+        return ToolResult(ok=True, output=text, metadata=metadata)
+
+    def _requires_approval(self, path: Path) -> bool:  # sensitive: a file named .env, anything under .ssh, /etc/shadow
+        parts = set(path.parts)
+        return path.name == ".env" or ".ssh" in parts or str(path) == "/etc/shadow"
diff --git a/duck_core/tools/file_write.py b/duck_core/tools/file_write.py
new file mode 100644
index 0000000..2dc1618
--- /dev/null
+++ b/duck_core/tools/file_write.py
@@ -0,0 +1,40 @@
+from typing import Any
+
+from duck_core.tools.base import ToolResult
+from duck_core.tools.paths import WorkspacePathError, resolve_workspace_path
+
+
+class FileWriteTool:  # writes a text file inside the configured workspace
+    name = "file_write"
+    risk_level = "medium"
+
+    def __init__(self, workspace: str):
+        self.workspace = workspace
+
+    async def run(self, args: dict[str, Any]) -> ToolResult:  # args: {"path", "content", optional "overwrite"}
+        raw_path = str(args.get("path", ""))
+        content = str(args.get("content", ""))
+        overwrite = bool(args.get("overwrite", False))
+        try:
+            path = resolve_workspace_path(self.workspace, raw_path)
+        except WorkspacePathError as exc:
+            return ToolResult(ok=False, error=str(exc))
+        if path.exists() and not overwrite:  # existing files are only replaced when overwrite is truthy
+            return ToolResult(
+                ok=False,
+                error="Refusing to overwrite existing file without overwrite=true or approval",
+                metadata={"path": str(path)},
+            )
+        path.parent.mkdir(parents=True, exist_ok=True)
+        existed = path.exists()  # captured before writing so created/updated can be reported
+        path.write_text(content, encoding="utf-8")  # same encoding as the bytes_written computation below
+        return ToolResult(
+            ok=True,
+            output=f"Wrote {raw_path}",
+            metadata={
+                "path": str(path),
+                "bytes_written": len(content.encode()),  # str.encode() defaults to UTF-8
+                "created": not existed,
+                "updated": existed,
+            },
+        )
diff --git a/duck_core/tools/gateway.py b/duck_core/tools/gateway.py
new file mode 100644
index 0000000..b9df257
--- /dev/null
+++ b/duck_core/tools/gateway.py
@@ -0,0 +1,31 @@
+from typing import Any
+
+from duck_core.tools.base import Tool, ToolResult
+from duck_core.tools.file_read import FileReadTool
+from duck_core.tools.file_write import FileWriteTool
+from duck_core.tools.shell_exec_safe import ShellExecSafeTool
+
+
+class ToolGateway:  # dispatches an action dict to the registered tool with the matching name
+    def __init__(self, tools: list[Tool]):
+        self.tools = {tool.name: tool for tool in tools}  # a later tool with a duplicate name replaces the earlier one
+
+    @classmethod
+    def default(cls, workspace: str) -> "ToolGateway":  # gateway with the three built-in tools bound to `workspace`
+        return cls(
+            [
+                FileReadTool(workspace),
+                FileWriteTool(workspace),
+                ShellExecSafeTool(workspace),
+            ]
+        )
+
+    async def run_action(self, action: dict[str, Any]) -> ToolResult:  # unknown tool / bad args yield ok=False, not an exception
+        tool_name = str(action.get("tool", ""))
+        tool = self.tools.get(tool_name)
+        if tool is None:
+            return ToolResult(ok=False, error=f"Unknown tool: {tool_name}")
+        args = action.get("args") or {}  # missing or null args are treated as an empty object
+        if not isinstance(args, dict):
+            return ToolResult(ok=False, error="Tool args must be an object")
+        return await tool.run(args)
diff --git a/duck_core/tools/paths.py b/duck_core/tools/paths.py
new file mode 100644
index 0000000..9b8019d
--- /dev/null
+++ b/duck_core/tools/paths.py
@@ -0,0 +1,13 @@
+from pathlib import Path
+
+
+class WorkspacePathError(ValueError):  # raised when a requested path resolves outside the workspace root
+    pass
+
+
+def resolve_workspace_path(workspace: str, relative_path: str) -> Path:  # absolute path, guaranteed inside workspace
+    base = Path(workspace).resolve()
+    target = base.joinpath(relative_path).resolve()  # resolve() collapses ".." and symlinks before the check
+    if not target.is_relative_to(base):  # true for the root itself and for anything beneath it
+        raise WorkspacePathError(f"Path escapes workspace: {relative_path}")
+    return target
diff --git a/duck_core/tools/shell_exec_safe.py b/duck_core/tools/shell_exec_safe.py
new file mode 100644
index 0000000..a015545
--- /dev/null
+++ b/duck_core/tools/shell_exec_safe.py
@@ -0,0 +1,95 @@
+import shlex
+import subprocess
+from typing import Any
+
+from duck_core.tools.base import ToolResult
+
+
+ALLOWLIST = {  # command prefixes (first one, two or three shlex tokens) accepted by _is_allowed
+    "pwd",
+    "ls",
+    "cat",
+    "head",
+    "tail",
+    "grep",
+    "find",
+    "pytest",
+    "python -m pytest",
+    "python3 -m pytest",
+    "git status",
+    "git diff",
+    "git log",
+}
+
+BLOCKLIST = {  # phrases checked case-insensitively before the allowlist; a hit refuses the command
+    "rm",
+    "sudo",
+    "su",
+    "dd",
+    "mkfs",
+    "mount",
+    "umount",
+    "shutdown",
+    "reboot",
+    "poweroff",
+    "systemctl",
+    "service",
+    "apt install",
+    "apt remove",
+    "pacman -S",
+    "pacman -R",
+    "pip install",
+    "npm install -g",
+    "chmod -R",
+    "chown -R",
+    "curl | sh",
+    "wget | sh",
+}
+
+
+class ShellExecSafeTool:
+    name = "shell_exec_safe"
+    risk_level = "medium"
+
+    def __init__(self, workspace: str, timeout_seconds: int = 30):
+        self.workspace = workspace
+        self.timeout_seconds = timeout_seconds
+
+    async def run(self, args: dict[str, Any]) -> ToolResult:
+        command = str(args.get("command", "")).strip()
+        allowed, reason = self._is_allowed(command)
+        if not allowed:
+            return ToolResult(ok=False, error=reason, metadata={"requires_approval": True})
+        try:
+            completed = subprocess.run(
+                command,
+                cwd=self.workspace,
+                shell=True,
+                text=True,
+                capture_output=True,
+                timeout=self.timeout_seconds,
+                check=False,
+            )
+        except subprocess.SubprocessError as exc:
+            return ToolResult(ok=False, error=str(exc))
+        return ToolResult(
+            ok=completed.returncode == 0,
+            output=completed.stdout,
+            error=completed.stderr if completed.returncode else None,
+            metadata={"returncode": completed.returncode, "command": command},
+        )
+
+    def _is_allowed(self, command: str) -> tuple[bool, str | None]:
+        if not command:
+            return False, "Empty command"
+        lowered = command.lower()
+        for blocked in BLOCKLIST:
+            if lowered.startswith(blocked.lower()) or blocked.lower() in lowered:
+                return False, f"Command is blocked: {blocked}"
+        parts = shlex.split(command)
+        prefix1 = parts[0] if parts else ""
+        prefix2 = " ".join(parts[:2])
+        prefix3 = " ".join(parts[:3])
+        if prefix1 in ALLOWLIST or prefix2 in ALLOWLIST or prefix3 in ALLOWLIST:
+            return True, None
+        return False, "Command is outside allowlist and requires approval"
diff --git a/duck_core/web/static/app.js b/duck_core/web/static/app.js
new file mode 100644
index 0000000..602a5d3
--- /dev/null
+++ b/duck_core/web/static/app.js
@@ -0,0 +1,510 @@
+const state = { // UI-wide mutable state
+  running: false, // true while sendMessage() has a request in flight
+  messages: [], // not referenced anywhere else in this file
+};
+
+async function jsonFetch(url, options) {
+  const response = await fetch(url, options);
+  if (!response.ok) throw new Error(await response.text());
+  return response.json();
+}
+
+function escapeText(value) {
+  return String(value ?? "");
+}
+
+function setStatus(id, text, tone = "neutral") {
+  const node = document.querySelector(id);
+  if (!node) return;
+  node.textContent = text;
+  node.dataset.tone = tone;
+}
+
+function addMessage(role, content, meta = "", options = {}) {
+  const list = document.querySelector("#messages");
+  if (!list) return;
+
+  const article = document.createElement("article");
+  article.className = `message ${role}`;
+
+  const avatar = document.createElement("div");
+  avatar.className = "avatar";
+  avatar.textContent = role === "user" ? "U" : "D";
+
+  const bubble = document.createElement("div");
+  bubble.className = "bubble";
+
+  const messageMeta = document.createElement("div");
+  messageMeta.className = "message-meta";
+  messageMeta.innerHTML = `${role === "user" ? "You" : "DuckLM"}${escapeText(meta)}`;
+
+  const text = document.createElement("p");
+  text.textContent = content;
+
+  bubble.append(messageMeta);
+  if (role === "assistant" && options.reasoning) {
+    bubble.append(createInlineReasoning());
+  }
+  bubble.append(text);
+  article.append(avatar, bubble);
+  list.append(article);
+  list.scrollTop = list.scrollHeight;
+  return article;
+}
+
+function createInlineReasoning() { // build the collapsed reasoning block: a toggle button plus a hidden <pre>
+  const section = document.createElement("section");
+  section.className = "message-reasoning is-collapsed";
+
+  const button = document.createElement("button");
+  button.className = "message-reasoning-toggle";
+  button.type = "button";
+  button.setAttribute("aria-expanded", "false"); // flipped by toggleInlineReasoning()
+
+  const title = document.createElement("span");
+  title.textContent = "Размышление"; // UI label, Russian for "Reasoning"
+  const status = document.createElement("span");
+  status.className = "message-reasoning-status";
+  status.textContent = "streaming";
+  button.append(title, status);
+
+  const body = document.createElement("pre");
+  body.hidden = true; // starts collapsed
+  body.textContent = "";
+
+  section.append(button, body);
+  return section;
+}
+
+function createToolTerminal(eventPayload) {
+  const payload = eventPayload.payload || eventPayload;
+  const args = payload.args || {};
+  const terminal = document.createElement("section");
+  terminal.className = "tool-terminal";
+  terminal.dataset.toolIndex = String(payload.index || "");
+
+  const header = document.createElement("div");
+  header.className = "tool-terminal-header";
+
+  const dots = document.createElement("span");
+  dots.className = "terminal-dots";
+  dots.innerHTML = "";
+
+  const title = document.createElement("span");
+  title.className = "tool-terminal-title";
+  title.textContent = formatToolCommand(payload.tool, args);
+
+  const status = document.createElement("span");
+  status.className = "tool-terminal-status";
+  status.textContent = "running";
+
+  header.append(dots, title, status);
+
+  const body = document.createElement("pre");
+  body.className = "tool-terminal-body";
+  body.textContent = formatToolStart(payload.tool, args);
+
+  terminal.append(header, body);
+  return terminal;
+}
+
+function formatToolCommand(tool, args) {
+  if (tool === "shell_exec_safe") return `$ ${args.command || tool}`;
+  if (tool === "file_read") return `$ file_read ${args.path || ""}`.trim();
+  if (tool === "file_write") return `$ file_write ${args.path || ""}`.trim();
+  return `$ ${tool || "tool"}`;
+}
+
+function formatToolStart(tool, args) {
+  const lines = [formatToolCommand(tool, args)];
+  const serializedArgs = JSON.stringify(args || {}, null, 2);
+  if (serializedArgs !== "{}") lines.push(serializedArgs);
+  return lines.join("\n");
+}
+
+function appendToolTerminal(article, eventPayload) {
+  const paragraph = article?.querySelector("p");
+  const terminal = createToolTerminal(eventPayload);
+  paragraph?.before(terminal);
+  document.querySelector("#messages").scrollTop = document.querySelector("#messages").scrollHeight;
+}
+
+function updateToolTerminal(article, eventPayload) { // fill in the result of a finished tool call
+  const payload = eventPayload.payload || eventPayload;
+  const terminal = article?.querySelector(`.tool-terminal[data-tool-index="${payload.index || ""}"]`); // same key as createToolTerminal()
+  const body = terminal?.querySelector(".tool-terminal-body");
+  const status = terminal?.querySelector(".tool-terminal-status");
+  const result = payload.result || {};
+  if (!body || !status) return; // no matching terminal was rendered
+  terminal.classList.toggle("is-error", !result.ok);
+  status.textContent = result.ok ? "ok" : "error";
+
+  const parts = [body.textContent.trim()]; // keep the command text that is already shown
+  if (result.output) parts.push("\nstdout\n" + result.output.trimEnd());
+  if (result.error) parts.push("\nstderr\n" + result.error.trimEnd());
+  if (result.metadata && Object.keys(result.metadata).length) {
+    parts.push("\nmetadata\n" + JSON.stringify(result.metadata, null, 2));
+  }
+  body.textContent = parts.join("\n");
+  document.querySelector("#messages").scrollTop = document.querySelector("#messages").scrollHeight;
+}
+
+function appendApprovalTerminal(article, eventPayload) { // render a tool terminal in the "approval" waiting state
+  const payload = eventPayload.payload || eventPayload;
+  appendToolTerminal(article, { // reuse the normal terminal; approval events carry args under action.args
+    payload: {
+      index: payload.index,
+      tool: payload.tool,
+      args: payload.action?.args || {},
+    },
+  });
+  const terminal = article?.querySelector(`.tool-terminal[data-tool-index="${payload.index || ""}"]`);
+  const body = terminal?.querySelector(".tool-terminal-body");
+  const status = terminal?.querySelector(".tool-terminal-status");
+  terminal?.classList.add("is-waiting");
+  if (status) status.textContent = "approval";
+  if (body) body.textContent += `\n\napproval required\n${payload.reason || ""}`;
+}
+
+function setMessagePending(article, text) {
+  const paragraph = article?.querySelector("p");
+  if (paragraph) paragraph.textContent = text;
+}
+
+function appendMessageText(article, delta) {
+  const paragraph = article?.querySelector("p");
+  if (!paragraph) return;
+  paragraph.textContent += delta;
+  document.querySelector("#messages").scrollTop = document.querySelector("#messages").scrollHeight;
+}
+
+function appendInlineReasoning(article, delta) { // append a streamed reasoning chunk to the message's <pre>
+  const block = article?.querySelector(".message-reasoning");
+  const body = block?.querySelector("pre");
+  const status = block?.querySelector(".message-reasoning-status");
+  if (!body) return; // message was created without a reasoning block
+  body.textContent += delta;
+  if (status) status.textContent = "streaming";
+  document.querySelector("#messages").scrollTop = document.querySelector("#messages").scrollHeight;
+}
+
+function finishInlineReasoning(article, reasoning) { // finalise the reasoning block and mark it "done"
+  const block = article?.querySelector(".message-reasoning");
+  const body = block?.querySelector("pre");
+  const status = block?.querySelector(".message-reasoning-status");
+  if (!body) return;
+  body.textContent = reasoning?.trim() || body.textContent.trim() || "Размышления не были получены."; // final text, else streamed text, else placeholder ("No reasoning was received.")
+  if (status) status.textContent = "done";
+}
+
+async function refreshEvents(taskId) { // fetch a task's events and redraw the #events list; resolves to the events
+  const events = await jsonFetch(`/v1/tasks/${taskId}/events`);
+  const list = document.querySelector("#events");
+  if (!list) return events; // page has no timeline: still hand the data back
+
+  list.innerHTML = ""; // full redraw
+  for (const event of events) {
+    const item = document.createElement("li");
+    const title = document.createElement("strong");
+    const detail = document.createElement("span");
+    title.textContent = `${event.sequence}. ${event.event_type}`;
+    detail.textContent = summarizeEvent(event.payload);
+    item.append(title, detail);
+    list.appendChild(item);
+  }
+  return events;
+}
+
+function summarizeEvent(payload) {
+  if (!payload || typeof payload !== "object") return "";
+  if (payload.role && payload.latency_ms) {
+    return `${payload.role} · ${Math.round(payload.latency_ms)} ms`;
+  }
+  if (payload.content) {
+    return payload.content.slice(0, 140);
+  }
+  if (payload.final_response) {
+    return payload.final_response.slice(0, 140);
+  }
+  if (payload.error) {
+    return payload.error;
+  }
+  return JSON.stringify(payload);
+}
+
+function toggleInlineReasoning(button) { // expand or collapse the reasoning block that owns `button`
+  const block = button.closest(".message-reasoning");
+  const body = block?.querySelector("pre");
+  if (!block || !body) return;
+  const expanded = button.getAttribute("aria-expanded") === "true"; // state before the click
+  button.setAttribute("aria-expanded", String(!expanded));
+  body.hidden = expanded; // was expanded -> hide it now
+  block.classList.toggle("is-collapsed", expanded);
+}
+
+function parseSseBlock(block) {
+  const event = {name: "message", data: ""};
+  for (const line of block.split("\n")) {
+    if (line.startsWith("event:")) event.name = line.slice(6).trim();
+    if (line.startsWith("data:")) event.data += line.slice(5).trimStart();
+  }
+  if (!event.data) return null;
+  return {name: event.name, data: JSON.parse(event.data)};
+}
+
+async function streamChat(payload, onEvent) { // POST the chat request and feed each parsed SSE event to onEvent, in order
+  const response = await fetch("/v1/chat/stream", {
+    method: "POST",
+    headers: {"Content-Type": "application/json"},
+    body: JSON.stringify(payload),
+  });
+  if (!response.ok) throw new Error(await response.text());
+  if (!response.body) throw new Error("Streaming response is not available in this browser.");
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+  let buffer = ""; // text received so far that does not yet end in a blank line
+  while (true) {
+    const {value, done} = await reader.read();
+    if (done) break;
+    buffer += decoder.decode(value, {stream: true}); // stream: true keeps split multi-byte characters intact
+    const blocks = buffer.split("\n\n"); // events are separated by a blank line ("\n\n" only)
+    buffer = blocks.pop() || ""; // last piece may be incomplete: keep it for the next chunk
+    for (const block of blocks) {
+      const event = parseSseBlock(block);
+      if (event) await onEvent(event);
+    }
+  }
+  buffer += decoder.decode(); // flush any bytes the decoder still holds
+  if (buffer.trim()) { // trailing event without a closing blank line
+    const event = parseSseBlock(buffer);
+    if (event) await onEvent(event);
+  }
+}
+
+async function sendMessage() { // send the composer text, stream the reply into a new assistant message
+  if (state.running) return; // one request at a time
+  const input = document.querySelector("#message");
+  const message = input.value.trim();
+  if (!message) return;
+
+  state.running = true;
+  document.querySelector("#run").disabled = true;
+  setStatus("#task-status", "running", "warn");
+  addMessage("user", message, "submitted");
+  input.value = "";
+  const pending = addMessage("assistant", "", "thinking", {reasoning: true}); // placeholder filled by the events below
+  let taskId = "";
+  let contentStarted = false; // becomes true on the first content_delta
+
+  try {
+    await streamChat({
+      message,
+      workspace: document.querySelector("#workspace").value,
+      debug: document.querySelector("#debug").checked,
+    }, async ({name, data}) => {
+      if (data.task_id) taskId = data.task_id;
+      if (name === "task_created") {
+        taskId = data.task_id;
+        setStatus("#task-status", taskId, "warn");
+        return;
+      }
+      if (name === "reasoning_delta") {
+        pending.querySelector(".message-meta span").textContent = "reasoning"; // NOTE(review): requires a <span> inside .message-meta
+        appendInlineReasoning(pending, data.delta || "");
+        return;
+      }
+      if (name === "tool_call_started") {
+        pending.querySelector(".message-meta span").textContent = "tool";
+        appendToolTerminal(pending, data);
+        return;
+      }
+      if (name === "tool_call_finished") {
+        pending.querySelector(".message-meta span").textContent = "tool";
+        updateToolTerminal(pending, data);
+        return;
+      }
+      if (name === "tool_approval_requested") {
+        pending.querySelector(".message-meta span").textContent = "approval";
+        appendApprovalTerminal(pending, data);
+        return;
+      }
+      if (name === "content_delta") {
+        if (!contentStarted) {
+          contentStarted = true;
+          setMessagePending(pending, "");
+        }
+        pending.querySelector(".message-meta span").textContent = "answering";
+        appendMessageText(pending, data.delta || "");
+        return;
+      }
+      if (name === "done") {
+        if (!contentStarted) { // nothing was streamed: show the final response in one piece
+          setMessagePending(pending, data.final_response || "No final content returned.");
+        }
+        pending.querySelector(".message-meta span").textContent = data.status;
+        setStatus("#task-status", data.task_id, data.status === "completed" ? "ok" : "warn");
+        finishInlineReasoning(pending, data.reasoning_content);
+        await refreshEvents(data.task_id);
+        return;
+      }
+      if (name === "error") {
+        throw new Error(data.error || "Stream failed."); // handled by the catch below
+      }
+    });
+  } catch (error) {
+    if (!taskId) input.value = message; // no task was created: give the text back for a retry
+    setMessagePending(pending, error.message);
+    pending.querySelector(".message-meta span").textContent = "failed";
+    setStatus("#task-status", "failed", "bad");
+    if (taskId) await refreshEvents(taskId);
+  } finally {
+    state.running = false;
+    document.querySelector("#run").disabled = false;
+    input.focus();
+  }
+}
+
+async function checkRuntime() { // probe the API and the model roles, then update the two status labels
+  try {
+    await jsonFetch("/health");
+    setStatus("#api-status", "online", "ok");
+  } catch {
+    setStatus("#api-status", "offline", "bad");
+  }
+
+  try {
+    const roles = await jsonFetch("/v1/models/ping");
+    const ok = Object.values(roles).every((item) => item.ok); // "online" only when every role reports ok
+    setStatus("#model-status", ok ? "online" : "degraded", ok ? "ok" : "warn");
+  } catch {
+    setStatus("#model-status", "offline", "bad");
+  }
+}
+
+function bindChat() {
+  const composer = document.querySelector("#composer");
+  const input = document.querySelector("#message");
+  composer?.addEventListener("submit", (event) => {
+    event.preventDefault();
+    sendMessage();
+  });
+  input?.addEventListener("keydown", (event) => {
+    if (event.key === "Enter" && !event.shiftKey) {
+      event.preventDefault();
+      sendMessage();
+    }
+  });
+  document.querySelector("#new-chat")?.addEventListener("click", () => {
+    const messages = document.querySelector("#messages");
+    messages.innerHTML = "";
+    addMessage("assistant", "Новая сессия готова.", "ready");
+    document.querySelector("#events").innerHTML = "";
+    setStatus("#task-status", "none");
+  });
+  document.querySelector("#messages")?.addEventListener("click", (event) => {
+    const button = event.target.closest(".message-reasoning-toggle");
+    if (button) toggleInlineReasoning(button);
+  });
+  document.querySelector("#debug")?.addEventListener("change", (event) => {
+    document.querySelector("#debug-panel").hidden = !event.target.checked;
+  });
+}
+
+async function loadSimplePages() { // fill whichever of #skills / #experience / #approvals exists on the current page
+  const skills = document.querySelector("#skills");
+  if (skills) skills.textContent = JSON.stringify(await jsonFetch("/v1/skills"), null, 2);
+  const experience = document.querySelector("#experience");
+  if (experience) experience.textContent = JSON.stringify(await jsonFetch("/v1/experience"), null, 2);
+  const approvals = document.querySelector("#approvals");
+  if (approvals) await renderApprovals(approvals);
+}
+
+async function renderApprovals(container) { // fetch pending approvals and redraw them as cards inside `container`
+  const approvals = await jsonFetch("/v1/approvals/pending");
+  container.innerHTML = ""; // full redraw
+  if (!approvals.length) {
+    const empty = document.createElement("p");
+    empty.className = "empty-state";
+    empty.textContent = "No pending approvals.";
+    container.append(empty);
+    return;
+  }
+
+  for (const approval of approvals) {
+    const card = document.createElement("article");
+    card.className = "approval-card";
+    card.dataset.approvalId = approval.approval_id; // read back by the #approvals click handler
+
+    const header = document.createElement("div");
+    header.className = "approval-card-header";
+    const title = document.createElement("h2");
+    title.textContent = approval.normalized_action?.tool || "Tool action";
+    const status = document.createElement("span");
+    status.textContent = approval.status;
+    header.append(title, status);
+
+    const meta = document.createElement("dl");
+    meta.className = "approval-meta";
+    meta.append(metaRow("Task", approval.task_id));
+    meta.append(metaRow("Approval", approval.approval_id));
+    meta.append(metaRow("Created", approval.created_at));
+
+    const action = document.createElement("pre");
+    action.className = "approval-action";
+    action.textContent = JSON.stringify(approval.normalized_action, null, 2);
+
+    const actions = document.createElement("div");
+    actions.className = "approval-actions";
+    actions.append(
+      approvalButton("Allow once", "allow_once"), // second argument becomes the URL segment POSTed on click
+      approvalButton("Allow forever", "allow_forever"),
+      approvalButton("Deny", "deny", "danger"),
+    );
+
+    card.append(header, meta, action, actions);
+    container.append(card);
+  }
+}
+
+function metaRow(label, value) {
+  const row = document.createElement("div");
+  const dt = document.createElement("dt");
+  const dd = document.createElement("dd");
+  dt.textContent = label;
+  dd.textContent = value || "";
+  row.append(dt, dd);
+  return row;
+}
+
+function approvalButton(label, action, tone = "") {
+  const button = document.createElement("button");
+  button.type = "button";
+  button.textContent = label;
+  button.dataset.approvalAction = action;
+  if (tone) button.dataset.tone = tone;
+  return button;
+}
+
+document.querySelector("#approvals")?.addEventListener("click", async (event) => {
+  const button = event.target.closest("[data-approval-action]");
+  if (!button) return;
+  const card = button.closest(".approval-card");
+  const approvalId = card?.dataset.approvalId;
+  if (!approvalId) return;
+
+  button.disabled = true;
+  const action = button.dataset.approvalAction;
+  await jsonFetch(`/v1/approvals/${approvalId}/${action}`, {method: "POST"});
+  await renderApprovals(document.querySelector("#approvals"));
+});
+
+document.querySelector("#memory-search")?.addEventListener("click", async () => { // run a memory search and show the raw JSON
+  const q = document.querySelector("#memory-query").value;
+  document.querySelector("#memory-results").textContent =
+    JSON.stringify(await jsonFetch(`/v1/memory/search?q=${encodeURIComponent(q)}`), null, 2);
+});
+
+bindChat(); // startup: attach listeners for whichever controls exist on this page
+checkRuntime(); // not awaited; it handles its own request failures
+loadSimplePages().catch(console.error); // a failed page load is logged, not shown in the UI
diff --git a/duck_core/web/static/style.css b/duck_core/web/static/style.css
new file mode 100644
index 0000000..7429f98
--- /dev/null
+++ b/duck_core/web/static/style.css
@@ -0,0 +1,673 @@
+:root {
+  color-scheme: light;
+  --bg: #eef2f6;
+  --sidebar: #111827;
+  --sidebar-soft: #1f2937;
+  --panel: #ffffff;
+  --panel-strong: #f8fafc;
+  --text: #111827;
+  --muted: #64748b;
+  --border: #d7dee8;
+  --accent: #1f6feb;
+  --accent-strong: #174ea6;
+  --ok: #12805c;
+  --warn: #b7791f;
+  --bad: #b42318;
+  --shadow: 0 18px 50px rgba(15, 23, 42, 0.14);
+}
+
+* { box-sizing: border-box; }
+
+body {
+  margin: 0;
+  min-height: 100vh;
+  font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+  background: var(--bg);
+  color: var(--text);
+}
+
+.simple-page {
+  max-width: 980px;
+  margin: 0 auto;
+  padding: 28px;
+}
+
+.simple-header {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 16px;
+  margin-bottom: 18px;
+}
+
+.simple-header h1,
+.simple-header p {
+  margin: 0;
+}
+
+.simple-header h1 {
+  font-size: 24px;
+}
+
+.simple-header p {
+  margin-top: 4px;
+  color: var(--muted);
+}
+
+.approval-list {
+  display: grid;
+  gap: 14px;
+}
+
+.approval-card {
+  display: grid;
+  gap: 14px;
+  padding: 16px;
+  background: var(--panel);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  box-shadow: var(--shadow);
+}
+
+.approval-card-header {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+}
+
+.approval-card h2 {
+  margin: 0;
+  font-size: 17px;
+}
+
+.approval-card-header span {
+  padding: 3px 8px;
+  border-radius: 999px;
+  background: #fef3c7;
+  color: #854d0e;
+  font-size: 12px;
+  font-weight: 800;
+}
+
+.approval-meta {
+  display: grid;
+  gap: 6px;
+}
+
+.approval-meta div {
+  justify-content: flex-start;
+}
+
+.approval-meta dd {
+  max-width: none;
+  color: var(--text);
+}
+
+.approval-action {
+  margin: 0;
+  max-height: 220px;
+  overflow: auto;
+  padding: 12px;
+  background: #0f172a;
+  border-radius: 8px;
+  color: #d1fae5;
+  font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
+  font-size: 12px;
+  line-height: 1.5;
+}
+
+.approval-actions {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 10px;
+}
+
+.approval-actions button {
+  border: 0;
+  border-radius: 8px;
+  padding: 9px 12px;
+  background: var(--accent);
+  color: #ffffff;
+  font-weight: 750;
+}
+
+.approval-actions button[data-tone="danger"] {
+  background: var(--bad);
+}
+
+.approval-actions button:disabled {
+  cursor: wait;
+  opacity: 0.65;
+}
+
+.empty-state {
+  margin: 0;
+  padding: 16px;
+  background: var(--panel);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  color: var(--muted);
+}
+
+button, input, textarea {
+  font: inherit;
+}
+
+button {
+  cursor: pointer;
+}
+
+.app-shell {
+  display: grid;
+  grid-template-columns: 292px minmax(0, 1fr);
+  min-height: 100vh;
+}
+
+.sidebar {
+  display: flex;
+  flex-direction: column;
+  gap: 18px;
+  min-height: 100vh;
+  padding: 22px;
+  background: var(--sidebar);
+  color: #e5edf7;
+}
+
+.brand {
+  display: flex;
+  align-items: center;
+  gap: 12px;
+  padding-bottom: 12px;
+  border-bottom: 1px solid rgba(255,255,255,0.12);
+}
+
+.brand-mark, .avatar {
+  display: grid;
+  place-items: center;
+  width: 36px;
+  height: 36px;
+  border-radius: 8px;
+  font-weight: 800;
+}
+
+.brand-mark {
+  background: #f8fafc;
+  color: #111827;
+}
+
+.brand h1, .brand p,
+.chat-header h2, .chat-header p,
+.settings-panel h2, .status-panel h2 {
+  margin: 0;
+}
+
+.brand h1 {
+  font-size: 18px;
+  line-height: 1.2;
+}
+
+.brand p {
+  margin-top: 2px;
+  color: #9ca3af;
+  font-size: 12px;
+}
+
+.side-nav {
+  display: grid;
+  gap: 6px;
+}
+
+.side-nav a {
+  color: #cbd5e1;
+  text-decoration: none;
+  padding: 10px 12px;
+  border-radius: 7px;
+  font-size: 14px;
+}
+
+.side-nav a:hover,
+.side-nav a.active {
+  background: var(--sidebar-soft);
+  color: #ffffff;
+}
+
+.settings-panel,
+.status-panel {
+  display: grid;
+  gap: 12px;
+  padding: 14px;
+  background: rgba(255,255,255,0.06);
+  border: 1px solid rgba(255,255,255,0.10);
+  border-radius: 8px;
+}
+
+.settings-panel h2,
+.status-panel h2 {
+  font-size: 13px;
+  color: #f8fafc;
+}
+
+label {
+  display: grid;
+  gap: 7px;
+  font-size: 13px;
+  font-weight: 650;
+}
+
+.toggle-row {
+  grid-template-columns: auto 1fr;
+  align-items: center;
+  font-weight: 500;
+  color: #cbd5e1;
+}
+
+input,
+textarea {
+  width: 100%;
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  padding: 11px 12px;
+  background: #ffffff;
+  color: var(--text);
+}
+
+.sidebar input {
+  border-color: rgba(255,255,255,0.16);
+  background: rgba(255,255,255,0.08);
+  color: #ffffff;
+}
+
+dl {
+  display: grid;
+  gap: 9px;
+  margin: 0;
+}
+
+dl div {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+}
+
+dt {
+  color: #9ca3af;
+  font-size: 12px;
+}
+
+dd {
+  margin: 0;
+  max-width: 160px;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+  color: #e5edf7;
+  font-size: 12px;
+}
+
+[data-tone="ok"] { color: #86efac; }
+[data-tone="warn"] { color: #fde68a; }
+[data-tone="bad"] { color: #fca5a5; }
+
+.chat-shell {
+  display: grid;
+  grid-template-rows: auto minmax(0, 1fr) auto auto;
+  gap: 16px;
+  min-width: 0;
+  height: 100vh;
+  padding: 22px;
+}
+
+.chat-header {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 16px;
+  padding: 18px 20px;
+  background: var(--panel);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  box-shadow: var(--shadow);
+}
+
+.chat-header h2 {
+  font-size: 20px;
+}
+
+.chat-header p {
+  margin-top: 4px;
+  color: var(--muted);
+  font-size: 13px;
+}
+
+.secondary-button,
+.composer button {
+  border: 0;
+  border-radius: 8px;
+  padding: 10px 14px;
+  font-weight: 750;
+}
+
+.secondary-button {
+  background: #edf2f7;
+  color: #1f2937;
+}
+
+.messages {
+  display: flex;
+  flex-direction: column;
+  gap: 14px;
+  min-height: 0;
+  overflow-y: auto;
+  padding: 18px;
+  background: var(--panel);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  box-shadow: var(--shadow);
+}
+
+.message {
+  display: grid;
+  grid-template-columns: 36px minmax(0, 1fr);
+  gap: 10px;
+  max-width: 860px;
+}
+
+.message.user {
+  align-self: flex-end;
+  grid-template-columns: minmax(0, 1fr) 36px;
+}
+
+.message.user .avatar {
+  grid-column: 2;
+  grid-row: 1;
+  background: #dbeafe;
+  color: #1d4ed8;
+}
+
+.message.assistant .avatar {
+  background: #e5e7eb;
+  color: #111827;
+}
+
+.message.user .bubble {
+  grid-column: 1;
+  grid-row: 1;
+  background: #eff6ff;
+  border-color: #bfdbfe;
+}
+
+.bubble {
+  padding: 12px 14px;
+  background: var(--panel-strong);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+}
+
+.bubble p {
+  margin: 8px 0 0;
+  white-space: pre-wrap;
+  overflow-wrap: anywhere;
+  line-height: 1.5;
+}
+
+.message-reasoning {
+  display: grid;
+  gap: 8px;
+  margin-top: 10px;
+  padding: 9px 10px;
+  background: #f1f5f9;
+  border: 1px solid #dbe3ee;
+  border-radius: 8px;
+}
+
+.message-reasoning.is-collapsed {
+  gap: 0;
+}
+
+.message-reasoning-toggle {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+  width: 100%;
+  border: 0;
+  padding: 0;
+  background: transparent;
+  color: #475569;
+  font-size: 12px;
+  font-weight: 750;
+  text-align: left;
+}
+
+.message-reasoning-status {
+  flex: 0 0 auto;
+  padding: 2px 7px;
+  border-radius: 999px;
+  background: #e2e8f0;
+  color: #64748b;
+  font-size: 11px;
+}
+
+.message-reasoning pre {
+  margin: 0;
+  max-height: 220px;
+  overflow: auto;
+  color: #334155;
+  font-size: 12px;
+  line-height: 1.45;
+  white-space: pre-wrap;
+  overflow-wrap: anywhere;
+}
+
+.tool-terminal {
+  margin-top: 10px;
+  overflow: hidden;
+  background: #0f172a;
+  border: 1px solid #1e293b;
+  border-radius: 8px;
+  box-shadow: inset 0 1px 0 rgba(255,255,255,0.05);
+}
+
+.tool-terminal-header {
+  display: grid;
+  grid-template-columns: auto minmax(0, 1fr) auto;
+  align-items: center;
+  gap: 10px;
+  min-height: 34px;
+  padding: 8px 10px;
+  background: #111827;
+  border-bottom: 1px solid #1e293b;
+}
+
+.terminal-dots {
+  display: flex;
+  gap: 5px;
+}
+
+.terminal-dots i {
+  width: 9px;
+  height: 9px;
+  border-radius: 999px;
+}
+
+.terminal-dots i:nth-child(1) { background: #ef4444; }
+.terminal-dots i:nth-child(2) { background: #f59e0b; }
+.terminal-dots i:nth-child(3) { background: #22c55e; }
+
+.tool-terminal-title {
+  min-width: 0;
+  overflow: hidden;
+  color: #d1d5db;
+  font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
+  font-size: 12px;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
+.tool-terminal-status {
+  padding: 2px 7px;
+  border-radius: 999px;
+  background: #1d4ed8;
+  color: #dbeafe;
+  font-size: 11px;
+  font-weight: 800;
+}
+
+.tool-terminal.is-error .tool-terminal-status {
+  background: #7f1d1d;
+  color: #fecaca;
+}
+
+.tool-terminal.is-waiting .tool-terminal-status {
+  background: #854d0e;
+  color: #fef3c7;
+}
+
+.tool-terminal-body {
+  margin: 0;
+  max-height: 220px;
+  overflow: auto;
+  padding: 10px 12px;
+  color: #d1fae5;
+  font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
+  font-size: 12px;
+  line-height: 1.55;
+  white-space: pre-wrap;
+  overflow-wrap: anywhere;
+}
+
+.message-meta {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+  color: var(--muted);
+  font-size: 12px;
+}
+
+.message-meta strong {
+  color: var(--text);
+  font-size: 13px;
+}
+
+.debug-panel {
+  display: grid;
+  grid-template-columns: minmax(0, 1fr);
+  gap: 16px;
+  min-height: 180px;
+}
+
+.debug-column {
+  min-width: 0;
+  padding: 14px;
+  background: var(--panel);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+}
+
+.debug-column h3 {
+  margin: 0 0 10px;
+  font-size: 13px;
+}
+
+pre,
+#events {
+  margin: 0;
+  max-height: 170px;
+  overflow: auto;
+  color: #334155;
+  font-size: 12px;
+  line-height: 1.45;
+  white-space: pre-wrap;
+  overflow-wrap: anywhere;
+}
+
+#events {
+  display: grid;
+  gap: 8px;
+  padding-left: 18px;
+}
+
+#events li strong,
+#events li span {
+  display: block;
+}
+
+#events li span {
+  color: var(--muted);
+}
+
+.composer {
+  display: grid;
+  gap: 10px;
+  padding: 14px;
+  background: var(--panel);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  box-shadow: var(--shadow);
+}
+
+.composer textarea {
+  min-height: 86px;
+  resize: vertical;
+}
+
+.composer-actions {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+}
+
+#composer-hint {
+  color: var(--muted);
+  font-size: 12px;
+}
+
+.composer button { /* accent-coloured button inside the composer */
+  min-width: 96px;
+  background: var(--accent);
+  color: #ffffff;
+}
+
+.composer button:hover { /* NOTE(review): no :not(:disabled), so a disabled button still gets this hover colour - confirm intended */
+  background: var(--accent-strong);
+}
+
+.composer button:disabled { /* disabled state: wait cursor and dimmed button */
+  cursor: wait;
+  opacity: 0.7;
+}
+
+[hidden] { /* keep the hidden attribute effective on elements whose display is set by the rules above */
+  display: none !important; /* !important so display: grid / flex declarations cannot un-hide the element */
+}
+
+@media (max-width: 860px) { /* narrow-viewport overrides: collapse the layout to one column */
+  .app-shell {
+    grid-template-columns: 1fr;
+  }
+
+  .sidebar {
+    min-height: auto;
+  }
+
+  .chat-shell {
+    height: auto; /* let the chat grow with its content instead of a fixed height */
+    min-height: 100vh;
+  }
+
+  .chat-header, /* shared rule for grid and flex containers; each ignores the property that does not apply to it */
+  .debug-panel,
+  .composer-actions {
+    grid-template-columns: 1fr; /* takes effect on grid containers such as .debug-panel */
+    flex-direction: column; /* takes effect on flex containers such as .composer-actions */
+    align-items: stretch;
+  }
+
+  .debug-panel {
+    display: grid; /* NOTE(review): same value as the base .debug-panel rule; looks redundant unless another rule overrides it - confirm */
+  }
+}
diff --git a/duck_core/web/templates/approvals.html b/duck_core/web/templates/approvals.html
new file mode 100644
index 0000000..9142e80
--- /dev/null
+++ b/duck_core/web/templates/approvals.html
@@ -0,0 +1,22 @@
+
+
+  
+    
+    
+    DuckLM Approvals
+    
+  
+  
+    
+
+
+

Approvals

+

Review pending local tool actions before DuckLM continues.

+
+ Back to Chat +
+
+
+ + + diff --git a/duck_core/web/templates/experience.html b/duck_core/web/templates/experience.html new file mode 100644 index 0000000..ceadc09 --- /dev/null +++ b/duck_core/web/templates/experience.html @@ -0,0 +1,2 @@ + +DuckLM Experience

Experience

diff --git a/duck_core/web/templates/index.html b/duck_core/web/templates/index.html new file mode 100644 index 0000000..8ffdd9b --- /dev/null +++ b/duck_core/web/templates/index.html @@ -0,0 +1,99 @@ + + + + + + DuckLM WebChat + + + +
+ + +
+
+
+

Chat

+

Messages are processed by the local Qwen role mapping through Duck Core.

+
+ +
+ +
+
+
D
+
+
+ DuckLM + ready +
+

Готов. Напиши задачу, я отправлю её в локальный runtime и покажу ответ, reasoning и timeline.

+
+
+
+ +
+
+

Event Timeline

+
    +
    +
    + +
    + +
    + Enter sends, Shift+Enter inserts a new line + +
    +
    +
    +
    + + + diff --git a/duck_core/web/templates/memory.html b/duck_core/web/templates/memory.html new file mode 100644 index 0000000..2053b08 --- /dev/null +++ b/duck_core/web/templates/memory.html @@ -0,0 +1,2 @@ + +DuckLM Memory

    Memory

    diff --git a/duck_core/web/templates/skills.html b/duck_core/web/templates/skills.html new file mode 100644 index 0000000..f73f5b4 --- /dev/null +++ b/duck_core/web/templates/skills.html @@ -0,0 +1,2 @@ + +DuckLM Skills

    Skills

    diff --git a/duck_core/web/templates/task.html b/duck_core/web/templates/task.html new file mode 100644 index 0000000..4a0b720 --- /dev/null +++ b/duck_core/web/templates/task.html @@ -0,0 +1,2 @@ + +DuckLM Task

    Task

    diff --git a/main.py b/main.py deleted file mode 100644 index f11b955..0000000 --- a/main.py +++ /dev/null @@ -1,5 +0,0 @@ -from app.api.server import app - - -__all__ = ["app"] - diff --git a/prompts/roles/action.md b/prompts/roles/action.md new file mode 100644 index 0000000..41cfab3 --- /dev/null +++ b/prompts/roles/action.md @@ -0,0 +1,16 @@ +You are DuckLM action role. Return only valid JSON matching the requested schema. + +Your job is to decide whether the user request needs local tool execution before +the thinker answers. + +Available tools: +- file_read: read a file inside the current workspace. + Args: {"path": "relative/path.txt"} +- file_write: write a file inside the current workspace. + Args: {"path": "relative/path.txt", "content": "text", "overwrite": false} +- shell_exec_safe: run a safe allowlisted shell command in the current workspace. + Args: {"command": "pwd"} + +Return actions=[] when the user can be answered directly without tools. +Use only the listed tools. Keep actions minimal and directly tied to the user's +request. Do not invent tool names. diff --git a/prompts/roles/coder.md b/prompts/roles/coder.md new file mode 100644 index 0000000..3b575f2 --- /dev/null +++ b/prompts/roles/coder.md @@ -0,0 +1,2 @@ +You are DuckLM, a local cognitive runtime running over a local language model. +Your current logical role is coder. Produce practical code-oriented answers grounded in context. diff --git a/prompts/roles/critic.md b/prompts/roles/critic.md new file mode 100644 index 0000000..82a133e --- /dev/null +++ b/prompts/roles/critic.md @@ -0,0 +1,2 @@ +You are DuckLM, a local cognitive runtime running over a local language model. +Your current logical role is critic. Reflect on results, risks, waste, and reusable lessons. diff --git a/prompts/roles/summary.md b/prompts/roles/summary.md new file mode 100644 index 0000000..1d017de --- /dev/null +++ b/prompts/roles/summary.md @@ -0,0 +1 @@ +You are DuckLM summary role. 
Compress task context without losing decisions or outcomes. diff --git a/prompts/roles/thinker.md b/prompts/roles/thinker.md new file mode 100644 index 0000000..5fd0dbf --- /dev/null +++ b/prompts/roles/thinker.md @@ -0,0 +1,7 @@ +You are DuckLM, a local cognitive runtime running over a local language model. +Your current logical role is thinker. + +When asked who or what you are, answer as DuckLM. You may mention that DuckLM is +powered by a local model, but do not answer as the base model identity. + +Respond clearly and briefly unless the task needs detail. diff --git a/pyproject.toml b/pyproject.toml index a9f2e9b..5c2c324 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,29 +1,31 @@ [project] name = "ducklm" version = "0.1.0" -description = "Local event-driven multi-model execution runtime" +description = "Local agent runtime with WebChat, API, tools, memory and experience" requires-python = ">=3.11" dependencies = [ - "fastapi>=0.115", - "httpx>=0.28", - "pydantic>=2.7", - "uvicorn>=0.30", - "websockets>=15.0", - "llama-cpp-python>=0.2.0", - "hnswlib>=0.8.0", - "sentence-transformers>=3.0", - "numpy>=1.26", + "fastapi", + "uvicorn", + "httpx", + "pydantic", + "pyyaml", + "jinja2", + "python-dotenv", + "jsonschema", + "aiosqlite", + "qdrant-client" ] -[build-system] -requires = ["setuptools>=68"] -build-backend = "setuptools.build_meta" - -[tool.setuptools] -packages = ["app"] - -[tool.setuptools.package-dir] -"" = "." 
+[project.optional-dependencies] +dev = [ + "pytest", + "pytest-asyncio", + "ruff" +] [tool.pytest.ini_options] -pythonpath = ["."] +asyncio_mode = "auto" +testpaths = ["tests"] + +[tool.setuptools.packages.find] +include = ["duck_core*"] diff --git a/scripts/bench/bench_runtime.py b/scripts/bench/bench_runtime.py new file mode 100644 index 0000000..2c491b8 --- /dev/null +++ b/scripts/bench/bench_runtime.py @@ -0,0 +1,34 @@ +import asyncio +import time + +from duck_core.model_client import ModelClient + + +TASKS = [ + "Скажи коротко, что ты DuckLM.", + "Создай tmp/duck_test_note.md с текстом hello duck и прочитай его обратно.", + "Посмотри структуру проекта и кратко опиши модули.", + "Найди TODO/FIXME в проекте.", + "Запусти тесты и кратко объясни результат.", +] + + +async def main() -> None: + client = ModelClient() + print("role -> base_url/model") + for role, cfg in client._roles.items(): + print(f"{role} -> {cfg.base_url}/{cfg.model}") + started = time.perf_counter() + print(f"test_tasks={len(TASKS)}") + print("llm_calls=0") + print("tool_calls=0") + print("json_directive_validity=not_run") + print("retry_count=0") + print("memory_writes=0") + print("experience_record_created=no") + print("selected_skill=not_run") + print(f"total_runtime_seconds={time.perf_counter() - started:.3f}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/scripts/llama/build_vulkan.sh b/scripts/llama/build_vulkan.sh new file mode 100755 index 0000000..90273b8 --- /dev/null +++ b/scripts/llama/build_vulkan.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +LLAMA_DIR="${ROOT_DIR}/vendor/llama.cpp" + +if [[ ! 
-d "${LLAMA_DIR}/.git" ]]; then + git clone --depth 1 https://github.com/ggml-org/llama.cpp "${LLAMA_DIR}" +fi + +cmake -S "${LLAMA_DIR}" -B "${LLAMA_DIR}/build" \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_VULKAN=ON \ + -DGGML_NATIVE=ON \ + -DLLAMA_BUILD_TESTS=OFF + +cmake --build "${LLAMA_DIR}/build" --config Release --target llama-server -j "$(nproc)" + +"${LLAMA_DIR}/build/bin/llama-server" --list-devices diff --git a/scripts/llama/healthcheck.sh b/scripts/llama/healthcheck.sh new file mode 100755 index 0000000..f63dcdb --- /dev/null +++ b/scripts/llama/healthcheck.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_URL="${1:-http://127.0.0.1:8081/v1}" + +curl --noproxy "*" -fsS "${BASE_URL}/models" >/dev/null + +echo "OK: ${BASE_URL}" diff --git a/scripts/llama/start_main.sh b/scripts/llama/start_main.sh new file mode 100755 index 0000000..09c1191 --- /dev/null +++ b/scripts/llama/start_main.sh @@ -0,0 +1,260 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" + +ENV_KEYS=( + DUCK_LLAMA_SERVER_BIN + DUCK_MAIN_MODEL_PATH + DUCK_MAIN_PORT + DUCK_CTX_SIZE + DUCK_N_GPU_LAYERS + DUCK_LLAMA_DEVICE + DUCK_PARALLEL + DUCK_LLAMA_PID_FILE + DUCK_LLAMA_LOG_FILE + DUCK_LLAMA_EXTRA_ARGS + DUCK_HOST +) +declare -A ENV_OVERRIDES=() +for key in "${ENV_KEYS[@]}"; do + if [[ -v "${key}" ]]; then + ENV_OVERRIDES["${key}"]="${!key}" + fi +done + +if [[ -f "${ROOT_DIR}/.env" ]]; then + set -a + # shellcheck disable=SC1091 + source "${ROOT_DIR}/.env" + set +a +fi +for key in "${!ENV_OVERRIDES[@]}"; do + export "${key}=${ENV_OVERRIDES[${key}]}" +done + +ACTION="${1:-start}" +PID_FILE="${DUCK_LLAMA_PID_FILE:-${ROOT_DIR}/data/llama-main.pid}" +LOG_FILE="${DUCK_LLAMA_LOG_FILE:-${ROOT_DIR}/data/llama-main.log}" +BASE_URL="http://${DUCK_HOST:-127.0.0.1}:${DUCK_MAIN_PORT:-8081}/v1" + +resolve_project_path() { + local value="$1" + if [[ "${value}" == /* ]]; then + printf '%s\n' "${value}" + else + printf '%s\n' "${ROOT_DIR}/${value#./}" + fi +} + +usage() { + cat <<'EOF' +Usage: scripts/llama/start_main.sh + +Commands: + start Start llama-server in the background + stop Stop the managed llama-server process + restart Stop and start llama-server + status Print process and HTTP health status + logs Show logs; use --follow/-f and --lines N + help Show this help + +Environment: + DUCK_LLAMA_SERVER_BIN Path to llama-server binary + DUCK_MAIN_MODEL_PATH Path to GGUF model + DUCK_HOST Bind host, default 127.0.0.1 + DUCK_MAIN_PORT Port, default 8081 + DUCK_CTX_SIZE Context size, default 65536 + DUCK_N_GPU_LAYERS GPU layers, default auto + DUCK_LLAMA_DEVICE Device name, for example Vulkan0 + DUCK_PARALLEL Server slots, default 1 + DUCK_LLAMA_PID_FILE PID file path + DUCK_LLAMA_LOG_FILE Log file path + DUCK_LLAMA_EXTRA_ARGS Extra llama-server args +EOF +} + +is_running() { + [[ -f "${PID_FILE}" ]] || return 1 + local pid + pid="$(cat "${PID_FILE}")" + [[ "${pid}" =~ ^[0-9]+$ ]] || return 1 + kill -0 "${pid}" 2>/dev/null +} + +pid_value() { + if [[ -f 
"${PID_FILE}" ]]; then + cat "${PID_FILE}" + fi +} + +status() { + if is_running; then + local pid + pid="$(pid_value)" + echo "llama-server running: pid=${pid}" + if command -v curl >/dev/null 2>&1 && curl --noproxy "*" -fsS "${BASE_URL}/models" >/dev/null 2>&1; then + echo "HTTP health: ok (${BASE_URL})" + else + echo "HTTP health: not ready (${BASE_URL})" + fi + return 0 + fi + + if [[ -f "${PID_FILE}" ]]; then + echo "llama-server not running; removing stale pid file ${PID_FILE}" + rm -f "${PID_FILE}" + else + echo "llama-server not running" + fi + return 3 +} + +start() { + if is_running; then + echo "llama-server already running: pid=$(pid_value)" + return 0 + fi + + : "${DUCK_MAIN_MODEL_PATH:?DUCK_MAIN_MODEL_PATH is required}" + + mkdir -p "$(dirname "${PID_FILE}")" "$(dirname "${LOG_FILE}")" + rm -f "${PID_FILE}" + + local llama_bin model_path + llama_bin="${DUCK_LLAMA_SERVER_BIN:-llama-server}" + if [[ "${llama_bin}" == */* ]]; then + llama_bin="$(resolve_project_path "${llama_bin}")" + fi + model_path="$(resolve_project_path "${DUCK_MAIN_MODEL_PATH}")" + local command=( + "${llama_bin}" + -m "${model_path}" + --alias local-main + --host "${DUCK_HOST:-127.0.0.1}" + --port "${DUCK_MAIN_PORT:-8081}" + -c "${DUCK_CTX_SIZE:-65536}" + --parallel "${DUCK_PARALLEL:-1}" + -ngl "${DUCK_N_GPU_LAYERS:-auto}" + --flash-attn on + --cache-prompt + --metrics + ) + if [[ -n "${DUCK_LLAMA_DEVICE:-}" ]]; then + command+=(--device "${DUCK_LLAMA_DEVICE}") + fi + if [[ -n "${DUCK_LLAMA_EXTRA_ARGS:-}" ]]; then + # shellcheck disable=SC2206 + local extra_args=( ${DUCK_LLAMA_EXTRA_ARGS} ) + command+=("${extra_args[@]}") + fi + + echo "Starting llama-server..." + echo "Command: ${command[*]}" >> "${LOG_FILE}" + if command -v setsid >/dev/null 2>&1; then + nohup setsid "${command[@]}" >> "${LOG_FILE}" 2>&1 & + else + nohup "${command[@]}" >> "${LOG_FILE}" 2>&1 & + fi + local pid=$! 
+ echo "${pid}" > "${PID_FILE}" + sleep 0.2 + + if is_running; then + echo "llama-server started: pid=${pid}" + echo "Log: ${LOG_FILE}" + return 0 + fi + + echo "llama-server failed to start. See ${LOG_FILE}" >&2 + rm -f "${PID_FILE}" + return 1 +} + +stop() { + if ! is_running; then + rm -f "${PID_FILE}" + echo "llama-server not running" + return 0 + fi + + local pid + pid="$(pid_value)" + echo "Stopping llama-server: pid=${pid}" + kill "${pid}" 2>/dev/null || true + + for _ in {1..30}; do + if ! kill -0 "${pid}" 2>/dev/null; then + rm -f "${PID_FILE}" + echo "llama-server stopped" + return 0 + fi + sleep 0.2 + done + + echo "llama-server did not stop after SIGTERM; sending SIGKILL" + kill -9 "${pid}" 2>/dev/null || true + rm -f "${PID_FILE}" + echo "llama-server stopped" +} + +restart() { + stop + start +} + +logs() { + local follow=0 + local lines=100 + shift || true + while [[ $# -gt 0 ]]; do + case "$1" in + -f|--follow) + follow=1 + shift + ;; + --lines) + lines="${2:?--lines requires a value}" + shift 2 + ;; + *) + echo "Unknown logs argument: $1" >&2 + return 2 + ;; + esac + done + + mkdir -p "$(dirname "${LOG_FILE}")" + touch "${LOG_FILE}" + if [[ "${follow}" == "1" ]]; then + tail -n "${lines}" -f "${LOG_FILE}" + else + tail -n "${lines}" "${LOG_FILE}" + fi +} + +case "${ACTION}" in + start) + start + ;; + stop) + stop + ;; + restart) + restart + ;; + status) + status + ;; + logs) + logs "$@" + ;; + help|-h|--help) + usage + ;; + *) + echo "Unknown command: ${ACTION}" >&2 + usage >&2 + exit 2 + ;; +esac diff --git a/scripts/llama/start_thinker_mtp_experimental.sh b/scripts/llama/start_thinker_mtp_experimental.sh new file mode 100755 index 0000000..e9b4dba --- /dev/null +++ b/scripts/llama/start_thinker_mtp_experimental.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" + +ENV_KEYS=( + DUCK_LLAMA_SERVER_BIN + DUCK_MAIN_MODEL_PATH + DUCK_MTP_MODEL_PATH + DUCK_MAIN_MTP_PORT + DUCK_CTX_SIZE + DUCK_N_GPU_LAYERS + DUCK_LLAMA_DEVICE + DUCK_PARALLEL + DUCK_MTP_FLAGS + DUCK_HOST +) +declare -A ENV_OVERRIDES=() +for key in "${ENV_KEYS[@]}"; do + if [[ -v "${key}" ]]; then + ENV_OVERRIDES["${key}"]="${!key}" + fi +done + +if [[ -f "${ROOT_DIR}/.env" ]]; then + set -a + # shellcheck disable=SC1091 + source "${ROOT_DIR}/.env" + set +a +fi +for key in "${!ENV_OVERRIDES[@]}"; do + export "${key}=${ENV_OVERRIDES[${key}]}" +done + +: "${DUCK_MAIN_MODEL_PATH:?DUCK_MAIN_MODEL_PATH is required}" +ACTION="${1:-start}" + +usage() { + cat <<'EOF' +Usage: scripts/llama/start_thinker_mtp_experimental.sh + +Commands: + start Start experimental MTP/speculative llama-server in foreground + check Check whether the current llama-server binary exposes draft-mtp flags + help Show this help +EOF +} + +resolve_project_path() { + local value="$1" + if [[ "${value}" == /* ]]; then + printf '%s\n' "${value}" + else + printf '%s\n' "${ROOT_DIR}/${value#./}" + fi +} + +LLAMA_BIN="${DUCK_LLAMA_SERVER_BIN:-llama-server}" +if [[ "${LLAMA_BIN}" == */* ]]; then + LLAMA_BIN="$(resolve_project_path "${LLAMA_BIN}")" +fi +MAIN_MODEL_PATH="$(resolve_project_path "${DUCK_MAIN_MODEL_PATH}")" + +HELP_TEXT="$("${LLAMA_BIN}" --help 2>&1 || true)" +if ! grep -qi "draft-mtp" <<< "${HELP_TEXT}"; then + echo "This llama-server build does not expose draft-mtp speculative decoding." 
+ exit 1 +fi + +case "${ACTION}" in + check) + echo "OK: draft-mtp speculative decoding is exposed by ${LLAMA_BIN}" + exit 0 + ;; + help|-h|--help) + usage + exit 0 + ;; + start) + ;; + *) + echo "Unknown command: ${ACTION}" >&2 + usage >&2 + exit 2 + ;; +esac + +command=( + "${LLAMA_BIN}" + -m "${MAIN_MODEL_PATH}" + --alias local-main-mtp + --host "${DUCK_HOST:-127.0.0.1}" + --port "${DUCK_MAIN_MTP_PORT:-8085}" + -c "${DUCK_CTX_SIZE:-65536}" + --parallel "${DUCK_PARALLEL:-1}" + -ngl "${DUCK_N_GPU_LAYERS:-auto}" + --flash-attn on + --cache-prompt + --metrics + --spec-type draft-mtp +) + +if [[ -n "${DUCK_LLAMA_DEVICE:-}" ]]; then + command+=(--device "${DUCK_LLAMA_DEVICE}") +fi + +if [[ -n "${DUCK_MTP_MODEL_PATH:-}" ]]; then + command+=(--model-draft "$(resolve_project_path "${DUCK_MTP_MODEL_PATH}")") +fi + +if [[ -n "${DUCK_MTP_FLAGS:-}" ]]; then + # shellcheck disable=SC2206 + extra_args=( ${DUCK_MTP_FLAGS} ) + command+=("${extra_args[@]}") +fi + +exec "${command[@]}" diff --git a/scripts/server.sh b/scripts/server.sh deleted file mode 100755 index 21a03bb..0000000 --- a/scripts/server.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" -VENV_PYTHON="$ROOT_DIR/.venv/bin/python" -PID_FILE="$ROOT_DIR/data/runtime/server.pid" -LOG_FILE="$ROOT_DIR/data/runtime/server.log" -HOST="${HOST:-127.0.0.1}" -PORT="${PORT:-8000}" - -mkdir -p "$ROOT_DIR/data/runtime" - -export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}$ROOT_DIR/.venv/lib/python3.13/site-packages/llama_cpp/lib" -export GGML_VULKAN=1 - -is_running() { - if [[ -f "$PID_FILE" ]]; then - local pid - pid="$(cat "$PID_FILE")" - if kill -0 "$pid" >/dev/null 2>&1; then - return 0 - fi - fi - return 1 -} - -start_server() { - if is_running; then - echo "Server already running with PID $(cat "$PID_FILE")" - exit 0 - fi - nohup "$VENV_PYTHON" -m uvicorn main:app --host "$HOST" --port "$PORT" >"$LOG_FILE" 2>&1 & - echo $! 
>"$PID_FILE" - echo "Started server on http://$HOST:$PORT with PID $(cat "$PID_FILE")" - echo "Log: $LOG_FILE" -} - -stop_server() { - if ! is_running; then - echo "Server is not running" - rm -f "$PID_FILE" - exit 0 - fi - local pid - pid="$(cat "$PID_FILE")" - kill "$pid" - rm -f "$PID_FILE" - echo "Stopped server PID $pid" -} - -status_server() { - if is_running; then - echo "Server is running with PID $(cat "$PID_FILE") on http://$HOST:$PORT" - else - echo "Server is not running" - fi -} - -case "${1:-}" in - start) - start_server - ;; - stop) - stop_server - ;; - restart) - stop_server || true - start_server - ;; - status) - status_server - ;; - logs) - touch "$LOG_FILE" - tail -n 50 -f "$LOG_FILE" - ;; - *) - echo "Usage: $0 {start|stop|restart|status|logs}" - exit 1 - ;; -esac diff --git a/scripts/verify/verify_basic_chat.sh b/scripts/verify/verify_basic_chat.sh new file mode 100755 index 0000000..cfc8651 --- /dev/null +++ b/scripts/verify/verify_basic_chat.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}" + +curl -fsS "${BASE_URL}/health" + +curl -fsS -X POST "${BASE_URL}/v1/chat" \ + -H "Content-Type: application/json" \ + -d '{ + "message": "Скажи коротко, что ты DuckLM", + "debug": true + }' diff --git a/scripts/verify/verify_experience.sh b/scripts/verify/verify_experience.sh new file mode 100755 index 0000000..9529162 --- /dev/null +++ b/scripts/verify/verify_experience.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}" +curl -fsS "${BASE_URL}/v1/experience" diff --git a/scripts/verify/verify_file_write_read.sh b/scripts/verify/verify_file_write_read.sh new file mode 100755 index 0000000..8dfc4a9 --- /dev/null +++ b/scripts/verify/verify_file_write_read.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}" + +RESPONSE="$(curl -fsS -X POST "${BASE_URL}/v1/chat" \ + -H 
"Content-Type: application/json" \ + -d '{ + "message": "Создай tmp/duck_test_note.md с текстом hello duck и прочитай его обратно", + "workspace": "./workspace", + "debug": true + }')" + +echo "${RESPONSE}" diff --git a/scripts/verify/verify_memory.sh b/scripts/verify/verify_memory.sh new file mode 100755 index 0000000..1f0c637 --- /dev/null +++ b/scripts/verify/verify_memory.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}" +curl -fsS "${BASE_URL}/v1/memory/search?q=duck" diff --git a/scripts/verify/verify_models_roles.sh b/scripts/verify/verify_models_roles.sh new file mode 100755 index 0000000..0f0f654 --- /dev/null +++ b/scripts/verify/verify_models_roles.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}" +curl -fsS "${BASE_URL}/v1/models/roles" diff --git a/scripts/verify/verify_skills.sh b/scripts/verify/verify_skills.sh new file mode 100755 index 0000000..f849549 --- /dev/null +++ b/scripts/verify/verify_skills.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_URL="${DUCK_API_URL:-http://127.0.0.1:8000}" +curl -fsS "${BASE_URL}/v1/skills" diff --git a/scripts/verify/verify_tool_blocking.sh b/scripts/verify/verify_tool_blocking.sh new file mode 100755 index 0000000..bceaaa2 --- /dev/null +++ b/scripts/verify/verify_tool_blocking.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -euo pipefail + +python3 - <<'PY' +import asyncio +from duck_core.tools.shell_exec_safe import ShellExecSafeTool + +async def main(): + result = await ShellExecSafeTool(".").run({"command": "rm -rf ."}) + assert not result.ok + print("OK: dangerous command blocked") + +asyncio.run(main()) +PY diff --git a/server.err b/server.err deleted file mode 100644 index 58def86..0000000 --- a/server.err +++ /dev/null @@ -1,274 +0,0 @@ - Loading weights: 0%| | 0/103 [00:00... 
- ) - ^ - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 330, in run_endpoint_function - return await run_in_threadpool(dependant.call, **values) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/concurrency.py", line 32, in run_in_threadpool - return await anyio.to_thread.run_sync(func) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/to_thread.py", line 63, in run_sync - return await get_async_backend().run_sync_in_worker_thread( - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - func, args, abandon_on_cancel=abandon_on_cancel, limiter=limiter - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - ) - ^ - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 2518, in run_sync_in_worker_thread - return await future - ^^^^^^^^^^^^ - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 1002, in run - result = context.run(func, *args) - File "/home/mirivlad/git/ducklm/app/api/server.py", line 103, in resolve_secret - return runtime.resolve_secret(task_id=request.task_id, secret=request.secret) - ~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/mirivlad/git/ducklm/app/runtime/runtime_controller.py", line 408, in resolve_secret - return self.runtime_loop.resolve_secret( - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^ - task_id=task_id, secret=secret - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - ) - ^ - File "/home/mirivlad/git/ducklm/app/runtime/runtime_loop.py", line 378, in resolve_secret - execution_result = self._execution_engine.execute( - task=task, - ...<2 lines>... 
- secret_override=secret, - ) - File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 87, in execute - return self._execute_plan( - ~~~~~~~~~~~~~~~~~~^ - task=task, - ^^^^^^^^^^ - ...<3 lines>... - password_override=password_override, - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - ) - ^ - File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 211, in _execute_plan - result = self._execute_tool( - task=task, - ...<3 lines>... - password_override=password_override, - ) - File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 824, in _execute_tool - tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args) - File "/home/mirivlad/git/ducklm/app/tools/plugins/shell_exec/__init__.py", line 21, in execute - completed = self._sandbox.run_shell( - command=command, - cwd=str(cwd) if cwd else None, - stdin_data=str(stdin_secret) if stdin_secret is not None else None, - ) - File "/home/mirivlad/git/ducklm/app/tools/sandbox.py", line 29, in run_shell - return subprocess.run( - ~~~~~~~~~~~~~~^ - command, - ^^^^^^^^ - ...<7 lines>... - check=False, - ^^^^^^^^^^^^ - ) - ^ - File "/usr/lib/python3.13/subprocess.py", line 556, in run - stdout, stderr = process.communicate(input, timeout=timeout) - ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^ - File "/usr/lib/python3.13/subprocess.py", line 1222, in communicate - stdout, stderr = self._communicate(input, endtime, timeout) - ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/usr/lib/python3.13/subprocess.py", line 2129, in _communicate - self._check_timeout(endtime, orig_timeout, stdout, stderr) - ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/usr/lib/python3.13/subprocess.py", line 1269, in _check_timeout - raise TimeoutExpired( - ...<2 lines>... 
- stderr=b''.join(stderr_seq) if stderr_seq else None) -subprocess.TimeoutExpired: Command 'sudo -S -p '' apt update && apt upgrade -y' timed out after 30.0 seconds -ERROR: Exception in ASGI application -Traceback (most recent call last): - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/uvicorn/protocols/http/h11_impl.py", line 415, in run_asgi - result = await app( # type: ignore[func-returns-value] - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - self.scope, self.receive, self.send - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - ) - ^ - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__ - return await self.app(scope, receive, send) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/applications.py", line 1159, in __call__ - await super().__call__(scope, receive, send) - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/applications.py", line 90, in __call__ - await self.middleware_stack(scope, receive, send) - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/errors.py", line 186, in __call__ - raise exc - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/errors.py", line 164, in __call__ - await self.app(scope, receive, _send) - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/middleware/exceptions.py", line 63, in __call__ - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send) - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app - raise exc - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app - await app(scope, receive, sender) - File 
"/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__ - await self.app(scope, receive, send) - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 660, in __call__ - await self.middleware_stack(scope, receive, send) - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 680, in app - await route.handle(scope, receive, send) - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/routing.py", line 276, in handle - await self.app(scope, receive, send) - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 134, in app - await wrap_app_handling_exceptions(app, request)(scope, receive, send) - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app - raise exc - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app - await app(scope, receive, sender) - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 120, in app - response = await f(request) - ^^^^^^^^^^^^^^^^ - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 674, in app - raw_response = await run_endpoint_function( - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - ...<3 lines>... 
- ) - ^ - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/fastapi/routing.py", line 330, in run_endpoint_function - return await run_in_threadpool(dependant.call, **values) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/starlette/concurrency.py", line 32, in run_in_threadpool - return await anyio.to_thread.run_sync(func) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/to_thread.py", line 63, in run_sync - return await get_async_backend().run_sync_in_worker_thread( - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - func, args, abandon_on_cancel=abandon_on_cancel, limiter=limiter - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - ) - ^ - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 2518, in run_sync_in_worker_thread - return await future - ^^^^^^^^^^^^ - File "/home/mirivlad/git/ducklm/.venv/lib/python3.13/site-packages/anyio/_backends/_asyncio.py", line 1002, in run - result = context.run(func, *args) - File "/home/mirivlad/git/ducklm/app/api/server.py", line 103, in resolve_secret - return runtime.resolve_secret(task_id=request.task_id, secret=request.secret) - ~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/mirivlad/git/ducklm/app/runtime/runtime_controller.py", line 408, in resolve_secret - return self.runtime_loop.resolve_secret( - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^ - task_id=task_id, secret=secret - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - ) - ^ - File "/home/mirivlad/git/ducklm/app/runtime/runtime_loop.py", line 378, in resolve_secret - execution_result = self._execution_engine.execute( - task=task, - ...<2 lines>... 
- secret_override=secret, - ) - File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 87, in execute - return self._execute_plan( - ~~~~~~~~~~~~~~~~~~^ - task=task, - ^^^^^^^^^^ - ...<3 lines>... - password_override=password_override, - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - ) - ^ - File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 211, in _execute_plan - result = self._execute_tool( - task=task, - ...<3 lines>... - password_override=password_override, - ) - File "/home/mirivlad/git/ducklm/app/core/execution_engine.py", line 824, in _execute_tool - tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args) - File "/home/mirivlad/git/ducklm/app/tools/plugins/shell_exec/__init__.py", line 21, in execute - completed = self._sandbox.run_shell( - command=command, - cwd=str(cwd) if cwd else None, - stdin_data=str(stdin_secret) if stdin_secret is not None else None, - ) - File "/home/mirivlad/git/ducklm/app/tools/sandbox.py", line 29, in run_shell - return subprocess.run( - ~~~~~~~~~~~~~~^ - command, - ^^^^^^^^ - ...<7 lines>... - check=False, - ^^^^^^^^^^^^ - ) - ^ - File "/usr/lib/python3.13/subprocess.py", line 556, in run - stdout, stderr = process.communicate(input, timeout=timeout) - ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^ - File "/usr/lib/python3.13/subprocess.py", line 1222, in communicate - stdout, stderr = self._communicate(input, endtime, timeout) - ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/usr/lib/python3.13/subprocess.py", line 2129, in _communicate - self._check_timeout(endtime, orig_timeout, stdout, stderr) - ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/usr/lib/python3.13/subprocess.py", line 1269, in _check_timeout - raise TimeoutExpired( - ...<2 lines>... 
- stderr=b''.join(stderr_seq) if stderr_seq else None) -subprocess.TimeoutExpired: Command 'sudo -S -p '' apt update && apt upgrade -y' timed out after 30.0 seconds diff --git a/server.out b/server.out deleted file mode 100644 index 10943e5..0000000 --- a/server.out +++ /dev/null @@ -1,254 +0,0 @@ -Models policy ready -Registered tool: file_write -Registered tool: shell_exec -Registered tool: memory -Registered tool: file_read -Lifespan: Starting model loading... -Lifespan: Loading models... -Loading thinker model... -Thinker loaded: (model: Qwen3.5-9B-GLM5.1-Distill-v1-Q4_K_M.gguf) -Loading json_compiler model... -JSON Compiler loaded: (model: gemma-4-E4B-it-Q4_K_M.gguf) -Loading coder model... -Coder loaded: (model: X-Coder-SFT-Qwen3-8B.Q6_K.gguf) -Loading critic model... -Reusing model instance: gemma-4-E4B-it-Q4_K_M.gguf for critic -Critic loaded: (model: gemma-4-E4B-it-Q4_K_M.gguf) -Loading sys_util model... -Sys_util loaded: (model: Menlo_Lucy-Q4_K_M.gguf) -All models loaded successfully -MemoryRecallService initialized with model: json_compiler -MemoryWritePolicy set: True -Lifespan: Models loaded -Lifespan: Rebuilding vector index (289 entries)... 
-Lifespan: Vector index rebuilt -INFO: 127.0.0.1:47236 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:47238 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:47240 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:45740 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:45754 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:41296 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:41304 - "GET / HTTP/1.1" 200 OK -INFO: 127.0.0.1:41304 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:41304 - "GET /favicon.ico HTTP/1.1" 404 Not Found -INFO: 127.0.0.1:41318 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:41310 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:40504 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:45288 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:45302 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:47488 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:47498 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:48888 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:48898 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:44008 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:44024 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:44008 - "POST /chat HTTP/1.1" 200 OK -INFO: 127.0.0.1:50236 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:50246 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:57020 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:57032 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:36982 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:36996 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:35350 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:35358 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:38442 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:38456 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:38442 - "POST /permissions/resolve HTTP/1.1" 200 OK -INFO: 127.0.0.1:35664 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:35666 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:41680 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:41682 
- "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:55484 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:55486 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:53136 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:53142 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:50412 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:50412 - "POST /secrets/resolve HTTP/1.1" 200 OK -INFO: 127.0.0.1:50416 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:50384 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:50396 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:35882 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:35890 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:34008 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:34012 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:38358 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:38366 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:39500 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:39516 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:52800 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:52812 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:60246 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:60256 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:55192 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:55208 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:55192 - "POST /secrets/resolve HTTP/1.1" 200 OK -INFO: 127.0.0.1:50170 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:50184 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:60392 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:60404 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:42626 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:42630 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:37478 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:37480 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:59892 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:59902 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:50284 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:50290 
- "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:59488 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:59492 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:53584 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:53590 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:50978 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:50990 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:43110 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:43118 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:39906 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:39908 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:39100 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:39110 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:43436 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:43448 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:60214 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:60228 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:56192 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:45580 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:59680 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:52038 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:34120 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:54374 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:41916 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:48474 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:58570 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:58284 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:47014 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:37884 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:56196 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:60026 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:48534 - "POST /secrets/resolve HTTP/1.1" 200 OK -INFO: 127.0.0.1:48536 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:46114 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:49446 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:33518 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:40316 - "GET 
/health HTTP/1.1" 200 OK -INFO: 127.0.0.1:47326 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:36022 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:36806 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:54232 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:54248 - "GET / HTTP/1.1" 200 OK -INFO: 127.0.0.1:54248 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:54248 - "GET /favicon.ico HTTP/1.1" 404 Not Found -INFO: 127.0.0.1:38470 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:54264 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:50474 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:50490 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:44644 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:44652 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:41856 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:57392 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:45778 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:59094 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:39508 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:51214 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:54724 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:41204 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:33686 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:38154 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:44658 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:56664 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:33906 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:36934 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:48746 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:50876 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:38912 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:40786 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:51882 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:40002 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:43176 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:49824 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:44316 - "GET /health 
HTTP/1.1" 200 OK -INFO: 127.0.0.1:58994 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:47794 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:37642 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:32882 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:53578 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:35804 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:47732 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:34050 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:55386 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:43992 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:43998 - "GET / HTTP/1.1" 200 OK -INFO: 127.0.0.1:43998 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:43998 - "GET /favicon.ico HTTP/1.1" 404 Not Found -INFO: 127.0.0.1:39194 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:33540 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:53022 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:41056 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:44000 - "POST /chat HTTP/1.1" 200 OK -INFO: 127.0.0.1:44000 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:44000 - "POST /permissions/resolve HTTP/1.1" 200 OK -INFO: 127.0.0.1:57534 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:60834 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:59886 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:42774 - "POST /secrets/resolve HTTP/1.1" 500 Internal Server Error -INFO: 127.0.0.1:50140 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:52360 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:57882 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:44816 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:37956 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:37956 - "GET / HTTP/1.1" 200 OK -INFO: 127.0.0.1:37956 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:37956 - "GET /favicon.ico HTTP/1.1" 404 Not Found -INFO: 127.0.0.1:50254 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:46082 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:56836 - "GET /health HTTP/1.1" 200 OK 
-INFO: 127.0.0.1:35716 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:37656 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:45248 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:50242 - "POST /chat HTTP/1.1" 200 OK -INFO: 127.0.0.1:44868 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:44882 - "POST /permissions/resolve HTTP/1.1" 200 OK -INFO: 127.0.0.1:44882 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:48796 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:60814 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:53286 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:44882 - "POST /secrets/resolve HTTP/1.1" 500 Internal Server Error -INFO: 127.0.0.1:53816 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:39450 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:53198 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:58340 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:58686 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:47278 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:46400 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:58580 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:35014 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:43342 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:34798 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:41652 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:36938 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:58066 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:45948 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:45656 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:33986 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:52016 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:55700 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:48468 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:33002 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:43004 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:43014 - "POST /secrets/resolve HTTP/1.1" 200 OK -INFO: 127.0.0.1:43014 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:36870 
- "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:45970 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:60292 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:53738 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:49414 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:56572 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:51224 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:53742 - "POST /secrets/resolve HTTP/1.1" 200 OK -INFO: 127.0.0.1:42496 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:54868 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:57530 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:60898 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:54112 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:44548 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:37414 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:45064 - "GET /health HTTP/1.1" 200 OK diff --git a/server.pid b/server.pid deleted file mode 100644 index 15c7f9e..0000000 --- a/server.pid +++ /dev/null @@ -1 +0,0 @@ -844579 diff --git a/skills/analyze_project/examples.md b/skills/analyze_project/examples.md new file mode 100644 index 0000000..f422d19 --- /dev/null +++ b/skills/analyze_project/examples.md @@ -0,0 +1,5 @@ +# Examples + +User: "Посмотри структуру проекта." + +Output: concise architecture summary, risks, and next actions. diff --git a/skills/analyze_project/notes.md b/skills/analyze_project/notes.md new file mode 100644 index 0000000..c04be65 --- /dev/null +++ b/skills/analyze_project/notes.md @@ -0,0 +1,3 @@ +# Notes + +Use read-only commands unless the user explicitly asks for changes. diff --git a/skills/analyze_project/procedure.md b/skills/analyze_project/procedure.md new file mode 100644 index 0000000..ee24768 --- /dev/null +++ b/skills/analyze_project/procedure.md @@ -0,0 +1,6 @@ +# Procedure + +1. List top-level files and directories. +2. Read manifest and documentation files. +3. Identify major modules and runtime entry points. +4. 
Summarize risks and next steps using only observed files. diff --git a/skills/analyze_project/skill.yaml b/skills/analyze_project/skill.yaml new file mode 100644 index 0000000..9d7bc67 --- /dev/null +++ b/skills/analyze_project/skill.yaml @@ -0,0 +1,23 @@ +id: analyze_project +title: Analyze project structure +description: Inspect repository structure and summarize architecture. +version: 1 +tags: + - code + - repository + - analysis +required_tools: + - file_read + - shell_exec_safe +risk_level: low +inputs: + - workspace_path +outputs: + - architecture_summary + - risks + - suggested_next_steps +success_criteria: + - repository structure inspected + - major modules identified + - no destructive commands executed + - summary is grounded in actual files diff --git a/test_ducklm.py b/test_ducklm.py deleted file mode 100755 index a4b4e91..0000000 --- a/test_ducklm.py +++ /dev/null @@ -1,314 +0,0 @@ -#!/usr/bin/env python3 -""" -Тестовый скрипт для проверки работы ducklm. -Позволяет ИИ-кодеру тестировать систему через отправку запросов и проверку выполнения. 
-""" - -import json -import time -import requests -import sys -from typing import Dict, Any, Optional - - -class DuckLMTester: - def __init__(self, base_url: str = "http://127.0.0.1:8000"): - self.base_url = base_url - self.session = requests.Session() - self.test_results = [] - - def log_test(self, test_name: str, passed: bool, details: str = ""): - """Записать результат теста""" - result = { - "test": test_name, - "passed": passed, - "details": details, - "timestamp": time.time() - } - self.test_results.append(result) - status = "✓ PASS" if passed else "✗ FAIL" - print(f"{status}: {test_name}") - if details: - print(f" Details: {details}") - - def test_health(self) -> bool: - """Проверить эндпоинт здоровья""" - try: - response = self.session.get(f"{self.base_url}/health", timeout=5) - if response.status_code == 200: - data = response.json() - if data.get("status") == "ok": - self.log_test("Health Check", True, "Server is healthy") - return True - else: - self.log_test("Health Check", False, f"Unexpected response: {data}") - return False - else: - self.log_test("Health Check", False, f"HTTP {response.status_code}") - return False - except Exception as e: - self.log_test("Health Check", False, f"Connection error: {str(e)}") - return False - - def test_simple_chat(self) -> bool: - """Простой тест чата""" - try: - payload = {"input": "Привет, как дела?"} - response = self.session.post( - f"{self.base_url}/chat", - json=payload, - timeout=30 - ) - - if response.status_code == 200: - data = response.json() - status = data.get("status") - if status in ["completed", "awaiting_permission", "awaiting_input"]: - self.log_test( - "Simple Chat", - True, - f"Status: {status}, Response received" - ) - return True - else: - self.log_test( - "Simple Chat", - False, - f"Unexpected status: {status}" - ) - return False - else: - self.log_test( - "Simple Chat", - False, - f"HTTP {response.status_code}: {response.text}" - ) - return False - except Exception as e: - 
self.log_test("Simple Chat", False, f"Request error: {str(e)}") - return False - - def test_tool_execution(self) -> bool: - """Тест выполнения инструмента""" - try: - # Тест простой команды shell - payload = { - "input": "Выполни простую команду", - "context": { - "requested_tool": "shell_exec", - "tool_args": {"command": "echo 'test'"} - } - } - response = self.session.post( - f"{self.base_url}/chat", - json=payload, - timeout=30 - ) - - if response.status_code == 200: - data = response.json() - status = data.get("status") - if status == "completed": - output = data.get("result", {}).get("output", "") - if "test" in output: - self.log_test( - "Tool Execution", - True, - f"Command executed successfully: {output.strip()}" - ) - return True - else: - self.log_test( - "Tool Execution", - False, - f"Unexpected output: {output}" - ) - return False - elif status == "awaiting_permission": - self.log_test( - "Tool Execution", - True, - "Permission required (expected for some commands)" - ) - return True - else: - self.log_test( - "Tool Execution", - False, - f"Unexpected status: {status}" - ) - return False - else: - self.log_test( - "Tool Execution", - False, - f"HTTP {response.status_code}: {response.text}" - ) - return False - except Exception as e: - self.log_test("Tool Execution", False, f"Request error: {str(e)}") - return False - - def test_permission_flow(self) -> bool: - """Тест потока разрешений""" - try: - # Сначала отправляем задачу, требующую разрешения - payload = { - "input": "Запусти команду, требующую разрешения", - "context": { - "requested_tool": "shell_exec", - "tool_args": {"command": "whoami"} - } - } - response = self.session.post( - f"{self.base_url}/chat", - json=payload, - timeout=30 - ) - - if response.status_code != 200: - self.log_test( - "Permission Flow", - False, - f"Initial request failed: HTTP {response.status_code}" - ) - return False - - data = response.json() - if data.get("status") == "awaiting_permission": - task_id = 
data.get("task_id") - if not task_id: - self.log_test( - "Permission Flow", - False, - "No task_id in response" - ) - return False - - # Теперь разрешаем разрешение - resolve_payload = { - "task_id": task_id, - "decision": "allow_once" - } - resolve_response = self.session.post( - f"{self.base_url}/permissions/resolve", - json=resolve_payload, - timeout=10 - ) - - if resolve_response.status_code == 200: - resolve_data = resolve_response.json() - final_status = resolve_data.get("status") - if final_status in ["completed", "failed"]: - self.log_test( - "Permission Flow", - True, - f"Permission resolved, final status: {final_status}" - ) - return True - else: - self.log_test( - "Permission Flow", - False, - f"Unexpected final status: {final_status}" - ) - return False - else: - self.log_test( - "Permission Flow", - False, - f"Permission resolution failed: HTTP {resolve_response.status_code}" - ) - return False - else: - # Если разрешение не потребовалось, это тоже нормально для некоторых систем - self.log_test( - "Permission Flow", - True, - f"No permission required, status: {data.get('status')}" - ) - return True - - except Exception as e: - self.log_test("Permission Flow", False, f"Request error: {str(e)}") - return False - - def run_all_tests(self) -> Dict[str, Any]: - """Запустить все тесты""" - print("Starting ducklm tests...") - print("=" * 50) - - # Ждем немного, чтобы сервер успел запуститься - time.sleep(2) - - tests = [ - self.test_health, - self.test_simple_chat, - self.test_tool_execution, - self.test_permission_flow, - ] - - passed = 0 - total = len(tests) - - for test in tests: - if test(): - passed += 1 - time.sleep(1) # Небольшая пауза между тестами для слабого железа - - print("=" * 50) - print(f"Tests completed: {passed}/{total} passed") - - # Сводка результатов - summary = { - "total_tests": total, - "passed_tests": passed, - "failed_tests": total - passed, - "success_rate": passed / total if total > 0 else 0, - "test_results": self.test_results - } 
- - return summary - - -def main(): - """Основная функция""" - import argparse - - parser = argparse.ArgumentParser(description="Тест ducklm системы") - parser.add_argument("--url", default="http://127.0.0.1:8000", help="Base URL for ducklm server") - parser.add_argument("--test", choices=["health", "chat", "tool", "permission", "all"], - default="all", help="Specific test to run") - - args = parser.parse_args() - - tester = DuckLMTester(args.url) - - if args.test == "all": - results = tester.run_all_tests() - print("\nFINAL RESULTS:") - print(f"Passed: {results['passed_tests']}/{results['total_tests']}") - print(f"Success Rate: {results['success_rate']*100:.1f}%") - - # Возвращаем код выхода basado на результатах - sys.exit(0 if results['failed_tests'] == 0 else 1) - else: - # Запуск конкретного теста - test_map = { - "health": tester.test_health, - "chat": tester.test_simple_chat, - "tool": tester.test_tool_execution, - "permission": tester.test_permission_flow, - } - - test_func = test_map[args.test] - if test_func(): - print(f"Test {args.test}: PASSED") - sys.exit(0) - else: - print(f"Test {args.test}: FAILED") - sys.exit(1) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/test_ducklm_direct.py b/test_ducklm_direct.py deleted file mode 100644 index e633bac..0000000 --- a/test_ducklm_direct.py +++ /dev/null @@ -1,409 +0,0 @@ -#!/usr/bin/env python3 -""" -Прямой тест ducklm через RuntimeController (без HTTP сервера). -Позволяет ИИ-кодеру тестировать систему через отправку запросов и проверку выполнения. 
-""" - -import json -import time -import sys -from pathlib import Path -from typing import Dict, Any - -# Добавляем текущую директорию в путь для импорта app -sys.path.insert(0, '.') - -from app.runtime.runtime_controller import RuntimeController -from app.core.contracts import UserTask - - -class DuckLMDirectTester: - def __init__(self, base_dir: str = "."): - self.base_dir = Path(base_dir) - self.test_results = [] - self.controller = None - - def setup(self): - """Инициализировать контроллер""" - try: - print("Инициализация RuntimeController...") - self.controller = RuntimeController(base_dir=self.base_dir) - print("RuntimeController инициализирован успешно") - return True - except Exception as e: - print(f"Ошибка инициализации RuntimeController: {e}") - return False - - def log_test(self, test_name: str, passed: bool, details: str = ""): - """Записать результат теста""" - result = { - "test": test_name, - "passed": passed, - "details": details, - "timestamp": time.time() - } - self.test_results.append(result) - status = "✓ PASS" if passed else "✗ FAIL" - print(f"{status}: {test_name}") - if details: - print(f" Details: {details}") - - def test_health(self) -> bool: - """Проверить что контроллер работает""" - try: - if self.controller is None: - self.log_test("Health Check", False, "Controller not initialized") - return False - - # Проверяем что основные компоненты присутствуют - components = [ - ("event_bus", self.controller.event_bus), - ("permission_service", self.controller.permission_service), - ("task_state_store", self.controller.task_state_store), - ("checkpoint_store", self.controller.checkpoint_store), - ("context_builder", self.controller.context_builder), - ("router", self.controller.router), - ("execution_engine", self.controller.execution_engine), - ] - - missing = [] - for name, component in components: - if component is None: - missing.append(name) - - if missing: - self.log_test("Health Check", False, f"Missing components: {missing}") - return 
False - else: - self.log_test("Health Check", True, "Все компоненты инициализированы") - return True - - except Exception as e: - self.log_test("Health Check", False, f"Error: {str(e)}") - return False - - def test_simple_task(self) -> bool: - """Простой тест задачи""" - try: - if self.controller is None: - self.log_test("Simple Task", False, "Controller not initialized") - return False - - # Создаем простую задачу - task = UserTask(input="Привет, как дела?") - - # Выполняем задачу через контроллер - result = self.controller.handle_task(task) - - status = result.get("status") - if status in ["completed", "awaiting_permission", "awaiting_input"]: - self.log_test( - "Simple Task", - True, - f"Status: {status}, Task ID: {result.get('task_id')}" - ) - return True - else: - self.log_test( - "Simple Task", - False, - f"Unexpected status: {status}" - ) - return False - - except Exception as e: - self.log_test("Simple Task", False, f"Request error: {str(e)}") - return False - - def test_tool_task(self) -> bool: - """Тест задачи с инструментом""" - try: - if self.controller is None: - self.log_test("Tool Task", False, "Controller not initialized") - return False - - # Тест простой команды shell через контекст - task = UserTask( - input="Выполни простую команду", - context={ - "requested_tool": "shell_exec", - "tool_args": {"command": "echo 'hello from test'"} - } - ) - - result = self.controller.handle_task(task) - - status = result.get("status") - if status == "completed": - output = result.get("result", {}).get("output", "") - if "hello from test" in output: - self.log_test( - "Tool Task", - True, - f"Command executed successfully: {output.strip()}" - ) - return True - else: - self.log_test( - "Tool Task", - False, - f"Unexpected output: {output}" - ) - return False - elif status == "awaiting_permission": - self.log_test( - "Tool Task", - True, - "Permission required (expected for some commands)" - ) - return True - else: - self.log_test( - "Tool Task", - False, - 
f"Unexpected status: {status}" - ) - return False - - except Exception as e: - self.log_test("Tool Task", False, f"Request error: {str(e)}") - return False - - def test_memory_tools(self) -> bool: - """Тест инструментов памяти""" - try: - if self.controller is None: - self.log_test("Memory Tools", False, "Controller not initialized") - return False - - # Тест вставки в память - task_insert = UserTask( - input="Запомни эту информацию: тестовое значение 123", - context={ - "requested_tool": "memory", - "tool_args": { - "operation": "insert", - "text": "тестовое значение 123", - "kind": "fact", - "weight": 0.8 - } - } - ) - - result_insert = self.controller.handle_task(task_insert) - - if result_insert.get("status") != "completed": - self.log_test( - "Memory Tools Insert", - False, - f"Insert failed: {result_insert.get('status')}" - ) - return False - - # Тест поиска в памяти - task_search = UserTask( - input="Найди запомненную информацию", - context={ - "requested_tool": "memory", - "tool_args": { - "operation": "search", - "query": "тестовое значение", - "limit": 5 - } - } - ) - - result_search = self.controller.handle_task(task_search) - - if result_search.get("status") == "completed": - output = result_search.get("result", {}).get("output", "") - self.log_test( - "Memory Tools", - True, - f"Memory search successful: {output[:100]}..." 
- ) - return True - else: - self.log_test( - "Memory Tools Search", - False, - f"Search failed: {result_search.get('status')}" - ) - return False - - except Exception as e: - self.log_test("Memory Tools", False, f"Request error: {str(e)}") - return False - - def test_file_operations(self) -> bool: - """Тест операций с файлами""" - try: - if self.controller is None: - self.log_test("File Operations", False, "Controller not initialized") - return False - - import tempfile - import os - - # Создаем временный файл для теста - with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f: - temp_path = f.name - f.write("initial content for testing") - - try: - # Тест чтения файла - task_read = UserTask( - input="Прочитай файл", - context={ - "requested_tool": "file_read", - "tool_args": {"path": temp_path} - } - ) - - result_read = self.controller.handle_task(task_read) - - if result_read.get("status") != "completed": - self.log_test( - "File Read", - False, - f"Read failed: {result_read.get('status')}" - ) - return False - - # Тест записи файла - new_content = "updated content from test" - task_write = UserTask( - input="Запиши в файл", - context={ - "requested_tool": "file_write", - "tool_args": { - "path": temp_path, - "content": new_content - } - } - ) - - result_write = self.controller.handle_task(task_write) - - if result_write.get("status") != "completed": - self.log_test( - "File Write", - False, - f"Write failed: {result_write.get('status')}" - ) - return False - - # Проверяем что файл действительно обновился - with open(temp_path, 'r') as f: - actual_content = f.read() - - if actual_content == new_content: - self.log_test( - "File Operations", - True, - f"File read/write successful: {actual_content}" - ) - return True - else: - self.log_test( - "File Operations", - False, - f"File content mismatch. 
Expected: {new_content}, Got: {actual_content}" - ) - return False - - finally: - # Очищаем временный файл - if os.path.exists(temp_path): - os.unlink(temp_path) - - except Exception as e: - self.log_test("File Operations", False, f"Request error: {str(e)}") - return False - - def run_all_tests(self) -> Dict[str, Any]: - """Запустить все тесты""" - print("Starting direct ducklm tests...") - print("=" * 50) - - if not self.setup(): - print("Failed to setup controller") - return {"error": "Setup failed"} - - tests = [ - self.test_health, - self.test_simple_task, - self.test_tool_task, - self.test_memory_tools, - self.test_file_operations, - ] - - passed = 0 - total = len(tests) - - for test in tests: - if test(): - passed += 1 - time.sleep(0.5) # Небольшая пауза между тестами - - print("=" * 50) - print(f"Tests completed: {passed}/{total} passed") - - # Сводка результатов - summary = { - "total_tests": total, - "passed_tests": passed, - "failed_tests": total - passed, - "success_rate": passed / total if total > 0 else 0, - "test_results": self.test_results - } - - return summary - - -def main(): - """Основная функция""" - import argparse - - parser = argparse.ArgumentParser(description="Тест ducklm системы (прямой доступ)") - parser.add_argument("--basedir", default=".", help="Base directory for ducklm") - parser.add_argument("--test", choices=["health", "simple", "tool", "memory", "file", "all"], - default="all", help="Specific test to run") - - args = parser.parse_args() - - tester = DuckLMDirectTester(args.basedir) - - if args.test == "all": - results = tester.run_all_tests() - print("\nFINAL RESULTS:") - print(f"Passed: {results['passed_tests']}/{results['total_tests']}") - print(f"Success Rate: {results['success_rate']*100:.1f}%") - - # Возвращаем код выхода basado на результатах - sys.exit(0 if results['failed_tests'] == 0 else 1) - else: - # Запуск конкретного теста - if not tester.setup(): - print("Failed to setup controller") - sys.exit(1) - - test_map = { - 
"health": tester.test_health, - "simple": tester.test_simple_task, - "tool": tester.test_tool_task, - "memory": tester.test_memory_tools, - "file": tester.test_file_operations, - } - - test_func = test_map[args.test] - if test_func(): - print(f"Test {args.test}: PASSED") - sys.exit(0) - else: - print(f"Test {args.test}: FAILED") - sys.exit(1) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/tests/smoke/test_action_directive_schema.py b/tests/smoke/test_action_directive_schema.py new file mode 100644 index 0000000..b3f9649 --- /dev/null +++ b/tests/smoke/test_action_directive_schema.py @@ -0,0 +1,16 @@ +import json +from pathlib import Path + +from jsonschema import validate + + +def test_action_directive_schema_accepts_minimal_directive(): + schema = json.loads(Path("duck_core/schemas/action_directive.schema.json").read_text()) + directive = { + "kind": "action_directive", + "intent": "No action needed", + "risk_level": "none", + "actions": [], + } + + validate(directive, schema) diff --git a/tests/smoke/test_api_health.py b/tests/smoke/test_api_health.py new file mode 100644 index 0000000..8558ef2 --- /dev/null +++ b/tests/smoke/test_api_health.py @@ -0,0 +1,25 @@ +from fastapi.testclient import TestClient + +from duck_core.api import create_app + + +def test_health_and_status_endpoints(tmp_path, monkeypatch): + monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3")) + app = create_app() + client = TestClient(app) + + assert client.get("/health").json()["status"] == "ok" + status = client.get("/v1/status").json() + assert status["name"] == "DuckLM" + assert status["api_host"] == "127.0.0.1" + + +def test_webchat_index_renders(tmp_path, monkeypatch): + monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3")) + app = create_app() + client = TestClient(app) + + response = client.get("/") + + assert response.status_code == 200 + assert "DuckLM" in response.text diff --git a/tests/smoke/test_api_stream_chat.py 
b/tests/smoke/test_api_stream_chat.py new file mode 100644 index 0000000..45d3a11 --- /dev/null +++ b/tests/smoke/test_api_stream_chat.py @@ -0,0 +1,103 @@ +from fastapi.testclient import TestClient +import json + +from duck_core.model_client import ModelResponse + +from duck_core.api import create_app + + +def test_stream_chat_endpoint_emits_sse_reasoning_and_content(tmp_path, monkeypatch): + monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3")) + + async def fake_chat(self, role, messages): + return ModelResponse( + role=role, + model="local-main", + content=json.dumps( + { + "kind": "action_directive", + "intent": "answer directly", + "risk_level": "none", + "actions": [], + } + ), + reasoning_content=None, + raw={}, + latency_ms=1.0, + ) + + async def fake_stream_chat(self, role, messages): + yield {"type": "reasoning_delta", "delta": "thinking"} + yield {"type": "content_delta", "delta": "answer"} + + monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat) + monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat) + app = create_app() + client = TestClient(app) + + with client.stream( + "POST", + "/v1/chat/stream", + json={"message": "hello", "workspace": "./workspace", "debug": True}, + ) as response: + body = "".join(response.iter_text()) + + assert response.status_code == 200 + assert "event: reasoning_delta" in body + assert "event: content_delta" in body + assert "event: done" in body + assert "thinking" in body + assert "answer" in body + + +def test_stream_chat_endpoint_executes_tool_before_streaming_answer(tmp_path, monkeypatch): + monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3")) + (tmp_path / "note.txt").write_text("stream tool content") + + async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None): + assert role == "action" + return ModelResponse( + role=role, + model="local-main", + content=json.dumps( + { + "kind": 
"action_directive", + "intent": "read requested file", + "risk_level": "low", + "actions": [ + { + "tool": "file_read", + "args": {"path": "note.txt"}, + "reason": "User asked for file contents", + } + ], + } + ), + reasoning_content=None, + raw={}, + latency_ms=1.0, + ) + + async def fake_stream_chat(self, role, messages): + assert role == "thinker" + assert any("tool_observations" in message["content"] for message in messages) + yield {"type": "content_delta", "delta": "answer from tool"} + + monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat) + monkeypatch.setattr("duck_core.model_client.ModelClient.stream_chat", fake_stream_chat) + client = TestClient(create_app()) + + with client.stream( + "POST", + "/v1/chat/stream", + json={"message": "read note.txt", "workspace": str(tmp_path), "debug": True}, + ) as response: + body = "".join(response.iter_text()) + + assert response.status_code == 200 + assert "event: tool_call_started" in body + assert "event: tool_call_finished" in body + assert "stream tool content" in body + assert "event: content_delta" in body + assert "answer from tool" in body + assert "event: done" in body diff --git a/tests/smoke/test_approvals.py b/tests/smoke/test_approvals.py new file mode 100644 index 0000000..19d8e9c --- /dev/null +++ b/tests/smoke/test_approvals.py @@ -0,0 +1,18 @@ +import pytest + +from duck_core.approvals.service import ApprovalService + + +@pytest.mark.asyncio +async def test_approval_service_allow_forever_is_exact_hash(tmp_path): + service = ApprovalService(str(tmp_path / "duck.sqlite3")) + await service.init() + + action = {"tool": "shell_exec_safe", "args": {"command": "pytest tests/smoke -v"}} + approval = await service.create_pending("task_1", action) + await service.allow_forever(approval.approval_id) + + assert await service.is_allowed_forever(action) is True + assert await service.is_allowed_forever( + {"tool": "shell_exec_safe", "args": {"command": "pytest -v"}} + ) is False diff --git 
a/tests/smoke/test_chat_api.py b/tests/smoke/test_chat_api.py new file mode 100644 index 0000000..edc0b85 --- /dev/null +++ b/tests/smoke/test_chat_api.py @@ -0,0 +1,96 @@ +from dataclasses import dataclass +import json + +from fastapi.testclient import TestClient + +from duck_core.api import create_app +from duck_core.model_client import ModelResponse + + +@dataclass +class FakeResponse: + role: str = "thinker" + model: str = "local-main" + content: str = "Я DuckLM, локальная агентная система." + raw: dict = None + latency_ms: float = 1.0 + prompt_tokens: int | None = 1 + completion_tokens: int | None = 1 + total_tokens: int | None = 2 + + +def test_chat_api_uses_runtime_and_records_events(tmp_path, monkeypatch): + monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3")) + monkeypatch.setenv("DUCK_SKIP_LIVE_LLM_TESTS", "1") + + async def fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None): + return ModelResponse( + role="thinker", + model="local-main", + content="Я DuckLM, локальная агентная система.", + reasoning_content=None, + raw={}, + latency_ms=1.0, + prompt_tokens=1, + completion_tokens=1, + total_tokens=2, + ) + + monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat) + app = create_app() + client = TestClient(app) + + response = client.post("/v1/chat", json={"message": "Кто ты?", "debug": True}) + payload = response.json() + events = client.get(f"/v1/tasks/{payload['task_id']}/events").json() + + assert payload["status"] == "completed" + assert "DuckLM" in payload["final_response"] + assert [event["event_type"] for event in events] == [ + "task_created", + "model_call_started", + "action_directive_failed", + "model_call_started", + "cognition_response", + "model_call_finished", + "task_completed", + ] + + +def test_chat_api_exposes_pending_approval_from_runtime_tool_gate(tmp_path, monkeypatch): + monkeypatch.setenv("DUCK_DB_PATH", str(tmp_path / "duck.sqlite3")) + + async def 
fake_chat(self, role, messages, temperature=None, max_output_tokens=None, response_format=None): + if role == "action": + return ModelResponse( + role=role, + model="local-main", + content=json.dumps( + { + "kind": "action_directive", + "intent": "run command", + "risk_level": "medium", + "actions": [ + { + "tool": "shell_exec_safe", + "args": {"command": "uname -a"}, + "reason": "needs shell command", + } + ], + } + ), + reasoning_content=None, + raw={}, + latency_ms=1.0, + ) + raise AssertionError("thinker should not run while approval is pending") + + monkeypatch.setattr("duck_core.model_client.ModelClient.chat", fake_chat) + client = TestClient(create_app()) + + response = client.post("/v1/chat", json={"message": "run uname", "debug": True}) + approvals = client.get("/v1/approvals/pending").json() + + assert response.status_code == 200 + assert response.json()["status"] == "waiting_for_approval" + assert approvals[0]["normalized_action"]["tool"] == "shell_exec_safe" diff --git a/tests/smoke/test_event_log.py b/tests/smoke/test_event_log.py new file mode 100644 index 0000000..8e4c073 --- /dev/null +++ b/tests/smoke/test_event_log.py @@ -0,0 +1,25 @@ +import pytest + +from duck_core.events.store import EventStore +from duck_core.tasks.store import TaskStore + + +@pytest.mark.asyncio +async def test_task_and_event_store_round_trip(tmp_path): + db_path = tmp_path / "duck.sqlite3" + tasks = TaskStore(str(db_path)) + events = EventStore(str(db_path)) + await tasks.init() + await events.init() + + task = await tasks.create_task("hello", "./workspace", True) + await events.append(task.task_id, "task_created", {"message": "hello"}) + await tasks.complete_task(task.task_id, "done") + + loaded = await tasks.get_task(task.task_id) + timeline = await events.list_events(task.task_id) + + assert loaded is not None + assert loaded.status == "completed" + assert loaded.final_response == "done" + assert [event.event_type for event in timeline] == ["task_created"] diff --git 
a/tests/smoke/test_experience_recorder.py b/tests/smoke/test_experience_recorder.py new file mode 100644 index 0000000..bbf9ae2 --- /dev/null +++ b/tests/smoke/test_experience_recorder.py @@ -0,0 +1,24 @@ +import pytest + +from duck_core.experience.recorder import ExperienceRecorder + + +@pytest.mark.asyncio +async def test_experience_recorder_round_trip(tmp_path): + recorder = ExperienceRecorder(str(tmp_path / "duck.sqlite3")) + await recorder.init() + + created = await recorder.record( + task_id="task_1", + skill_id="analyze_project", + summary="Checked project", + result="success", + what_worked=["events"], + what_failed=[], + reusable_lesson="Keep context grounded in files.", + confidence=0.8, + ) + loaded = await recorder.list_records() + + assert created.id is not None + assert loaded[0].summary == "Checked project" diff --git a/tests/smoke/test_llama_server_connection.py b/tests/smoke/test_llama_server_connection.py new file mode 100644 index 0000000..6fc9a49 --- /dev/null +++ b/tests/smoke/test_llama_server_connection.py @@ -0,0 +1,13 @@ +import os + +import pytest + +from duck_core.model_client import ModelClient + + +@pytest.mark.asyncio +async def test_llama_server_connection_live_skip_by_env(monkeypatch): + if os.getenv("DUCK_SKIP_LIVE_LLM_TESTS", "1") == "1": + pytest.skip("Live LLM tests skipped") + result = await ModelClient().ping() + assert any(item["ok"] for item in result.values()) diff --git a/tests/smoke/test_llama_service_script.py b/tests/smoke/test_llama_service_script.py new file mode 100644 index 0000000..b0472c8 --- /dev/null +++ b/tests/smoke/test_llama_service_script.py @@ -0,0 +1,57 @@ +import os +import subprocess +import textwrap +import time +from pathlib import Path + + +def test_start_main_script_manages_pid_status_stop_and_logs(tmp_path): + fake_bin = tmp_path / "llama-server" + fake_bin.write_text( + textwrap.dedent( + """\ + #!/usr/bin/env bash + echo "fake llama-server $*" >&2 + trap 'exit 0' TERM INT + while true; do sleep 1; 
done + """ + ) + ) + fake_bin.chmod(0o755) + pid_file = tmp_path / "llama.pid" + log_file = tmp_path / "llama.log" + + env = { + **os.environ, + "DUCK_LLAMA_SERVER_BIN": str(fake_bin), + "DUCK_MAIN_MODEL_PATH": str(tmp_path / "model.gguf"), + "DUCK_LLAMA_PID_FILE": str(pid_file), + "DUCK_LLAMA_LOG_FILE": str(log_file), + "DUCK_MAIN_PORT": "18081", + } + Path(env["DUCK_MAIN_MODEL_PATH"]).write_text("fake") + script = "scripts/llama/start_main.sh" + + stopped = subprocess.run([script, "status"], env=env, text=True, capture_output=True) + assert stopped.returncode == 3 + assert "not running" in stopped.stdout + + started = subprocess.run([script, "start"], env=env, text=True, capture_output=True) + assert started.returncode == 0 + assert pid_file.exists() + + try: + running = subprocess.run([script, "status"], env=env, text=True, capture_output=True) + assert running.returncode == 0 + assert "running" in running.stdout + + time.sleep(0.2) + logs = subprocess.run( + [script, "logs", "--lines", "20"], env=env, text=True, capture_output=True + ) + assert logs.returncode == 0 + assert "--alias local-main" in logs.stdout + finally: + stopped = subprocess.run([script, "stop"], env=env, text=True, capture_output=True) + assert stopped.returncode == 0 + assert not pid_file.exists() diff --git a/tests/smoke/test_model_client.py b/tests/smoke/test_model_client.py new file mode 100644 index 0000000..5901b29 --- /dev/null +++ b/tests/smoke/test_model_client.py @@ -0,0 +1,92 @@ +import pytest +import httpx + +from duck_core.model_client import ModelClient + + +def test_model_client_loads_role_settings(): + client = ModelClient("config/models.yaml") + + thinker = client.get_role_config("thinker") + action = client.get_role_config("action") + + assert thinker.model == "local-main" + assert thinker.temperature == 0.4 + assert action.structured_output is True + assert action.response_schema == "duck_core/schemas/action_directive.schema.json" + + +@pytest.mark.asyncio +async def 
test_model_client_missing_role_is_clear_error(): + client = ModelClient("config/models.yaml") + + with pytest.raises(KeyError, match="Unknown model role"): + await client.chat("missing", [{"role": "user", "content": "hello"}]) + + +@pytest.mark.asyncio +async def test_model_client_preserves_reasoning_content(monkeypatch): + async def fake_post(self, url, json): + return httpx.Response( + 200, + json={ + "choices": [ + { + "message": { + "role": "assistant", + "content": "final answer", + "reasoning_content": "private reasoning", + } + } + ], + "usage": { + "prompt_tokens": 3, + "completion_tokens": 2, + "total_tokens": 5, + }, + }, + request=httpx.Request("POST", url), + ) + + monkeypatch.setattr(httpx.AsyncClient, "post", fake_post) + client = ModelClient("config/models.yaml") + + response = await client.chat("thinker", [{"role": "user", "content": "hello"}]) + + assert response.content == "final answer" + assert response.reasoning_content == "private reasoning" + + +@pytest.mark.asyncio +async def test_model_client_stream_chat_yields_reasoning_then_content(monkeypatch): + class FakeStreamResponse: + def raise_for_status(self): + return None + + async def aiter_lines(self): + yield 'data: {"choices":[{"delta":{"reasoning_content":"thinking "}}]}' + yield 'data: {"choices":[{"delta":{"content":"answer"}}]}' + yield "data: [DONE]" + + class FakeStreamContext: + async def __aenter__(self): + return FakeStreamResponse() + + async def __aexit__(self, exc_type, exc, tb): + return False + + def fake_stream(self, method, url, json): + return FakeStreamContext() + + monkeypatch.setattr(httpx.AsyncClient, "stream", fake_stream) + client = ModelClient("config/models.yaml") + + chunks = [ + chunk + async for chunk in client.stream_chat("thinker", [{"role": "user", "content": "hello"}]) + ] + + assert chunks == [ + {"type": "reasoning_delta", "delta": "thinking "}, + {"type": "content_delta", "delta": "answer"}, + ] diff --git a/tests/smoke/test_models_config.py 
b/tests/smoke/test_models_config.py new file mode 100644 index 0000000..36f3309 --- /dev/null +++ b/tests/smoke/test_models_config.py @@ -0,0 +1,16 @@ +from pathlib import Path + +import yaml + + +def test_models_config_maps_roles_to_same_qwen_non_mtp_model(): + config = yaml.safe_load(Path("config/models.yaml").read_text()) + + assert config["default_provider"] == "llama_server" + roles = config["models"] + for role in ["thinker", "critic", "coder", "action", "summary"]: + assert roles[role]["base_url"] == "http://127.0.0.1:8081/v1" + assert roles[role]["model"] == "local-main" + + assert roles["action"]["structured_output"] is True + assert roles["thinker"]["max_output_tokens"] == 8192 diff --git a/tests/smoke/test_runtime_reasoning.py b/tests/smoke/test_runtime_reasoning.py new file mode 100644 index 0000000..962a6bd --- /dev/null +++ b/tests/smoke/test_runtime_reasoning.py @@ -0,0 +1,37 @@ +import pytest + +from duck_core.events.store import EventStore +from duck_core.model_client import ModelResponse +from duck_core.runtime_loop import RuntimeLoop +from duck_core.tasks.store import TaskStore + + +class FakeModelClient: + async def chat(self, role, messages): + return ModelResponse( + role=role, + model="local-main", + content="visible answer", + reasoning_content="reasoning trace", + raw={}, + latency_ms=12.0, + prompt_tokens=1, + completion_tokens=2, + total_tokens=3, + ) + + +@pytest.mark.asyncio +async def test_runtime_returns_and_logs_reasoning_content(tmp_path): + db_path = str(tmp_path / "duck.sqlite3") + task_store = TaskStore(db_path) + event_store = EventStore(db_path) + loop = RuntimeLoop(task_store, event_store, FakeModelClient()) + + result = await loop.run_chat("hello", "./workspace", debug=True) + events = await event_store.list_events(result.task_id) + cognition = next(event for event in events if event.event_type == "cognition_response") + + assert result.final_response == "visible answer" + assert result.reasoning_content == "reasoning trace" 
+ assert cognition.payload["reasoning_content"] == "reasoning trace" diff --git a/tests/smoke/test_runtime_tools.py b/tests/smoke/test_runtime_tools.py new file mode 100644 index 0000000..9eca0c2 --- /dev/null +++ b/tests/smoke/test_runtime_tools.py @@ -0,0 +1,112 @@ +import json + +import pytest + +from duck_core.events.store import EventStore +from duck_core.model_client import ModelResponse +from duck_core.approvals.service import ApprovalService +from duck_core.runtime_loop import RuntimeLoop +from duck_core.tasks.store import TaskStore + + +class FakeToolModelClient: + async def chat(self, role, messages): + if role == "action": + return ModelResponse( + role=role, + model="local-main", + content=json.dumps( + { + "kind": "action_directive", + "intent": "read requested file", + "risk_level": "low", + "actions": [ + { + "tool": "file_read", + "args": {"path": "note.txt"}, + "reason": "User asked for file contents", + } + ], + } + ), + reasoning_content=None, + raw={}, + latency_ms=5.0, + ) + assert role == "thinker" + assert any("tool_observations" in message["content"] for message in messages) + return ModelResponse( + role=role, + model="local-main", + content="The file says: hello from tool", + reasoning_content="used file_read", + raw={}, + latency_ms=12.0, + ) + + +@pytest.mark.asyncio +async def test_runtime_executes_action_directive_tool_and_finishes_with_observation(tmp_path): + (tmp_path / "note.txt").write_text("hello from tool") + db_path = str(tmp_path / "duck.sqlite3") + task_store = TaskStore(db_path) + event_store = EventStore(db_path) + loop = RuntimeLoop(task_store, event_store, FakeToolModelClient()) + + result = await loop.run_chat("read note.txt", str(tmp_path), debug=True) + events = await event_store.list_events(result.task_id) + event_types = [event.event_type for event in events] + tool_finished = next(event for event in events if event.event_type == "tool_call_finished") + + assert result.status == "completed" + assert 
result.final_response == "The file says: hello from tool" + assert "action_directive" in event_types + assert "tool_call_started" in event_types + assert tool_finished.payload["tool"] == "file_read" + assert tool_finished.payload["result"]["ok"] is True + assert tool_finished.payload["result"]["output"] == "hello from tool" + + +class FakeApprovalModelClient: + async def chat(self, role, messages): + if role == "action": + return ModelResponse( + role=role, + model="local-main", + content=json.dumps( + { + "kind": "action_directive", + "intent": "run command", + "risk_level": "medium", + "actions": [ + { + "tool": "shell_exec_safe", + "args": {"command": "uname -a"}, + "reason": "User requested system information", + } + ], + } + ), + reasoning_content=None, + raw={}, + latency_ms=5.0, + ) + raise AssertionError("thinker must not be called while approval is pending") + + +@pytest.mark.asyncio +async def test_runtime_creates_pending_approval_when_tool_requires_it(tmp_path): + db_path = str(tmp_path / "duck.sqlite3") + task_store = TaskStore(db_path) + event_store = EventStore(db_path) + approvals = ApprovalService(db_path) + loop = RuntimeLoop(task_store, event_store, FakeApprovalModelClient(), approval_service=approvals) + + result = await loop.run_chat("run uname", str(tmp_path), debug=True) + pending = await approvals.pending() + events = await event_store.list_events(result.task_id) + + assert result.status == "waiting_for_approval" + assert pending[0].task_id == result.task_id + assert pending[0].normalized_action["tool"] == "shell_exec_safe" + assert any(event.event_type == "tool_approval_requested" for event in events) diff --git a/tests/smoke/test_skill_registry.py b/tests/smoke/test_skill_registry.py new file mode 100644 index 0000000..32b3d3c --- /dev/null +++ b/tests/smoke/test_skill_registry.py @@ -0,0 +1,9 @@ +from duck_core.skills.registry import SkillRegistry + + +def test_skill_registry_loads_analyze_project_skill(): + registry = 
SkillRegistry("skills") + skills = registry.load_skills() + + assert any(skill.id == "analyze_project" for skill in skills) + assert registry.get_skill("analyze_project").risk_level == "low" diff --git a/tests/smoke/test_tool_gateway.py b/tests/smoke/test_tool_gateway.py new file mode 100644 index 0000000..fb3a596 --- /dev/null +++ b/tests/smoke/test_tool_gateway.py @@ -0,0 +1,42 @@ +import pytest + +from duck_core.tools.file_read import FileReadTool +from duck_core.tools.file_write import FileWriteTool +from duck_core.tools.gateway import ToolGateway +from duck_core.tools.shell_exec_safe import ShellExecSafeTool + + +@pytest.mark.asyncio +async def test_file_tools_stay_inside_workspace(tmp_path): + write = FileWriteTool(str(tmp_path)) + read = FileReadTool(str(tmp_path)) + + result = await write.run({"path": "tmp/note.txt", "content": "hello duck"}) + loaded = await read.run({"path": "tmp/note.txt"}) + escaped = await read.run({"path": "../outside.txt"}) + + assert result.ok is True + assert loaded.output == "hello duck" + assert escaped.ok is False + + +@pytest.mark.asyncio +async def test_shell_tool_blocks_dangerous_commands(tmp_path): + shell = ShellExecSafeTool(str(tmp_path)) + + allowed = await shell.run({"command": "pwd"}) + blocked = await shell.run({"command": "rm -rf ."}) + + assert allowed.ok is True + assert blocked.ok is False + + +@pytest.mark.asyncio +async def test_tool_gateway_runs_allowed_directive(tmp_path): + gateway = ToolGateway.default(str(tmp_path)) + result = await gateway.run_action( + {"tool": "file_write", "args": {"path": "a.txt", "content": "x"}} + ) + + assert result.ok is True + assert result.metadata["path"].endswith("a.txt") diff --git a/tests/smoke/test_vector_memory.py b/tests/smoke/test_vector_memory.py new file mode 100644 index 0000000..ac148ec --- /dev/null +++ b/tests/smoke/test_vector_memory.py @@ -0,0 +1,11 @@ +import pytest + +from duck_core.memory.vector_memory import EmbeddingsUnavailableError, VectorMemory + + 
+@pytest.mark.asyncio +async def test_vector_memory_stub_is_explicit_when_embeddings_unavailable(): + memory = VectorMemory(qdrant_url="http://127.0.0.1:6333", embeddings_base_url=None) + + with pytest.raises(EmbeddingsUnavailableError): + await memory.add_memory("remember this") diff --git a/tests/test_api_handlers.py b/tests/test_api_handlers.py deleted file mode 100644 index dd98429..0000000 --- a/tests/test_api_handlers.py +++ /dev/null @@ -1,122 +0,0 @@ -import asyncio -import time - -import app.api.server as server -from app.api.server import chat, critic_feedback, health, list_events, resolve_permission, resolve_review, resolve_secret -from app.core.permission_resolution import PermissionResolutionRequest, ReviewResolutionRequest, SecretResolutionRequest -from app.api.server import CriticFeedbackRequest -from app.core.contracts import UserTask - - -def test_health_handler() -> None: - assert health() == {"status": "ok"} - - -def test_events_handler_returns_event_list() -> None: - body = list_events(limit=10) - assert "events" in body - assert isinstance(body["events"], list) - - -def test_chat_handler_returns_runtime_events() -> None: - body = chat(UserTask(input="hello from handler test")) - assert body["status"] in {"accepted", "completed"} - if body["status"] == "completed": - assert body["events"][0]["type"] == "task_received" - - -def test_chat_handler_submits_task_without_waiting_for_completion(monkeypatch) -> None: - class SlowRuntime: - def submit_task(self, task): - return {"task_id": task.task_id, "status": "accepted"} - - def handle_task(self, task): - time.sleep(0.25) - return {"task_id": task.task_id, "status": "completed", "events": []} - - monkeypatch.setattr("app.api.server.runtime", SlowRuntime()) - started = time.monotonic() - body = chat(UserTask(input="long task")) - - assert time.monotonic() - started < 0.1 - assert body["status"] == "accepted" - - -def test_lifespan_loads_models_without_threadpool_executor(monkeypatch) -> None: - class 
FakeRuntime: - _memory_interface = None - - def __init__(self) -> None: - self.loaded = False - - def load_models_at_startup(self) -> None: - self.loaded = True - - class FailingLoop: - def run_in_executor(self, *args, **kwargs): - raise AssertionError("lifespan must not load llama models via run_in_executor") - - fake_runtime = FakeRuntime() - monkeypatch.setattr(server, "runtime", fake_runtime) - monkeypatch.setattr(server.asyncio, "get_event_loop", lambda: FailingLoop()) - - async def run_lifespan() -> None: - async with server.lifespan(None): - pass - - asyncio.run(run_lifespan()) - assert fake_runtime.loaded is True - - -def test_resolve_permission_handler_allows_completion() -> None: - initial = chat(UserTask(input="запусти pwd")) - if initial["status"] == "awaiting_permission": - body = resolve_permission( - PermissionResolutionRequest(task_id=initial["task_id"], decision="allow_once") - ) - assert body["status"] in {"completed", "failed"} - - -def test_resolve_secret_handler_requires_pending_request() -> None: - body = resolve_secret(SecretResolutionRequest(task_id="missing", secret="x")) - assert body["status"] == "failed" - - -def test_resolve_review_handler_submits_review_resolution(monkeypatch) -> None: - class ReviewRuntime: - def submit_review_resolution(self, task_id, decision, correction=None): - return { - "task_id": task_id, - "status": "accepted", - "decision": decision, - "correction": correction, - } - - monkeypatch.setattr("app.api.server.runtime", ReviewRuntime()) - body = resolve_review( - ReviewResolutionRequest( - task_id="task-1", - decision="wrong_action", - correction="replan", - ) - ) - - assert body["status"] == "accepted" - assert body["decision"] == "wrong_action" - - -def test_structured_feedback_can_be_accepted_without_memory_write() -> None: - initial = chat(UserTask(input="feedback target")) - body = critic_feedback( - CriticFeedbackRequest( - task_id=initial["task_id"], - feedback="wrong answer", - 
feedback_type="hallucination", - severity="major", - correction="check first", - remember=False, - ) - ) - assert body["status"] == "ok" - assert body["stored"] is False - assert "hallucination" in body["lesson"] diff --git a/tests/test_command_analyzer.py b/tests/test_command_analyzer.py deleted file mode 100644 index f45e615..0000000 --- a/tests/test_command_analyzer.py +++ /dev/null @@ -1,46 +0,0 @@ -from app.core.command_analyzer import CommandAnalyzer -from app.core.permission_service import PermissionService - - -def _permission_service() -> PermissionService: - return PermissionService( - config={ - "settings": {}, - "command_categories": { - "no_always": { - "allow_once": True, - "allow_always": False, - "commands": ["apt", "apt-get", "dpkg", "systemctl"], - } - }, - "path_settings": {}, - } - ) - - -def test_detects_unelevated_root_required_segment_after_sudo_chain() -> None: - analyzer = CommandAnalyzer(_permission_service()) - - diagnosis = analyzer.analyze( - command="sudo apt update && apt upgrade -y", - task_id="task-1", - session_id="session-1", - ) - - assert diagnosis["type"] == "privilege_scope_error" - assert diagnosis["root_required_segments"] == ["apt update", "apt upgrade -y"] - assert diagnosis["elevated_segments"] == ["apt update"] - assert diagnosis["unelevated_root_segments"] == ["apt upgrade -y"] - - -def test_accepts_each_root_required_segment_when_each_is_elevated() -> None: - analyzer = CommandAnalyzer(_permission_service()) - - diagnosis = analyzer.analyze( - command="sudo apt update && sudo apt upgrade -y", - task_id="task-1", - session_id="session-1", - ) - - assert diagnosis["type"] == "ok" - assert diagnosis["unelevated_root_segments"] == [] diff --git a/tests/test_contracts.py b/tests/test_contracts.py deleted file mode 100644 index 7dee997..0000000 --- a/tests/test_contracts.py +++ /dev/null @@ -1,67 +0,0 @@ -import asyncio - -from app.core.async_router import AsyncRouter -from app.core.contracts import CriticScore, 
ExecutionDirective, PlanStep, UserTask - - -class _FakeAdapter: - def __init__(self, responses: list[str]) -> None: - self._responses = responses - - async def generate(self, prompt: str, max_tokens: int | None = None) -> str: - return self._responses.pop(0) - - -def test_user_task_defaults() -> None: - task = UserTask(input="hello") - assert task.task_id - assert task.session_id - - -def test_plan_step_supports_dependencies() -> None: - step = PlanStep( - id="step-1", - kind="tool", - tool="shell_exec", - description="run command", - depends_on=[], - ) - assert step.tool == "shell_exec" - - -def test_critic_score_bounds() -> None: - score = CriticScore( - correctness=1.0, - usefulness=0.5, - safety=0.0, - memory_store=False, - weight=0.2, - explanation="ok", - ) - assert score.weight == 0.2 - - -def test_execution_directive_defaults() -> None: - directive = ExecutionDirective(type="noop") - assert directive.payload == {} - assert directive.confidence == 0.0 - - -def test_router_compiles_tool_plan_even_when_classifier_says_conversation() -> None: - router = AsyncRouter( - thinker=_FakeAdapter([ - "conversation", - "ПЛАН:\nШаг 1: [shell_exec] выполнить `uptime`", - ]), - json_compiler=_FakeAdapter([ - '{"type":"plan","payload":{"steps":[{"id":"1","tool":"shell_exec","args":{"command":"uptime"},"depends_on":[]}]}}' - ]), - ) - directive = asyncio.run( - router.decide( - state={}, - context={"task_summary": "Проверь аптайм ПК", "task_context": {}}, - ) - ) - assert directive.type == "plan" - assert directive.payload["steps"][0]["tool"] == "shell_exec" diff --git a/tests/test_runtime_loop.py b/tests/test_runtime_loop.py deleted file mode 100644 index dfb3dc2..0000000 --- a/tests/test_runtime_loop.py +++ /dev/null @@ -1,38 +0,0 @@ -from app.core.contracts import UserTask -from app.runtime.runtime_controller import RuntimeController - - -def test_runtime_loop_emits_basic_events() -> None: - controller = RuntimeController() - result = 
controller.handle_task(UserTask(input="hello runtime")) - event_types = [event["type"] for event in result["events"]] - assert result["status"] == "completed" - assert "message" in result["result"] - assert "task_received" in event_types - assert "context_built" in event_types - assert "task_completed" in event_types - - -def test_runtime_loop_routes_natural_language_shell_request_to_permission_flow() -> None: - import os, shutil - # Clear permission cache to ensure clean state - cache_file = os.path.join(os.path.dirname(__file__), '..', 'data', 'runtime', 'allowed_commands.json') - if os.path.exists(cache_file): - os.remove(cache_file) - - controller = RuntimeController() - result = controller.handle_task(UserTask(input="запусти sudo apt update")) - event_types = [event["type"] for event in result["events"]] - # sudo commands require both permission and password - # First step: permission request - assert result["status"] == "awaiting_permission" - assert result["directive"]["type"] == "tool" - assert result["directive"]["payload"]["tool"] == "shell_exec" - assert "permission_requested" in event_types - assert "task_awaiting_permission" in event_types - assert result["result"]["error"] == "Permission required before execution." 
- - # After granting permission, should request sudo password - resumed = controller.resolve_permission(task_id=result["task_id"], decision="allow_once") - assert resumed["status"] == "awaiting_input" - assert resumed["result"]["secret_request"]["kind"] == "sudo_password" diff --git a/tests/test_tools_flow.py b/tests/test_tools_flow.py deleted file mode 100644 index 203a343..0000000 --- a/tests/test_tools_flow.py +++ /dev/null @@ -1,489 +0,0 @@ -import json -from pathlib import Path - -from app.core.contracts import ExecutionDirective, UserTask -from app.core.contracts import PermissionDecision -from app.core.contracts import ToolResult -from app.events.event_types import TOOL_OUTPUT_CHUNK -from app.runtime.runtime_controller import RuntimeController -from app.tools.sandbox import ToolSandbox - - -def _write_config_tree(base_dir: Path) -> None: - (base_dir / "config").mkdir() - (base_dir / "data" / "events").mkdir(parents=True, exist_ok=True) - (base_dir / "data" / "state").mkdir(parents=True, exist_ok=True) - (base_dir / "data" / "permissions").mkdir(parents=True, exist_ok=True) - (base_dir / "models").mkdir(exist_ok=True) - - configs = { - "models.json": { - "orchestrator_path": "models/llama.gguf", - "coder_path": "models/xcoder.gguf", - "critic_path": "models/gemma.gguf", - "embeddings_path": "models/all-MiniLM-L6-v2", - "inference": {}, - }, - "prompts.json": { - "orchestration_prompt": "", - "planning_prompt": "", - "coder_prompt": "", - "critic_prompt": "", - }, - "permissions.json": { - "settings": { - "allow_caching": True, - "cache_file": str(base_dir / "data/runtime/allowed_commands.json"), - "normalize_commands": True, - "split_chained": True - }, - "command_categories": { - "hard_stop": { - "commands": ["rm -rf /", "rm -rf /*", "dd if=/dev/zero of=/dev/sd*"] - }, - "no_always": { - "allow_once": True, - "allow_always": False, - "commands": [ - "rm -rf *", "rm -rf .*", "shutdown", "reboot", "halt", - "apt", "apt-get", "dpkg", "yum", "dnf", "pacman", - 
"systemctl stop", "systemctl start", "systemctl restart", - "service stop", "service start", "killall", "pkill -9" - ] - }, - "normal": { - "allow_once": True, - "allow_always": True, - "commands": ["shell_exec", "file_write"] - } - }, - "path_settings": { - "allow_read_outside": True, - "allow_write_paths": [str(base_dir), "/tmp"], - "require_confirmation_for_write": True, - "require_confirmation_for_shell": True - } - }, - "runtime.json": { - "step_timeout_ms": 5000, - "task_timeout_ms": 30000, - "planner_retry_limit": 1, - "tool_retry_limit": 0, - "replan_limit": 0, - "max_execution_steps": 5, - "retrieval_top_k": 3, - "memory_thresholds": {}, - "critic_fallback_policy": "continue_without_critic", - "checkpoint_policy": {"save_on_transition": True}, - "event_retention_policy": {"keep_all": True}, - "streaming_settings": {"enabled": True}, - }, - } - for name, payload in configs.items(): - (base_dir / "config" / name).write_text(json.dumps(payload), encoding="utf-8") - - -def test_file_write_and_read_tool_flow(tmp_path: Path) -> None: - _write_config_tree(tmp_path) - controller = RuntimeController(base_dir=tmp_path) - target = tmp_path / "notes" / "test.txt" - - write_result = controller.handle_task( - UserTask( - input="write a file", - context={ - "requested_tool": "file_write", - "tool_args": {"path": str(target), "content": "hello from ducklm"}, - }, - ) - ) - assert write_result["status"] == "completed" - assert target.read_text(encoding="utf-8") == "hello from ducklm" - - read_result = controller.handle_task( - UserTask( - input="read the file", - context={ - "requested_tool": "file_read", - "tool_args": {"path": str(target)}, - }, - ) - ) - assert read_result["status"] == "completed" - assert read_result["result"]["output"] == "hello from ducklm" - - -def test_shell_exec_requires_permission_for_dangerous_command(tmp_path: Path) -> None: - _write_config_tree(tmp_path) - controller = RuntimeController(base_dir=tmp_path) - result = controller.handle_task( - 
UserTask( - input="run dangerous shell command", - context={ - "requested_tool": "shell_exec", - "tool_args": {"command": "rm -rf /tmp/nonexistent"}, - }, - ) - ) - # rm -rf /tmp/nonexistent is not hard_stop (only exact "rm -rf /" is) - # but it matches "rm -rf *" in no_always category - assert result["status"] == "awaiting_permission" - assert "permission_request" in result["result"] - - -def test_shell_exec_allows_safe_command(tmp_path: Path) -> None: - _write_config_tree(tmp_path) - controller = RuntimeController(base_dir=tmp_path) - result = controller.handle_task( - UserTask( - input="run safe shell command", - context={ - "requested_tool": "shell_exec", - "tool_args": {"command": "pwd"}, - }, - ) - ) - # Even safe commands require permission in the new permission model - assert result["status"] == "awaiting_permission" - assert "permission_request" in result["result"] - # Grant permission and verify execution - resumed = controller.resolve_permission(task_id=result["task_id"], decision="allow_once") - assert resumed["status"] == "completed" - assert str(tmp_path) in resumed["result"]["output"] - - -def test_shell_exec_publishes_output_chunks_before_completion(tmp_path: Path) -> None: - _write_config_tree(tmp_path) - controller = RuntimeController(base_dir=tmp_path) - perm_override = PermissionDecision( - action_type="shell_command", - pattern="printf", - decision="allow_always", - ) - - task = UserTask( - input="stream shell output", - context={ - "requested_tool": "shell_exec", - "tool_args": {"command": "printf 'first\\n'; sleep 0.1; printf 'second\\n'"}, - }, - ) - result = controller.execution_engine.execute( - task, - ExecutionDirective( - type="tool", - payload={ - "tool": "shell_exec", - "args": {"command": "printf 'first\\n'; sleep 0.1; printf 'second\\n'"}, - }, - ), - permission_override=perm_override, - ) - - events = controller.event_bus.list_for_task(task.task_id) - chunk_events = [event for event in events if event.type == TOOL_OUTPUT_CHUNK] - 
completed_index = next(index for index, event in enumerate(events) if event.type == "tool_completed") - first_chunk_index = next(index for index, event in enumerate(events) if event.type == TOOL_OUTPUT_CHUNK) - assert result["status"] == "completed" - assert [event.payload["chunk"] for event in chunk_events] == ["first\n", "second\n"] - assert first_chunk_index < completed_index - - -def test_streaming_shell_uses_idle_timeout_not_step_timeout(tmp_path: Path) -> None: - sandbox = ToolSandbox( - allowed_root=tmp_path, - timeout_ms=100, - command_timeout_ms=2000, - idle_timeout_ms=500, - ) - chunks: list[str] = [] - - result = sandbox.run_shell( - command="printf 'first\\n'; sleep 0.2; printf 'second\\n'", - output_callback=lambda _stream, chunk: chunks.append(chunk), - ) - - assert result.returncode == 0 - assert result.stdout == "first\nsecond\n" - assert chunks == ["first\n", "second\n"] - - -def test_streaming_shell_timeout_kills_child_process_group(tmp_path: Path) -> None: - marker = tmp_path / "child-survived" - sandbox = ToolSandbox( - allowed_root=tmp_path, - timeout_ms=100, - command_timeout_ms=100, - idle_timeout_ms=1000, - ) - - result = sandbox.run_shell( - command=f"sh -c 'sleep 1; touch {marker}'", - output_callback=lambda _stream, _chunk: None, - ) - - assert result.returncode == -9 - assert not marker.exists() - - -class _RecoveryCritic: - async def generate(self, prompt: str, max_tokens: int | None = None) -> str: - return '{"action":"continue","reason":"No matches is acceptable information for this exploratory check."}' - - -def test_failed_shell_step_can_recover_and_continue(tmp_path: Path) -> None: - _write_config_tree(tmp_path) - controller = RuntimeController(base_dir=tmp_path) - controller.execution_engine.set_critic(_RecoveryCritic()) - controller.execution_engine._recovery_limit = 1 - # Bypass permission check for this test — we're testing recovery, not permissions - from app.core.contracts import PermissionDecision - perm_override = 
PermissionDecision( - action_type="shell_command", - pattern="grep", - decision="allow_always", - ) - result = controller.execution_engine.execute( - UserTask( - input="run grep with no matches and recover", - ), - ExecutionDirective( - type="plan", - payload={ - "steps": [ - { - "id": "1", - "tool": "shell_exec", - "args": {"command": "printf 'abc\\n' | grep definitely_missing"}, - "depends_on": [], - } - ] - }, - ), - permission_override=perm_override, - ) - assert result["status"] == "completed" - failed_result = result["result"]["step_results"][0]["result"]["result"] - assert failed_result["metadata"]["exit_code"] == 1 - - -def test_privilege_scope_failure_awaits_user_review_before_replan(tmp_path: Path) -> None: - _write_config_tree(tmp_path) - controller = RuntimeController(base_dir=tmp_path) - task = UserTask( - input="обнови систему", - context={ - "requested_tool": "shell_exec", - "tool_args": {"command": "sudo apt update && apt upgrade -y"}, - }, - ) - class FailingShellTool: - def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult: - return ToolResult( - tool="shell_exec", - ok=False, - output="Error: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?", - error="Command failed with exit code 100", - metadata={"exit_code": 100}, - ) - - controller.tool_registry._tools["shell_exec"] = FailingShellTool() - - initial = controller.handle_task(task) - assert initial["status"] == "awaiting_permission" - controller.resolve_permission(task_id=task.task_id, decision="allow_once") - result = controller.resolve_secret(task_id=task.task_id, secret="secret") - - assert result["status"] == "awaiting_review" - assert result["result"]["review"]["diagnosis"]["type"] == "privilege_scope_error" - assert result["result"]["review"]["critic_assessment"]["classification"] == "model_planning_error" - - -def test_plan_pauses_on_privilege_scope_review_instead_of_completing(tmp_path: Path) -> None: - _write_config_tree(tmp_path) 
- controller = RuntimeController(base_dir=tmp_path) - - class FailingShellTool: - def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult: - return ToolResult( - tool="shell_exec", - ok=False, - output="Error: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?", - error="Command failed with exit code 100", - metadata={"exit_code": 100}, - ) - - controller.tool_registry._tools["shell_exec"] = FailingShellTool() - result = controller.execution_engine.execute( - UserTask(input="обнови систему"), - ExecutionDirective( - type="plan", - payload={ - "steps": [ - { - "id": "1", - "tool": "shell_exec", - "args": {"command": "sudo apt update && apt upgrade -y"}, - "depends_on": [], - } - ] - }, - ), - permission_override=PermissionDecision( - action_type="shell_command", - pattern="apt", - decision="allow_once", - ), - secret_override="secret", - ) - - assert result["status"] == "awaiting_review" - assert result["result"]["review"]["diagnosis"]["type"] == "privilege_scope_error" - - -def test_sudo_auth_failure_requests_secret_retry_not_review(tmp_path: Path) -> None: - _write_config_tree(tmp_path) - controller = RuntimeController(base_dir=tmp_path) - - class BadPasswordShellTool: - def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult: - return ToolResult( - tool="shell_exec", - ok=False, - output="Sorry, try again.\nsudo: no password was provided\nsudo: 1 incorrect password attempt\n", - error="Command failed with exit code 1", - metadata={"exit_code": 1, "sudo_auth_failed": True}, - ) - - controller.tool_registry._tools["shell_exec"] = BadPasswordShellTool() - result = controller.execution_engine.execute( - UserTask(input="обнови систему"), - ExecutionDirective( - type="plan", - payload={ - "steps": [ - { - "id": "1", - "tool": "shell_exec", - "args": {"command": "sudo apt update && apt upgrade -y"}, - "depends_on": [], - } - ] - }, - ), - permission_override=PermissionDecision( - 
action_type="shell_command", - pattern="apt", - decision="allow_once", - ), - secret_override="wrong", - ) - - assert result["status"] == "awaiting_input" - assert result["result"]["secret_request"]["kind"] == "sudo_password" - assert result["result"]["secret_request"]["prompt"] == "Sudo password incorrect. Try again" - assert result["result"]["attempt_failed"] is True - - -def test_runtime_keeps_secret_state_after_bad_sudo_password(tmp_path: Path) -> None: - _write_config_tree(tmp_path) - controller = RuntimeController(base_dir=tmp_path) - - class RetryPasswordShellTool: - calls = 0 - - def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult: - self.calls += 1 - if self.calls == 1: - return ToolResult( - tool="shell_exec", - ok=False, - output="Sorry, try again.\nsudo: no password was provided\nsudo: 1 incorrect password attempt\n", - error="Command failed with exit code 1", - metadata={"exit_code": 1, "sudo_auth_failed": True}, - ) - return ToolResult( - tool="shell_exec", - ok=True, - output="root\n", - metadata={"exit_code": 0}, - ) - - controller.tool_registry._tools["shell_exec"] = RetryPasswordShellTool() - task = UserTask( - input="кто root", - context={ - "requested_tool": "shell_exec", - "tool_args": {"command": "sudo whoami"}, - }, - ) - initial = controller.handle_task(task) - assert initial["status"] == "awaiting_permission" - allowed = controller.resolve_permission(task_id=task.task_id, decision="allow_once") - assert allowed["status"] == "awaiting_input" - - retry = controller.resolve_secret(task_id=task.task_id, secret="wrong") - assert retry["status"] == "awaiting_input" - assert retry["result"]["attempt_failed"] is True - - final = controller.resolve_secret(task_id=task.task_id, secret="correct") - assert final["status"] == "completed" - assert final["result"]["output"] == "root\n" - - -def test_permission_resolution_can_resume_task(tmp_path: Path) -> None: - _write_config_tree(tmp_path) - controller = 
RuntimeController(base_dir=tmp_path) - initial = controller.handle_task( - UserTask( - input="запусти sudo apt update", - ) - ) - assert initial["status"] == "awaiting_permission" - resumed = controller.resolve_permission(task_id=initial["task_id"], decision="deny") - assert resumed["status"] == "failed" - assert resumed["result"]["error"] == "Permission denied by user." - - -def test_sudo_permission_resolution_requests_secret_input(tmp_path: Path) -> None: - _write_config_tree(tmp_path) - controller = RuntimeController(base_dir=tmp_path) - initial = controller.handle_task(UserTask(input="запусти sudo apt update")) - assert initial["status"] == "awaiting_permission" - resumed = controller.resolve_permission(task_id=initial["task_id"], decision="allow_once") - assert resumed["status"] == "awaiting_input" - assert resumed["result"]["secret_request"]["kind"] == "sudo_password" - - -def test_implicit_sudo_command_requests_password(tmp_path: Path) -> None: - """Commands like 'apt list --upgradable' that require sudo but don't start with 'sudo' - should also trigger password request after permission is granted.""" - _write_config_tree(tmp_path) - controller = RuntimeController(base_dir=tmp_path) - # apt list --upgradable requires root but doesn't start with 'sudo' - initial = controller.handle_task( - UserTask( - input="проверь обновления", - context={ - "requested_tool": "shell_exec", - "tool_args": {"command": "apt list --upgradable"}, - }, - ) - ) - assert initial["status"] == "awaiting_permission" - # Grant permission — should request sudo password since apt requires root - resumed = controller.resolve_permission(task_id=initial["task_id"], decision="allow_once") - assert resumed["status"] == "awaiting_input" - assert resumed["result"]["secret_request"]["kind"] == "sudo_password" - - -def test_secret_resolution_continues_after_pending_secret_saved(tmp_path: Path) -> None: - _write_config_tree(tmp_path) - controller = RuntimeController(base_dir=tmp_path) - initial = 
controller.handle_task(UserTask(input="запусти sudo apt update")) - assert initial["status"] == "awaiting_permission" - resumed = controller.resolve_permission(task_id=initial["task_id"], decision="allow_once") - assert resumed["status"] == "awaiting_input" - final = controller.resolve_secret(task_id=initial["task_id"], secret="wrongpass") - assert final["status"] in {"completed", "failed", "awaiting_input"} - assert "error" in final["result"] or "output" in final["result"]