diff --git a/.gitignore b/.gitignore index 3e91fed..5000599 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ config/.env.* # Runtime state data/**/*.sqlite3 data/**/*.sqlite3-* +data/runtime/*.json data/runtime/*.pid data/runtime/*.log logs/ diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..139ca70 --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,347 @@ +# ARCHITECTURE + +Этот документ фиксирует целевую архитектуру `ducklm` как локального event-driven multi-model execution runtime. + +`TASK_3.md` — это директива для ИИ-кодера. +`ARCHITECTURE.md` — это короткая инженерная карта системы: что является ядром, какие есть слои, как течёт управление, где принимаются решения, а где только исполняются переходы. + +## 1. Core Principle + +Система строится вокруг `Runtime Loop Controller`. + +Центр системы: + +- не `router` +- не `orchestrator` +- не `execution engine` + +Центр системы: + +- `runtime loop` + +Именно он замыкает жизненный цикл задачи: + +```text +task + -> state load/create + -> context build + -> orchestration decision + -> plan/directive + -> execution + -> critic + -> memory policy + -> checkpoint + -> next step / complete / fail +``` + +## 2. Layer Model + +Целевая форма системы: + +```text +Client / CLI / API + | + v +Runtime Loop Controller + | + +--> State Store / Checkpoints + +--> Context Builder + +--> Router + +--> Orchestrator / Planner + +--> Execution Engine / Scheduler + | | + | +--> Tool Layer + | +--> Coder + | + +--> Critic + +--> Memory Write Policy + +--> Memory Store + Vector Index + +--> Event Bus + Event Store + +--> Streaming Projection +``` + +Принцип: + +- `runtime loop` координирует +- `router` рекомендует +- `orchestrator` думает +- `execution engine` исполняет +- `tools/coder` делают работу +- `critic` оценивает +- `memory policy` решает запись +- `event bus` фиксирует историю +- `state store` даёт resume + +## 3. 
Responsibility Boundaries + +### Runtime Loop Controller + +Отвечает за: + +- task lifecycle +- state transitions +- вызов компонентов в правильном порядке +- применение decision objects +- checkpointing +- completion / failure path + +Не отвечает за: + +- policy reasoning +- raw tool execution +- prompt assembly inline + +### Router + +Это `policy evaluator + decision suggester`. + +Контракт: + +```text +(input state + assembled context) -> ExecutionDirective +``` + +Свойства: + +- pure function +- no side effects +- no tool execution +- no state mutation + +### Orchestrator / Planner + +Отвечает за: + +- orchestration reasoning +- deciding whether planning is needed +- generating plan JSON +- returning structured directives + +Не отвечает за: + +- execution +- direct state mutation +- tool invocation + +### Execution Engine / Scheduler + +Отвечает за: + +- step scheduling +- task graph traversal +- step execution coordination +- calling tool/coder adapters +- reporting structured results + +Не отвечает за: + +- ownership of global lifecycle +- high-level policy + +### Critic + +Отвечает за: + +- evaluation of tool/coder outputs +- returning structured scores and explanation + +Не отвечает за: + +- final memory write decision +- execution retry policy + +### Memory Write Policy + +Отвечает за: + +- deterministic decision about storing memory +- dedup / merge / skip behavior + +Не отвечает за: + +- semantic retrieval +- critic scoring + +## 4. Decision Model + +Все decision-producing components должны возвращать структурированные объекты. + +Базовый контракт: + +```json +{ + "type": "plan|tool|coder|respond|replan|store_memory|request_permission|complete|fail|noop", + "payload": {}, + "requires_permission": false, + "confidence": 0.0, + "reason": "string" +} +``` + +Это главный антихаосный инвариант системы. 
+ +Следствие: + +- компоненты не исполняют решения напрямую +- компоненты не мутируют state напрямую +- runtime loop применяет решения и переводит систему дальше + +## 5. Execution Flow + +Нормальный путь выполнения: + +1. Клиент отправляет task. +2. Runtime loop создаёт или загружает task state. +3. Публикуется `task_received`. +4. Context builder собирает execution context. +5. Router возвращает decision object. +6. Orchestrator возвращает direct action или plan. +7. План валидируется и преобразуется в task graph. +8. Execution engine выбирает следующий шаг. +9. Tool или coder исполняет шаг через adapter. +10. Result возвращается в runtime loop. +11. Critic возвращает evaluation suggestion. +12. Memory policy возвращает decision по записи. +13. State checkpoint сохраняется. +14. Event bus фиксирует события. +15. Runtime loop выбирает `continue / replan / complete / fail`. + +## 6. Task Graph Model + +Внешний planner может вернуть список шагов. + +Внутри runtime план должен жить как task graph: + +```json +{ + "nodes": [ + { + "id": "step-1", + "kind": "tool", + "tool": "shell_exec", + "args": {"command": "hostnamectl"}, + "depends_on": [] + } + ] +} +``` + +Сейчас допускается sequential DAG execution. +В будущем это даёт путь к parallel scheduling без переписывания модели. + +## 7. Event Backbone + +Система event-driven. + +`EventBus` нужен не только для стриминга, но и как внутренняя магистральная шина системы. + +Минимальные свойства: + +- ordering per task +- monotonic sequence per task +- durable append to event store +- replay capability +- consumer idempotency + +Минимальная модель доставки: + +- `at least once` + +Правило идемпотентности: + +- событие дедуплицируется по `task_id + sequence` + +Streaming layer — это projection от event bus, а не источник правды. + +## 8. State Persistence + +Так как runtime задуман как long-running autonomous system, in-memory lifecycle недостаточен. 
+ +Нужны: + +- task state store +- checkpoint store +- resume from crash/restart + +Минимальная стратегия: + +- checkpoint after critical transitions +- latest valid checkpoint is resumable + +Primary choice для MVP: + +- `SQLite` + +## 9. Async and Isolation + +LLM loop не должен блокироваться долгими tool operations. + +Поэтому нужны: + +- async execution adapters +- timeout wrappers +- cancellation handling +- bounded concurrency + +Для опасных или тяжёлых операций нужен отдельный sandbox layer. + +Особенно для: + +- `shell_exec` +- browser/web fallback +- generated helper scripts + +## 10. Memory Architecture + +Memory — отдельная подсистема хранения, а не JSON dump. + +Рекомендуемая форма: + +- metadata store: `SQLite` +- vector index: `FAISS` или `hnswlib` + +Два разных процесса: + +- retrieval +- write decision + +Это специально разделено. + +`critic` только оценивает. +`memory write policy` принимает финальное решение. + +Минимальная логика записи должна быть детерминированной: + +```text +(critic_score + memory_type + runtime_weight + dedup_state + safety_state) -> decision +``` + +## 11. Failure Model + +Система должна быть устойчивой к частичным сбоям. + +Ожидаемые controlled failure paths: + +- invalid planner output -> replan or fail +- tool timeout -> retry or fail +- critic failure -> fallback policy +- memory failure -> skip write and continue where safe +- streaming failure -> sync fallback + +Главный принцип: + +- subsystem failure не должен автоматически означать runtime collapse + +## 12. Why This Shape + +Эта архитектура нужна, чтобы система не деградировала в один из плохих вариантов: + +- `router-god-object` +- `runtime loop with hidden policy logic` +- `LLM that directly executes tools` +- `streaming instead of event model` +- `critic as memory authority` +- `in-memory only autonomous runtime` + +Если держать эти границы жёстко, проект остаётся расширяемым. 
+Если границы размыть, система быстро превратится в трудноотлаживаемый procedural agent. diff --git a/FOR_AI_REVIEW.md b/FOR_AI_REVIEW.md new file mode 100644 index 0000000..46f3544 --- /dev/null +++ b/FOR_AI_REVIEW.md @@ -0,0 +1,249 @@ +# DuckLM Runtime Architecture Review + +## 🧠 1. System Overview + +**What is runtime?** +Runtime is the execution substrate of the system — a multi-layered cognitive execution environment that orchestrates LLMs, tools, memory, and permissions into a unified agentic workflow. It's the `RuntimeController` that composes `RuntimeLoop`, `ExecutionEngine`, `ContextBuilder`, `AsyncRouter`, `PermissionService`, and `EventBus`. + +**What is the core loop?** +The core loop is the `RuntimeLoop.run_task()` method: it receives a `UserTask`, applies permission hard-stop checks, creates task state, builds context via `ContextBuilder`, routes via `AsyncRouter` to get a `directive`, executes via `ExecutionEngine`, applies `Critic` evaluation, saves via `MemoryPolicy`, publishes `RuntimeEvent`s through `EventBus`, and returns streaming output. + +**Models (Orchestrator / Coder / Critic / Utility)** +- **Orchestrator** (`OrchestratorAdapter`/`AsyncOrchestratorAdapter`): LLM that decides plan vs direct respond vs tool; generates `ExecutionDirective` of type `plan`, `tool`, `respond`, `fail`, etc. +- **Coder** (`CoderAdapter`/`AsyncCoderAdapter`): LLM specialized for code generation and manipulation. +- **Critic** (`CriticAdapter`/`AsyncCriticAdapter`): Evaluates tool outputs with JSON scoring (correctness, usefulness, safety, memory_store, weight). +- **Utility**: The `sys_util` orchestrator — a fallback/orchestration layer for system-level operations. + +**What is "truth"? (Event Store / State Store)** +- **Event Store** (`SQLiteEventStore`): Immutable append-only log of `RuntimeEvent`s per task. Source of truth for "what happened." +- **State Store** (`SQLiteTaskStateStore`): Current mutable task state (status, last_directive, pending requests). 
"Current truth" of task progress. +- **Checkpoint Store** (`SQLiteCheckpointStore`): Snapshots of task state + context at milestones. +- **Memory Store** (`MemoryStore` + `VectorIndex`): Long-term knowledge base with weighted entries. + +--- + +## 🔁 2. End-to-End Flow + +### High-Level Flow (as seen in logs) +``` +User Input +→ Router (AsyncRouter.decide) +→ Context Builder (ContextBuilder.build) +→ Orchestrator (decides plan vs direct) +→ Plan / Direct Action +→ Execution Engine +→ Tool Layer (ToolRegistry + ToolSandbox) +→ Critic (AsyncCriticAdapter) +→ Memory Policy (MemoryWritePolicy) +→ Event Bus (SQLiteEventStore) +→ Streaming Output (via WebSocket / SSE) +``` + +### Conversation Flow +1. **Router** decides `plan` vs `respond` vs `tool` vs `fail` based on orchestrator output or intent parser. +2. **Context Builder** enriches task with memory context, tool context, execution context, and safety constraints. +3. **Orchestrator** (or direct respond) produces the initial `ExecutionDirective`. +4. **Execution Engine** schedules via `ExecutionScheduler`, then executes: + - `plan` → parse into `PlanStep`s, build task graph, execute ready steps + - `tool` → validate tool existence, check permissions, execute via `ToolRegistry` + - `respond` → direct response + - `fail` → immediate failure +5. **Tool Layer** (`ToolRegistry` + `ToolSandbox`): + - Plugin discovery via `ToolDiscovery` + - Manifest-based tool registration + - Sandboxed execution with timeout +6. **Critic** evaluates tool results (if enabled), outputs `CriticScore` JSON. +7. **Memory Policy** decides whether to insert `tool_result`, `critique`, `plan`, `fact`, `summary`, or `user_preference` into memory. +8. **Event Bus** (`SQLiteEventBus`) publishes `RuntimeEvent` with sequence ordering. +9. **Streaming Output** replays events via WebSocket and sends incremental responses. 
+ +### Failure Flow +- **Invalid JSON flow**: `ExecutionScheduler.parse_plan_steps` catches `JSONDecodeError` / `ValueError` / `TypeError`, logs warning, returns empty steps → `plan` fails with "Failed to parse plan steps." +- **Tool failure flow**: Tool execution returns `{"status": "failed", "result": {"error": "..."}}` → ExecutionEngine returns failed status → task state updated → event `TASK_FAILED` published → stops further plan steps. +- **Critic failure flow**: `_evaluate_with_critic` catches exception, logs warning, publishes `CRITIC_RESULT` with error → critic_score is `None` → execution continues without critique. +- **Orchestrator fallback flow**: If primary orchestrator fails or missing, `AsyncRouter` has `sys_util` fallback (utility orchestrator) for system-level decisions. +- **Permission denial flow**: `PermissionService.check_shell_command` / `check_write_path` returns `decision: "hard_stop"` or `decision: "deny"` → immediate failure with blocked reason; if `decision: "prompt"` → `TASK_AWAITING_PERMISSION` state. + +### Repair Flow (JSON / Tool-call) +- Repair is triggered via `resolve_permission` or `resolve_secret` endpoints. +- Permission repair: user provides `decision` ("allow_once"/"allow_always"/"deny"/"ask_always") → `PermissionService.resolve_permission` → updates state → retries original directive. +- Secret repair: user provides secret string → `ExecutionEngine.execute` with `secret_override` → continues execution. + +--- + +## ⚙️ 3. Component Breakdown + +### `runtime_loop` (`RuntimeLoop`) +- **Responsibility**: Central task coordination; state management; event publishing. +- **Input**: `UserTask` +- **Output**: `{"task_id", "status", "directive", "result", "events"}` +- **Must NOT do**: Direct LLM calls (delegates to router/execution_engine); bypass state store. + +### `execution_engine` (`ExecutionEngine`) +- **Responsibility**: Execute directives (plan/tool/respond/fail); integrate critic; interface with tool registry. 
+- **Input**: `UserTask`, `ExecutionDirective`, optional `permission_override`, `secret_override` +- **Output**: `{"status", "result", "step_results"}` +- **Must NOT do**: Bypass permission checks; skip critic evaluation when enabled; leak secrets in logs. + +### `scheduler` (`ExecutionScheduler`) +- **Responsibility**: Parse plan JSON, build task dependency graph, yield ready steps, detect cycles. +- **Input**: JSON plan string, `task_id` +- **Output**: `list[PlanStep]` +- **Must NOT do**: Execute anything; modify task state directly. + +### `tool_registry` (`ToolRegistry`) +- **Responsibility**: Register tools from their manifests; execute them via `ToolSandbox`; provide schema metadata. +- **Input**: tool name, args dict +- **Output**: `ToolResult` +- **Must NOT do**: Bypass sandbox; execute privileged host commands without sandbox. + +### `event_bus` (`EventBus` → `SQLiteEventStore`) +- **Responsibility**: Append-only event persistence; sequence numbering; per-task query. +- **Input**: `RuntimeEvent` +- **Output**: event stream +- **Must NOT do**: Modify state store directly (state is separate); delete or mutate events. + +### `memory` (`MemoryInterface` → `MemoryStore` + `VectorIndex`) +- **Responsibility**: Store/retrieve weighted memory entries; vector similarity search; integrate with context builder. +- **Input**: text, kind, source, weight, metadata +- **Output**: search results or insertion confirmation +- **Must NOT do**: Expose raw embeddings without access control; store secrets. + +--- + +## 🧩 4. 
Data Contracts + +### `PlanStep` +```python +id: str +kind: Literal["tool", "coder", "memory", "respond"] +tool: str | None +args: dict[str, Any] +description: str +requires_confirmation: bool +depends_on: list[str] +``` +**Real example** (from `router` prompt engineering): +`{"id":"step-0","kind":"tool","tool":"shell_exec","args":{"command":"ls -la"},"description":"List directory","requires_confirmation":false,"depends_on":[]}` + +### `ToolCall` +```python +tool: str +args: dict[str, Any] +task_id: str +step_id: str +``` +**Real log**: `TOOL_CALLED` event with `{"tool":"shell_exec","args":{"command":"pwd"},"task_id":"xyz","step_id":"step-0"}` + +### `ToolResult` +```python +tool: str +ok: bool +output: Any +error: str | None +metadata: dict[str, Any] +``` +**Real output**: `{"tool":"shell_exec","ok":true,"output":"/app","error":null,"metadata":{}}` + +### `RuntimeEvent` +```python +event_id: str +task_id: str +session_id: str +sequence: int +type: str # e.g. TASK_RECEIVED, TOOL_CALLED, TASK_COMPLETED +payload: dict[str, Any] +causation_id: str | None +correlation_id: str +``` +**Real event stream**: `TASK_RECEIVED → CONTEXT_BUILT → PLAN_STARTED → TOOL_CALLED → TOOL_COMPLETED → TASK_COMPLETED` + +### `MemoryEntry` +```python +id: str +text: str +kind: Literal["tool_result","plan","critique","fact","summary","user_preference"] +source: Literal["tool","critic","user","system"] +weight: float +task_id: str | None +session_id: str | None +metadata: dict[str, Any] +embedding_model: str +embedding_dim: int +``` +**Real insertion**: After critic evaluation, `kind="critique"`, `source="critic"`, `weight=0.85`, metadata includes scores. + +--- + +## 🔥 5. Failure Modes + +### Invalid JSON Flow +- **Trigger**: Malformed plan JSON (e.g., missing braces, non-JSON string). +- **Detection**: `parse_plan_steps` catches `JSONDecodeError` / `ValueError` / `TypeError`. 
+- **Result**: Warning logged, empty steps returned → `PLAN_FAILED` with `"Failed to parse plan steps from directive"`. + +### Tool Failure Flow +- **Trigger**: Tool returns `ok=False` or raises exception in sandbox. +- **Detection**: `_execute_tool` checks `tool_result.ok`. +- **Result**: Status `"failed"`, result contains `{"error": "...", "failed_step": step.id, "step_results": [...]}` → `TASK_FAILED` event; further plan steps skipped. + +### Critic Failure Flow +- **Trigger**: Critic adapter raises exception or returns non-JSON output. +- **Detection**: `_evaluate_with_critic` catches exception, logs warning. +- **Result**: Event `CRITIC_RESULT` with error payload → `critic_score = None` → execution continues without critique; memory write skipped. + +### Orchestrator Fallback Flow +- **Trigger**: Primary orchestrator model unavailable or returns invalid directive. +- **Detection**: `_ensure_orchestrator` returns `None`; router falls back to `sys_util` orchestrator. +- **Result**: Utility orchestrator handles system-level decisions (e.g., file operations, environment queries). + +### Permission Denial Flow +- **Trigger**: `PermissionService` returns `decision: "hard_stop"` or `"deny"`. +- **Detection**: `_execute_tool` checks `permission_result`. +- **Result**: Immediate failure with `"Command blocked: ..."` → `TASK_FAILED`; no tool execution. + +--- + +## 🧠 6. "Decision Logic Map" + +### Orchestrator vs Direct Respond +- **Use orchestrator** when: task requires planning, multi-step tool usage, or unknown intent. Orchestrator decides to emit `plan` or `tool` directive. +- **Direct respond** when: intent parser classifies as simple query (`TASK_RECEIVED` → `router.intent_parser` → `respond` directive) or `respond` directive explicitly set. + +### Utility Model Call +- Invoked when `sys_util` orchestrator is loaded (configurable). 
Used for system-level operations (environment inspection, file system queries) and as a fallback when the primary orchestrator fails. + +### Retry Logic +- **Planner retry**: `ExecutionScheduler` has `retry_limit=2`; on parse/validation failure, it retries up to the limit before failing the plan. +- **Tool retry**: Not implemented natively; retry must be encoded in plan steps (`depends_on`, manual replan). + +### Plan Creation +- **Trigger**: Orchestrator output contains `{type: "plan", ...}` or explicit `plan` directive. +- **Process**: `parse_plan_steps` → `validate_no_cycles` → `build_task_graph` → execution of ready steps. +- **No plan**: Orchestrator outputs `respond` or `tool` → direct execution. + +--- + +## 🧰 7. Tool System Architecture + +### Plugin Discovery +- `ToolDiscovery` scans `app/tools/plugins/` for modules exporting `Tool` classes. +- Discovers: `shell_exec`, `file_read`, `file_write`, `memory` (search/insert/list). + +### Manifest-Based Tools +- Each plugin has a `manifest.json` with: + - `description`: human-readable docstring. + - `args_schema`: JSON schema for validation. + - `requires_permission`: boolean for privileged tools (`shell_exec`, `file_write`). +- On discovery, the registry registers the tool and stores its schema for permission/routing. + +### Registry Bootstrap +- `RuntimeController._create_tool_registry()` initializes discovery, loads plugins, registers with init mapping (sandbox, permissions). +- Tools are initialized once at startup; `tool_registry` is shared across executions. + +### Execution Isolation +- **`ToolSandbox`**: + - Restricts filesystem to `allowed_root` (project base dir). + - Enforces a timeout per execution (`step_timeout_ms`). + - Blocks `sudo` without secret override; requires secret injection for sudo commands. +- **Permission gating**: `shell_exec` and `file_write` require explicit permission decision before execution. 
\ No newline at end of file diff --git a/IMPLEMENTATION_PLAN.md b/IMPLEMENTATION_PLAN.md new file mode 100644 index 0000000..6cddf0a --- /dev/null +++ b/IMPLEMENTATION_PLAN.md @@ -0,0 +1,534 @@ +# IMPLEMENTATION PLAN + +Этот документ описывает рекомендуемый порядок реализации `ducklm` от пустого репозитория до рабочего локального runtime с тестовым веб-чатом. + +План опирается на [`TASK_3.md`](TASK_3.md) и [`ARCHITECTURE.md`](ARCHITECTURE.md). + +## 1. Goal + +Собрать систему по этапам так, чтобы после каждого этапа оставался рабочий, проверяемый инкремент, а не набор недоделанных слоёв. + +Главный принцип: + +- сначала каркас и контракты +- потом runtime core +- потом execution path +- потом memory / critic / recovery +- потом удобные интерфейсы проверки + +## 2. Milestones Overview + +1. Project skeleton and typed contracts +2. Config system and dependency wiring +3. Runtime loop skeleton +4. Event bus and event store +5. State persistence and checkpointing +6. Context builder and orchestrator adapter +7. Router and directive flow +8. Execution engine and task graph +9. Permission system and tool sandbox +10. MVP tools +11. FastAPI API and health surface +12. Web chat test client +13. Coder integration +14. Critic integration +15. Memory system +16. Memory write policy +17. Retry, recovery, replay +18. CLI and operator utilities +19. Hardening and tests + +## 3. Detailed Stages + +### Stage 1. 
Project Skeleton and Typed Contracts + +Цель: + +- создать структуру директорий +- завести базовые модели данных +- убрать двусмысленность интерфейсов между слоями + +Сделать: + +- создать `app/`, `config/`, `data/`, `tests/` +- добавить core contracts: + - `UserTask` + - `PlanStep` + - `ToolCall` + - `ToolResult` + - `CriticScore` + - `RuntimeEvent` + - `TaskCheckpoint` + - `ExecutionDirective` + +Результат этапа: + +- проект компилируется +- типы и схемы являются source of truth для остальных модулей + +Проверка: + +- unit tests на валидацию схем + +### Stage 2. Config System and Dependency Wiring + +Цель: + +- вынести runtime behavior в конфиги +- зафиксировать единый способ загрузки настроек + +Сделать: + +- `config/models.json` +- `config/prompts.json` +- `config/permissions.json` +- `config/runtime.json` +- loader и typed config models + +Результат этапа: + +- runtime можно запускать с консистентной конфигурацией + +Проверка: + +- config load smoke test + +### Stage 3. Runtime Loop Skeleton + +Цель: + +- создать heart of system без полной бизнес-логики + +Сделать: + +- `runtime_loop.py` +- `runtime_controller.py` +- минимальный lifecycle: + - receive task + - create state + - build empty context + - emit initial event + - return placeholder directive/result + +Результат этапа: + +- есть центральный control loop +- остальные слои начинают подстраиваться под него, а не наоборот + +Проверка: + +- smoke test на прохождение задачи через loop skeleton + +### Stage 4. Event Bus and Event Store + +Цель: + +- создать внутреннюю event backbone + +Сделать: + +- `event_bus.py` +- `event_types.py` +- `event_store.py` +- monotonic sequence per task +- append-only storage +- базовый replay reader + +Результат этапа: + +- у каждой задачи есть воспроизводимая хронология + +Проверка: + +- event ordering tests +- dedup/idempotency tests + +### Stage 5. 
State Persistence and Checkpointing + +Цель: + +- убрать зависимость task lifecycle от памяти процесса + +Сделать: + +- `task_state_store.py` +- `checkpoint_store.py` +- SQLite backend +- checkpoint after critical transitions +- resume loading primitives + +Результат этапа: + +- runtime готов к recovery после падения + +Проверка: + +- save/load checkpoint tests + +### Stage 6. Context Builder and Orchestrator Adapter + +Цель: + +- зафиксировать правильный вход в reasoning path + +Сделать: + +- `context_builder.py` +- token-budget-aware assembly +- orchestrator adapter abstraction +- planning mode / orchestration mode interfaces + +Результат этапа: + +- все будущие вызовы reasoning model идут через один нормализованный путь + +Проверка: + +- tests на context assembly priorities + +### Stage 7. Router and Directive Flow + +Цель: + +- зафиксировать router как pure decision layer + +Сделать: + +- `router.py` +- `state + context -> ExecutionDirective` +- no side effects +- routing rules for: + - retrieval needed + - planning needed + - permission needed + - critic needed + +Результат этапа: + +- runtime loop применяет решения, а не изобретает их сам + +Проверка: + +- unit tests на routing decisions + +### Stage 8. Execution Engine and Task Graph + +Цель: + +- получить управляемое исполнение шагов, а не “вызовы по месту” + +Сделать: + +- `execution_engine.py` +- `execution_scheduler.py` +- task graph validation +- sequential DAG scheduler +- adapters for tool/coder execution + +Результат этапа: + +- runtime может исполнять direct action и multi-step plans + +Проверка: + +- task graph validation tests +- step ordering tests + +### Stage 9. 
Permission System and Tool Sandbox + +Цель: + +- не дать runtime выполнять опасные действия напрямую + +Сделать: + +- permission rules +- persistent approval store +- shell safety classifier +- sandbox execution adapter +- timeout/resource/path restrictions + +Результат этапа: + +- опасные команды требуют policy decision до запуска + +Проверка: + +- permission flow tests +- sandbox boundary smoke tests + +### Stage 10. MVP Tools + +Цель: + +- сделать минимально полезный execution path + +Сделать: + +- `shell_exec` +- `file_read` +- `file_write` +- unified tool registry +- unified `ToolResult` + +Результат этапа: + +- runtime уже может выполнять реальные локальные задачи + +Проверка: + +- integration tests для трёх базовых tools + +### Stage 11. FastAPI API and Health Surface + +Цель: + +- открыть runtime наружу через стабильный backend interface + +Сделать: + +- `POST /chat` +- `WS /stream` +- `GET /health` +- базовый request/response models +- error handling + +Результат этапа: + +- систему уже можно дергать из внешнего клиента + +Проверка: + +- API smoke tests + +### Stage 12. Web Chat Test Client + +Цель: + +- получить быстрый способ руками проверить поведение всей системы через браузер + +Сделать: + +- минимальный локальный веб-чат +- простую страницу с: + - вводом задачи + - окном сообщений + - панелью streaming events + - индикацией permission requests + - отображением final result +- подключение к `POST /chat` и `WS /stream` + +Требования: + +- это не production UI +- это не отдельный продуктовый frontend +- это thin test client для ручной проверки runtime + +Лучше всего разместить как: + +- `app/api/static/` или отдельный `web/` модуль с минимальным стеком + +Результат этапа: + +- можно открыть браузер и увидеть, как runtime планирует, исполняет шаги и стримит события + +Проверка: + +- ручной e2e smoke test через браузер + +### Stage 13. 
Coder Integration + +Цель: + +- подключить отдельную coding model без смешивания ролей + +Сделать: + +- `core/coder.py` +- `generate_code` +- `fix_code` +- `refactor_code` +- structured coder result + +Результат этапа: + +- runtime может делегировать кодогенерацию специализированной модели + +Проверка: + +- tests на coder request/response flow + +### Stage 14. Critic Integration + +Цель: + +- получить formal evaluation layer после tools/coder + +Сделать: + +- critic adapter +- `CriticScore` +- fallback policy when critic unavailable + +Результат этапа: + +- результаты можно оценивать единообразно + +Проверка: + +- critic scoring contract tests + +### Stage 15. Memory System + +Цель: + +- добавить долговременную retrieval memory + +Сделать: + +- SQLite metadata store +- FAISS/hnswlib vector index +- insert/search/delete/reindex +- embedding versioning + +Результат этапа: + +- runtime получает semantic retrieval вместо контекста “только текущая задача” + +Проверка: + +- memory insert/search tests + +### Stage 16. Memory Write Policy + +Цель: + +- не допустить хаотичной записи всего подряд + +Сделать: + +- deterministic write policy +- threshold model +- dedup / merge rules +- conflict handling + +Результат этапа: + +- память пополняется контролируемо, а не по одному score cutoff + +Проверка: + +- memory policy decision tests + +### Stage 17. Retry, Recovery, Replay + +Цель: + +- довести runtime до устойчивого long-running поведения + +Сделать: + +- planner retry +- tool retry for allowed cases +- partial failure recovery +- replay path from event store +- resume from checkpoint + +Результат этапа: + +- система может переживать ошибки без полной потери исполнения + +Проверка: + +- recovery smoke tests +- replay tests + +### Stage 18. 
CLI and Operator Utilities + +Цель: + +- дать локальный интерфейс помимо API/веб-чата + +Сделать: + +- send task +- show result +- follow events +- memory search +- replay task history + +Результат этапа: + +- разработчик может проверять runtime без браузера + +Проверка: + +- CLI smoke tests + +### Stage 19. Hardening and Tests + +Цель: + +- довести проект до инженерно приемлемого состояния + +Сделать: + +- structured logging refinement +- failure-path tests +- concurrency edge cases +- docs refresh +- cleanup of temporary stubs + +Результат этапа: + +- проект становится пригодным для реальной итеративной разработки + +Проверка: + +- full critical-path smoke suite + +## 4. Recommended First Working Demo + +Первый нормальный demo checkpoint должен быть на этапе `Stage 12`. + +Что должно работать к этому моменту: + +- браузерный веб-чат открывается локально +- пользователь отправляет задачу +- runtime принимает task +- событие начала работы видно в UI +- если нужен plan, это видно в events panel +- tool execution видно в events panel +- final response возвращается в чат + +На этом этапе memory, critic и recovery ещё могут быть частично stubbed, но: + +- runtime loop +- event bus +- state persistence +- router +- execution engine +- permissions +- базовые tools +- API +- web chat + +должны быть уже реальными. + +## 5. Order Rationale + +Почему веб-чат не в самом конце: + +- он нужен как live inspection surface для runtime +- через него проще проверять streaming, permissions и event ordering +- он быстрее выявляет архитектурные проблемы, чем голые unit tests + +Но веб-чат ставится только после: + +- runtime core +- event bus +- persistence +- basic execution path +- API + +Иначе он станет красивой оболочкой над несуществующей системой. 
diff --git a/MVP_CHECKLIST.md b/MVP_CHECKLIST.md new file mode 100644 index 0000000..cdcf84a --- /dev/null +++ b/MVP_CHECKLIST.md @@ -0,0 +1,83 @@ +# MVP CHECKLIST + +Этот чеклист фиксирует минимальный рабочий объём для первого демонстрационного запуска `ducklm`. + +## 1. Core Runtime + +- [x] Есть модульная структура проекта `app/`, `config/`, `data/`, `tests/` +- [x] Есть typed contracts для core entities +- [x] Есть `Runtime Loop Controller` +- [x] Runtime loop умеет принять задачу и создать task state +- [x] Runtime loop публикует стартовые и финальные события + +## 2. Events and State + +- [x] Есть `EventBus` +- [x] Есть `EventStore` +- [x] События имеют `task_id + sequence` +- [x] Есть `TaskStateStore` +- [x] Есть `CheckpointStore` +- [x] Есть сохранение checkpoint после critical transitions +- [ ] Есть базовый resume path + +## 3. Decision and Execution + +- [x] Есть `ContextBuilder` +- [x] Есть `Router` как pure decision layer +- [x] Есть `ExecutionDirective` +- [x] Есть `ExecutionEngine` +- [x] Есть `ExecutionScheduler` +- [ ] План валидируется и преобразуется в task graph + +## 4. Tools and Safety + +- [x] Есть `PermissionService` +- [x] Есть persistent store для user approvals +- [x] Есть `ToolSandbox` +- [x] Есть `ToolRegistry` +- [x] Работает `shell_exec` +- [x] Работает `file_read` +- [x] Работает `file_write` + +## 5. Models and Evaluation + +- [ ] Есть orchestrator adapter +- [ ] Есть planning mode interface +- [ ] Есть coder adapter +- [ ] Есть critic adapter +- [ ] Есть fallback policy при critic failure + +## 6. Memory + +- [ ] Есть SQLite metadata store +- [ ] Есть vector index adapter +- [ ] Работает memory insert/search +- [ ] Есть `MemoryWritePolicy` +- [ ] Запись в память не зависит только от critic score + +## 7. Interfaces + +- [x] Есть `POST /chat` +- [ ] Есть `WS /stream` +- [x] Есть `GET /health` +- [x] Есть локальный веб-чат для ручной проверки runtime +- [ ] Есть CLI для отправки задач и просмотра событий + +## 8. 
Reliability + +- [ ] Есть structured logging +- [ ] Есть retry/recovery policy skeleton +- [ ] Есть replay path from event store +- [ ] Есть critical-path smoke tests + +## 9. Demo Definition + +MVP считается достигнутым, если: + +- [ ] можно открыть локальный веб-чат в браузере +- [ ] можно отправить задачу +- [ ] видно streaming events +- [ ] видно планирование или direct action +- [ ] видно выполнение tool step +- [ ] опасная команда требует подтверждения +- [ ] финальный ответ возвращается пользователю diff --git a/TASK_3.md b/TASK_3.md new file mode 100644 index 0000000..2f9461c --- /dev/null +++ b/TASK_3.md @@ -0,0 +1,1255 @@ +Ты — senior AI systems engineer и principal backend architect. + +Твоя задача: спроектировать и реализовать полноценный локальный multi-model AI agent runtime. + +Это НЕ чат-бот. +Это НЕ demo script. +Это НЕ один большой файл с вызовами моделей и shell. + +Это автономная локальная система исполнения задач с: + +- central runtime loop +- несколькими локальными GGUF-моделями с жёсткими ролями +- tools +- planning +- critic loop +- долговременной memory +- permission gating +- event bus +- state persistence +- streaming +- конфигурируемым runtime + +Система должна быть расширяемой, тестируемой, отказоустойчивой и пригодной для дальнейшего развития. + +================================================== +1. PRODUCT GOAL +================================================== + +Построить локальный AI runtime, который: + +- принимает пользовательскую задачу +- извлекает релевантную память +- собирает контекст +- принимает orchestration-решение +- при необходимости строит план +- исполняет шаги через tools и coder +- оценивает результаты через critic +- сохраняет полезные результаты в memory +- публикует события исполнения +- поддерживает streaming клиенту +- требует подтверждения на опасные действия +- умеет восстанавливаться после сбоя +- полностью управляется через конфиги + +Система должна быть local-first. 
+ +================================================== +2. NON-GOALS +================================================== + +На первом этапе НЕ нужно: + +- строить полноценный UI frontend (тонкий локальный веб-чат для ручной проверки runtime допускается) +- делать distributed execution +- делать multi-user auth +- делать Kubernetes deployment +- делать сложный scheduler для множества параллельных задач +- делать self-modifying runtime + +Telegram bot допускается только как thin stub. + +================================================== +3. CENTRAL ARCHITECTURAL PRINCIPLE +================================================== + +Центр системы — `Runtime Loop Controller`. + +Не router. +Не отдельная LLM. +Не execution engine. + +Именно runtime loop замыкает полный цикл: + +`task -> state load -> context build -> orchestrator -> plan/decision -> execute -> critic -> memory policy -> state checkpoint -> next step` + +Целевая форма архитектуры: + +```text +Runtime Loop Controller + -> State Store / Checkpoints + -> Context Builder + -> Orchestrator / Planner + -> Router (policy + decision suggestion) + -> Execution Engine / Scheduler + -> Tools / Coder / Critic + -> Memory System + -> Event Bus / Event Store + -> Streaming Projection + -> back into Runtime Loop +``` + +Любой critical transition должен проходить через runtime loop. + +================================================== +4. MODELS AND HARD ROLES +================================================== + +Используй отдельные модели с жёстким разделением ответственности. 
+ +4.1 Orchestrator / Planner + +Модель: +- LLaMA-family GGUF + +Роль: +- orchestration reasoning +- decomposition of user task +- decision whether planning is needed +- plan generation in strict JSON format +- next-step suggestion + +Ограничения: +- не выполняет tools напрямую +- не пишет итоговый код, кроме инструкций для coder +- не оценивает финальную корректность результата + +4.2 Coder + +Модель: +- X-CODER GGUF + +Роль: +- generate_code +- fix_code +- refactor_code +- generate helper scripts when explicitly requested by runtime + +Ограничения: +- не принимает orchestration-решения +- не строит execution plan +- не вызывает tools напрямую + +4.3 Critic + +Модель: +- Gemma-family GGUF + +Роль: +- оценивает результаты tools +- оценивает результаты coder +- предлагает memory usefulness score +- предлагает safety/usefulness judgment + +Ограничения: +- не планирует +- не исполняет действия +- не принимает финальное решение о memory write + +4.4 Embeddings Engine + +Модель: +- MiniLM или совместимая embeddings model + +Роль: +- embeddings generation +- semantic retrieval + +Ограничения: +- не участвует в reasoning +- не участвует в planning + +================================================== +5. GLOBAL RULES +================================================== + +Обязательные правила: + +- Все execution transitions проходят через runtime loop controller. +- Все tool calls проходят через execution layer, permission layer и sandbox layer. +- Все prompts и model settings вынесены в `config/`. +- Все межмодульные контракты оформлены через типы/Pydantic models/dataclasses. +- Все важные действия публикуются как события. +- Task lifecycle не должен храниться только in-memory. +- Система должна корректно деградировать при сбое отдельных подсистем. + +Hard decision rule: + +- Все decision-producing components должны возвращать только структурированные decision objects. +- Ни один decision-producing component не должен напрямую исполнять tools. 
+- Ни один decision-producing component не должен напрямую мутировать task state. +- Ни один decision-producing component не должен неявно вызывать другие компоненты в обход runtime loop. + +Обязательная деградация: + +- если critic недоступен, runtime продолжает работу по fallback policy +- если memory retrieval недоступен, задача выполняется без retrieval +- если streaming недоступен, система возвращает sync response +- если planner вернул невалидный план, runtime делает controlled replan или graceful fail + +================================================== +6. IMPLEMENTATION ORDER +================================================== + +Реализация должна идти итерациями в таком порядке: + +1. project skeleton +2. typed contracts +3. runtime loop skeleton +4. event bus + event schema +5. state persistence + checkpoints +6. config loader +7. context builder skeleton +8. FastAPI skeleton +9. router +10. execution engine / scheduler +11. permission system +12. tool sandbox layer +13. local tools +14. coder integration +15. critic integration +16. memory system +17. memory write policy engine +18. streaming projection +19. CLI +20. optional Telegram stub + +После каждого шага ты обязан: + +- показать изменённые файлы +- показать структуру директорий +- кратко объяснить, что уже работает +- явно указать, что ещё stub + +================================================== +7. 
MVP BOUNDARY +================================================== + +Первая рабочая версия обязана поддерживать end-to-end сценарий: + +- пользователь отправляет задачу +- runtime loop создаёт task state +- context builder собирает контекст +- orchestrator решает direct action или planning +- execution engine исполняет шаги +- shell/file tools реально работают +- опасная команда требует подтверждения +- critic оценивает результат +- memory policy принимает решение о записи +- события пишутся в event store +- task state чекпоинтится +- клиент получает streaming или sync результат + +Минимальный набор tools для MVP: + +- `shell_exec` +- `file_read` +- `file_write` + +Второй приоритет: + +- `web_search` +- `web_fetch` + +================================================== +8. REQUIRED PROJECT STRUCTURE +================================================== + +Ожидаемая структура: + +```text +ducklm/ + app/ + api/ + core/ + runtime/ + events/ + state/ + tools/ + memory/ + permissions/ + streaming/ + cli/ + models/ + services/ + config/ + models.json + prompts.json + permissions.json + runtime.json + data/ + memory/ + state/ + events/ + permissions/ + tests/ + main.py +``` + +Допускается разумная адаптация, но separation of concerns обязателен. + +================================================== +9. REQUIRED DOMAIN CONTRACTS +================================================== + +Сначала зафиксируй typed contracts. 
+ +Минимально обязательны: + +9.1 `UserTask` + +```json +{ + "task_id": "uuid", + "session_id": "uuid", + "input": "string", + "context": {}, + "created_at": "iso-datetime" +} +``` + +9.2 `PlanStep` + +```json +{ + "id": "step-1", + "kind": "tool|coder|memory|respond", + "tool": "shell_exec", + "args": {}, + "description": "human readable step description", + "requires_confirmation": false, + "depends_on": [] +} +``` + +Rules: + +- `kind` обязателен +- `args` всегда объект +- `depends_on` обязателен, даже если пустой +- `tool` обязателен только для `kind=tool` + +9.3 `ToolCall` + +```json +{ + "tool": "shell_exec", + "args": {}, + "task_id": "uuid", + "step_id": "step-1" +} +``` + +9.4 `ToolResult` + +```json +{ + "tool": "shell_exec", + "ok": true, + "output": "stdout/stderr/parsed data", + "error": null, + "metadata": { + "exit_code": 0, + "duration_ms": 120 + } +} +``` + +9.5 `CoderRequest` + +```json +{ + "mode": "generate|fix|refactor", + "instruction": "string", + "context": {}, + "task_id": "uuid" +} +``` + +9.6 `CriticScore` + +```json +{ + "correctness": 0.0, + "usefulness": 0.0, + "safety": 0.0, + "memory_store": true, + "weight": 0.0, + "explanation": "string" +} +``` + +Rules: + +- все numeric scores в диапазоне `0..1` +- `weight` используется как сигнал, а не как безусловная команда записи + +9.7 `MemoryEntry` + +```json +{ + "id": "uuid", + "text": "string", + "kind": "tool_result|plan|critique|fact|summary|user_preference", + "source": "tool|critic|user|system", + "weight": 0.85, + "task_id": "uuid", + "session_id": "uuid", + "metadata": {}, + "created_at": "iso-datetime", + "embedding_model": "string", + "embedding_dim": 384 +} +``` + +9.8 `PermissionDecision` + +```json +{ + "action_type": "shell_command", + "pattern": "rm", + "decision": "allow_once|allow_always|deny|ask_always", + "created_at": "iso-datetime" +} +``` + +9.9 `RuntimeEvent` + +```json +{ + "event_id": "uuid", + "task_id": "uuid", + "session_id": "uuid", + "sequence": 42, + "type": 
"task_received", + "timestamp": "iso-datetime", + "payload": {}, + "causation_id": "uuid|null", + "correlation_id": "uuid" +} +``` + +9.10 `TaskCheckpoint` + +```json +{ + "task_id": "uuid", + "status": "executing_step", + "active_step_id": "step-2", + "plan_snapshot": {}, + "context_snapshot": {}, + "updated_at": "iso-datetime" +} +``` + +9.11 `ExecutionDirective` + +```json +{ + "type": "plan|tool|coder|respond|replan|store_memory|request_permission|complete|fail|noop", + "payload": {}, + "requires_permission": false, + "confidence": 0.0, + "reason": "string" +} +``` + +Rules: + +- все decision-producing components должны возвращать либо `ExecutionDirective`, либо коллекцию совместимых директив +- `confidence` находится в диапазоне `0..1` +- `payload` всегда объект +- директива описывает намерение, а не исполняет действие сама + +================================================== +10. RUNTIME LOOP CONTROLLER +================================================== + +Создай: + +- `app/runtime/runtime_loop.py` +- `app/runtime/runtime_controller.py` + +`Runtime Loop Controller` — heart of system. 
+ +Он обязан: + +- принять task +- загрузить или создать task state +- опубликовать стартовые события +- инициировать context assembly +- вызвать orchestrator +- определить `direct action / planning / replan / fail` +- передать исполнение в execution engine +- принять результаты tools/coder +- вызвать critic +- передать результат в memory write policy engine +- сохранить checkpoint +- опубликовать события +- решить `continue / replan / complete / fail` + +Runtime loop не должен: + +- собирать prompts inline вручную +- содержать raw tool logic +- подменять собой router +- подменять собой execution engine +- принимать policy-level решения вместо других компонентов + +Runtime loop обязан: + +- применять уже возвращённые decision objects +- переводить систему между состояниями +- координировать вызовы между компонентами + +Runtime loop не должен содержать скрытую бизнес-логику policy-уровня. + +================================================== +11. CONTEXT BUILDER +================================================== + +Создай: + +- `app/core/context_builder.py` + +Context builder обязан собирать: + +- user input +- session context +- retrieved memory +- current task state +- current plan or active step +- recent tool results +- permission state +- runtime constraints and safety limits + +Rules: + +- любой вызов orchestrator/planner идёт только через context builder +- context builder должен быть token-budget aware +- low-priority context должен отбрасываться при переполнении +- prompt assembly не должна дублироваться по проекту + +Минимальный результат: + +```json +{ + "system_prompt": "string", + "task_summary": "string", + "memory_context": [], + "execution_context": {}, + "tool_context": [], + "safety_context": {}, + "constraints": {} +} +``` + +================================================== +12. ORCHESTRATION, PLANNING, ROUTER +================================================== + +Planning — это режим orchestration model, а не отдельная модель. 
+ +Router должен быть только: + +- policy evaluator +- decision suggester + +Создай: + +- `app/core/router.py` + +Router обязан определять: + +- нужен ли retrieval +- нужен ли planning +- direct step vs multi-step flow +- когда нужен coder +- когда нужен critic +- когда нужен replan +- когда требуется permission gate + +Rules: + +- router должен быть pure function по контракту +- router принимает input state + assembled context +- router возвращает только structured decision object +- router не имеет side effects +- router не мутирует state +- router не вызывает tools +- router не управляет execution lifecycle +- router не владеет task lifecycle +- router не исполняет шаги +- runtime loop применяет router decisions + +Planner rules: + +- planner mode возвращает только строгий JSON +- невалидный план не исполняется +- runtime делает bounded retry или graceful fail + +================================================== +13. TASK GRAPH MODEL +================================================== + +План не должен жить только как список шагов. + +Даже если MVP исполняет шаги последовательно, внутренняя модель должна быть graph-compatible. + +Используй внутреннюю task graph representation: + +```json +{ + "nodes": [ + { + "id": "step-1", + "kind": "tool", + "tool": "shell_exec", + "args": {"command": "hostnamectl"}, + "depends_on": [] + }, + { + "id": "step-2", + "kind": "respond", + "depends_on": ["step-1"] + } + ] +} +``` + +Rules: + +- scheduler валидирует отсутствие циклов +- planner может возвращать `PlanStep[]` как transport format +- после валидации план преобразуется во внутренний task graph +- MVP может использовать sequential DAG scheduler + +================================================== +14. EXECUTION ENGINE AND SCHEDULER +================================================== + +Создай: + +- `app/core/execution_engine.py` +- `app/core/execution_scheduler.py` + +Execution engine работает под управлением runtime loop. 
+ +Execution engine обязан: + +- принимать валидированный task graph +- поддерживать execution cursor +- выбирать следующий исполнимый шаг +- учитывать зависимости шагов +- вызывать tools/coder через adapters +- возвращать структурированные результаты в runtime loop +- публиковать execution events + +Минимальные состояния: + +- `received` +- `retrieving_memory` +- `orchestrating` +- `planning` +- `awaiting_permission` +- `executing_step` +- `critic_evaluating` +- `storing_memory` +- `completed` +- `failed` + +Execution engine не должен заменять runtime loop. + +================================================== +15. EVENT BUS, EVENT STORE, REPLAY +================================================== + +Streaming events недостаточно. +Нужен внутренний event backbone. + +Создай: + +- `app/events/event_bus.py` +- `app/events/event_types.py` +- `app/events/event_store.py` + +EventBus обязан: + +- принимать runtime domain events +- гарантировать ordering per task +- выдавать monotonic sequence number per task +- публиковать события подписчикам +- писать события в durable store +- поддерживать projection в streaming layer + +Delivery guarantees: + +- ordering guarantee per task обязателен +- delivery model минимально `at least once` +- consumer-side idempotency обязательна +- deduplication key: `task_id + sequence` +- replay не должен ломать состояние при повторном применении уже известных событий + +Минимальные event types: + +- `task_received` +- `context_built` +- `llm_called` +- `llm_result_received` +- `plan_created` +- `step_started` +- `tool_called` +- `tool_completed` +- `coder_called` +- `coder_completed` +- `critic_called` +- `critic_completed` +- `memory_write_suggested` +- `memory_write_decided` +- `memory_written` +- `permission_requested` +- `permission_resolved` +- `checkpoint_saved` +- `task_completed` +- `task_failed` + +Event sourcing baseline: + +- каждое значимое действие должно порождать событие +- execution history должна быть воспроизводимой +- 
должна быть replay capability step-by-step + +Каждое событие должно быть idempotent и deduplicatable по: + +- `task_id + sequence` + +Streaming transport не является source of truth. + +================================================== +16. STATE PERSISTENCE AND CHECKPOINTING +================================================== + +In-memory only state запрещён для autonomous mode. + +Создай: + +- `app/state/task_state_store.py` +- `app/state/checkpoint_store.py` + +Используй: + +- SQLite как минимум для MVP + +State persistence layer обязан поддерживать: + +- task creation +- current task status +- active step +- current plan/task graph snapshot +- latest context summary +- latest safe checkpoint +- resume after restart/crash + +Обязательные правила: + +- checkpoint после critical transitions +- periodic checkpointing +- resume from last valid checkpoint + +================================================== +17. ASYNC EXECUTION ISOLATION +================================================== + +Нужна явная изоляция между LLM loop и tool execution. + +Обязательные требования: + +- долгие tool operations не должны блокировать runtime loop +- блокирующие операции должны идти через async adapter / isolated runner +- streaming и event publishing должны продолжаться во время исполнения tool + +Минимум: + +- async tool runner +- timeout wrapper +- cancellation handling +- bounded concurrency policy + +================================================== +18. TOOL SANDBOX LAYER +================================================== + +Помимо permission checks нужен sandbox layer. 
+ +Особенно для: + +- `shell_exec` +- `web_fetch` with browser fallback +- generated helper scripts + +Минимальные требования: + +- execution context isolation +- resource caps +- timeout enforcement +- working directory restrictions +- optional environment variable allowlist + +Для shell нужно предусмотреть: + +- CPU / wall time limits +- path restrictions where possible +- запрет неявного escalation + +================================================== +19. TOOLS SYSTEM +================================================== + +Нужен tool registry и единый tool interface. + +Обязательные tools для MVP: + +- `shell_exec` +- `file_read` +- `file_write` + +Второй этап: + +- `web_search` +- `web_fetch` + +Требования: + +- единый base tool interface +- единый `ToolResult` +- централизованный logging +- timeout/error isolation +- tool execution только через tool layer + +================================================== +20. TOOL SAFETY AND PERMISSIONS +================================================== + +Перед потенциально опасным действием система обязана проверить policy. + +Источники policy: + +- `config/permissions.json` +- persistent store пользовательских решений + +Поддерживаемые режимы: + +- `allow_once` +- `allow_always` +- `deny` +- `ask_always` + +Минимум опасных shell patterns: + +- `rm` +- `mv` в sensitive paths +- `chmod` +- `chown` +- package managers +- `curl | bash` +- `sudo` +- `shutdown` +- `reboot` + +Rules: + +- опасная команда не исполняется до решения пользователя +- решения пользователя сохраняются +- execution layer получает уже разрешённое или отклонённое действие + +================================================== +21. MEMORY SYSTEM +================================================== + +JSON file не использовать как primary memory store. 
+ +Используй: + +- SQLite как primary metadata store +- FAISS или hnswlib как vector index + +Memory обязана поддерживать: + +- insert +- semantic search +- delete +- update weight +- filtering by kind/session/task/source +- embedding versioning +- reindex + +Минимальные таблицы или эквивалент: + +`memory_items` +- id +- text +- kind +- source +- weight +- task_id +- session_id +- metadata_json +- created_at +- updated_at + +`embeddings_index_map` +- memory_id +- embedding_model +- embedding_dim +- vector_slot +- created_at + +Rules: + +- retrieval учитывает semantic score и memory weight +- low-value memories не должны загрязнять context +- смена embedding model требует reindex path + +================================================== +22. MEMORY WRITE POLICY ENGINE +================================================== + +Critic только предлагает. +Memory write policy engine решает. + +Создай: + +- `app/memory/write_policy.py` + +Policy engine должен учитывать: + +- critic score +- thresholds из config +- kind/source memory candidate +- deduplication signals +- session/task scope +- safety constraints +- runtime weight modifiers + +Решения policy engine: + +- `store` +- `store_with_weight` +- `skip` +- `merge_with_existing` + +Policy engine должен быть детерминированной функцией. + +Минимальная форма: + +`(critic_score + memory_type + runtime_weight + dedup_state + safety_state) -> decision` + +Нельзя ограничиваться примитивным правилом вида: + +- `if score > 0.7 then store` + +Нужно зафиксировать: + +- threshold model +- scoring formula or weighted rule set +- conflict resolution for near-duplicate memories +- merge policy for same-fact updates + +================================================== +23. 
CRITIC LOOP +================================================== + +Critic получает: + +- tool result +- coder output +- optional execution context + +Возвращает: + +```json +{ + "correctness": 0.91, + "usefulness": 0.77, + "safety": 1.0, + "memory_store": true, + "weight": 0.84, + "explanation": "Result is correct and safe, useful for future similar tasks" +} +``` + +Critic должен вызываться: + +- после tool execution +- после coder output +- перед memory write suggestion + +Critic failure не должен ломать execution path. +Critic возвращает suggestion, а не final write decision. + +================================================== +24. RETRY AND RECOVERY POLICY +================================================== + +Нужна явная retry/recovery стратегия. + +Обязательные политики: + +Planning retry: + +- ограниченное число replan attempts +- каждый retry логируется как событие + +Tool retry: + +- только для idempotent operations или явно разрешённых tools +- policy зависит от типа ошибки + +Partial failure recovery: + +- `fail task` +- `retry step` +- `skip step` +- `replan` + +Critic recovery: + +- critic failure переводится в fallback policy + +Минимальные поля в `config/runtime.json`: + +- `planner_retry_limit` +- `tool_retry_limit` +- `replan_limit` +- `step_timeout_ms` +- `task_timeout_ms` +- `allow_recovery_replan` +- `checkpoint_policy` +- `event_retention_policy` + +================================================== +25. STREAMING SYSTEM +================================================== + +Требуется FastAPI WebSocket streaming. + +Но streaming должен быть projection from event bus, а не отдельным источником правды. + +Минимальные внешние события: + +```json +{ "type": "status", "data": "planning" } +{ "type": "token", "data": "..." } +{ "type": "plan", "data": [...] 
} +{ "type": "tool_start", "tool": "shell_exec", "step_id": "step-1" } +{ "type": "tool_result", "tool": "shell_exec", "data": {...} } +{ "type": "critic", "data": {...} } +{ "type": "permission_required", "data": {...} } +{ "type": "final", "data": {...} } +``` + +================================================== +26. CONFIG SYSTEM +================================================== + +Всё должно жить в `config/`. + +Обязательные файлы: + +`config/models.json` +- model paths +- model roles +- inference params +- context sizes + +`config/prompts.json` +- orchestration prompt +- planning prompt +- coder prompt +- critic prompt + +`config/permissions.json` +- dangerous command policies +- sensitive paths +- default approval behavior + +`config/runtime.json` +- timeouts +- streaming settings +- critic fallback policy +- memory thresholds +- retrieval top_k +- replan limits +- max execution steps +- checkpoint policy +- event retention policy + +Hard rule: + +- никаких хардкодов prompts и critical thresholds в коде + +================================================== +27. API SERVER +================================================== + +Сделай FastAPI backend. + +Минимальные endpoints: + +- `POST /chat` +- `WS /stream` +- `POST /tool/execute` +- `GET /memory/search` +- `DELETE /memory/item/{id}` +- `GET /health` + +Требования: + +- Pydantic request/response models +- единый error handling +- dependency injection там, где это разумно + +================================================== +28. CODER MODULE +================================================== + +Создай: + +- `app/core/coder.py` + +Минимальный интерфейс: + +- `generate_code()` +- `fix_code()` +- `refactor_code()` + +Используется только coder model. + +================================================== +29. CLI +================================================== + +Добавить CLI для локального использования. 
+ +Минимум: + +- отправить задачу +- получить sync result +- показать streaming mode +- выполнить memory search + +================================================== +30. TELEGRAM BOT +================================================== + +Только optional stub. + +Если реализуешь: + +- не связывай core runtime с Telegram-specific code +- делай только thin adapter layer + +================================================== +31. RELIABILITY AND TESTING +================================================== + +Обязательные инженерные требования: + +- structured logging +- typed exceptions +- timeout handling +- graceful failures +- no silent pass +- no giant mixed-responsibility files + +Минимальные тесты: + +- runtime loop transitions +- event ordering +- checkpoint save/load +- replay path +- plan validation +- permission policy checks +- tool registry +- shell safety path +- memory insert/search +- memory write policy +- router basic flow + +================================================== +32. FORBIDDEN SHORTCUTS +================================================== + +Запрещено: + +- single-model architecture +- hardcoded prompts in code +- bypassing runtime loop +- bypassing router for policy decisions +- tool execution outside tool layer +- dangerous command execution without permission check +- JSON file as primary memory store +- in-memory-only task lifecycle for autonomous mode +- direct streaming transport as substitute for event bus +- critic-only memory write decision path +- accepting invalid planner JSON as-is +- giant monolithic runtime file + +================================================== +33. DEFINITION OF DONE +================================================== + +Работа считается выполненной, если: + +1. Есть модульная структура проекта. +2. Есть typed contracts для core entities. +3. Есть Runtime Loop Controller как центральный control loop. +4. Есть Context Builder. +5. Есть Router как policy evaluator / decision suggester. +6. 
Есть Execution Engine / Scheduler. +7. Есть EventBus + EventStore + replay-capable history. +8. Есть state persistence + checkpointing + resume. +9. Есть permission-gated tools. +10. Есть tool sandbox layer. +11. Есть coder integration. +12. Есть critic integration. +13. Есть memory на SQLite + vector index. +14. Есть memory write policy engine. +15. Есть FastAPI API. +16. Есть streaming как projection от event bus. +17. Есть CLI. +18. Есть базовые тесты critical path. + +================================================== +34. REQUIRED DELIVERY STYLE +================================================== + +Работай итеративно. + +После каждого шага: + +- показывай код +- показывай структуру файлов +- кратко объясняй решение +- явно отмечай допущения +- прямо помечай stubs + +Не перескакивай к финальному “всё готово”, если каркас ещё не выстроен. + +Начни с: + +1. project structure +2. typed contracts +3. runtime loop skeleton +4. event bus skeleton +5. state persistence skeleton +6. config loader +7. context builder skeleton +8. FastAPI skeleton +9. router +10. execution engine / scheduler + +Сначала построй правильный каркас. +Потом наполняй его логикой. + +КОНЕЦ ЗАДАНИЯ. 
diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..9b29354 --- /dev/null +++ b/app/__init__.py @@ -0,0 +1,2 @@ +"""ducklm application package.""" + diff --git a/app/api/__init__.py b/app/api/__init__.py new file mode 100644 index 0000000..92d0da2 --- /dev/null +++ b/app/api/__init__.py @@ -0,0 +1,2 @@ +"""API layer.""" + diff --git a/app/api/server.py b/app/api/server.py new file mode 100644 index 0000000..4938dab --- /dev/null +++ b/app/api/server.py @@ -0,0 +1,118 @@ +from __future__ import annotations + +import asyncio +from contextlib import asynccontextmanager +from pathlib import Path + +from fastapi import FastAPI, WebSocket, WebSocketDisconnect +from fastapi.responses import FileResponse +from pydantic import BaseModel + + +class CriticFeedbackRequest(BaseModel): + feedback: str + task_id: str | None = None + session_id: str | None = None + correctness_override: float | None = None + usefulness_override: float | None = None + safety_override: float | None = None + +from app.core.permission_resolution import PermissionResolutionRequest, SecretResolutionRequest, PasswordResolutionRequest +from app.core.contracts import UserTask +from app.runtime.runtime_controller import RuntimeController +from app.streaming.manager import StreamingManager + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Load models on startup.""" + print("Lifespan: Starting model loading...") + loop = asyncio.get_event_loop() + + def load_models(): + try: + print("Lifespan: Loading models...") + runtime.load_models_at_startup() + print("Lifespan: Models loaded") + except Exception as e: + print(f"Lifespan: Failed to load models: {e}") + import traceback + traceback.print_exc() + + await loop.run_in_executor(None, load_models) + + yield # Server runs here + + print("Lifespan: Shutting down...") + + +app = FastAPI(title="ducklm", lifespan=lifespan) +runtime = RuntimeController(base_dir=Path(__file__).resolve().parents[2]) +streaming = 
StreamingManager(runtime.event_bus) + + +@app.get("/") +def index() -> FileResponse: + return FileResponse(Path(__file__).resolve().parent / "static" / "index.html") + + +@app.get("/health") +def health() -> dict[str, str]: + return {"status": "ok"} + + +@app.post("/chat") +def chat(task: UserTask) -> dict[str, object]: + return runtime.handle_task(task) + + +@app.post("/permissions/resolve") +def resolve_permission(request: PermissionResolutionRequest) -> dict[str, object]: + return runtime.resolve_permission(task_id=request.task_id, decision=request.decision) + + +@app.post("/secrets/resolve") +def resolve_secret(request: SecretResolutionRequest) -> dict[str, object]: + return runtime.resolve_secret(task_id=request.task_id, secret=request.secret) + + +@app.post("/password/resolve") +def resolve_password(request: PasswordResolutionRequest) -> dict[str, object]: + return runtime.resolve_password(task_id=request.task_id, password=request.password) + + +@app.post("/critic/feedback") +def critic_feedback(request: CriticFeedbackRequest) -> dict[str, object]: + feedback = runtime.handle_critic_feedback( + feedback=request.feedback, + task_id=request.task_id, + session_id=request.session_id, + correctness_override=request.correctness_override, + usefulness_override=request.usefulness_override, + safety_override=request.safety_override, + ) + return feedback + + +@app.websocket("/stream/{task_id}") +async def stream_task(websocket: WebSocket, task_id: str) -> None: + await websocket.accept() + replayed_events = streaming.replay_events(task_id) + for event in replayed_events: + await websocket.send_json(event.model_dump(mode="json")) + if replayed_events and replayed_events[-1].type in {"task_completed", "task_failed"}: + await websocket.close() + return + + queue = streaming.subscribe(task_id) + try: + while True: + event = await asyncio.wait_for(queue.get(), timeout=15) + await websocket.send_json(event.model_dump(mode="json")) + if event.type in {"task_completed", 
"task_failed", "task_awaiting_permission", "task_awaiting_input"}: + break + except (asyncio.TimeoutError, WebSocketDisconnect): + pass + finally: + streaming.unsubscribe(task_id, queue) + await websocket.close() diff --git a/app/api/static/index.html b/app/api/static/index.html new file mode 100644 index 0000000..b381c08 --- /dev/null +++ b/app/api/static/index.html @@ -0,0 +1,510 @@ + + + + + + ducklm runtime test chat + + + +
+

ducklm runtime test chat

+

Thin browser client for checking task submission, tool execution and event replay.

+
+
+
+
+ + +
+
+ +
+
+ + + diff --git a/app/cli/__init__.py b/app/cli/__init__.py new file mode 100644 index 0000000..43164b3 --- /dev/null +++ b/app/cli/__init__.py @@ -0,0 +1,2 @@ +"""CLI layer.""" + diff --git a/app/core/__init__.py b/app/core/__init__.py new file mode 100644 index 0000000..b18a4b9 --- /dev/null +++ b/app/core/__init__.py @@ -0,0 +1,2 @@ +"""Core orchestration components.""" + diff --git a/app/core/async_router.py b/app/core/async_router.py new file mode 100644 index 0000000..1e63ae2 --- /dev/null +++ b/app/core/async_router.py @@ -0,0 +1,509 @@ +from __future__ import annotations + +import asyncio +import json +import logging +from typing import Any + +from app.core.contracts import ExecutionDirective +from app.core.intent_parser import IntentParser +from app.events.event_bus import EventBus +from app.events.event_types import ( + ORCHESTRATOR_CALLED, + ORCHESTRATOR_FALLBACK_USED, + ORCHESTRATOR_RETRY, + ORCHESTRATOR_RESULT, + ORCHESTRATOR_UNAVAILABLE, + THINKER_CALLED, + THINKER_RESULT, + JSON_COMPILER_CALLED, + JSON_COMPILER_RESULT, +) +from app.models.async_adapters import AsyncOrchestratorAdapter + +logger = logging.getLogger(__name__) + + +class AsyncRouter: + """Async router using Thinker + JSON Compiler pipeline.""" + + def __init__( + self, + thinker: AsyncOrchestratorAdapter | None = None, + json_compiler: AsyncOrchestratorAdapter | None = None, + intent_parser: IntentParser | None = None, + prompts: dict[str, str] | None = None, + event_bus: EventBus | None = None, + tool_registry=None, + retry_limit: int = 2, + debug: bool = False, + log_length: int = 500, + json_fix_retry_limit: int = 2, + json_fix_use_sys_util: bool = True, + intent_classifier: str = "thinker", + ) -> None: + self._thinker = thinker + self._json_compiler = json_compiler + self._intent_classifier = intent_classifier + self._sys_util = None + self._intent_parser = intent_parser or IntentParser() + self._prompts = prompts or {} + self._event_bus = event_bus + self._tool_registry = 
tool_registry + self._retry_limit = retry_limit + self._debug = debug + self._log_length = log_length + self._json_fix_retry_limit = json_fix_retry_limit + self._json_fix_use_sys_util = json_fix_use_sys_util + self._orchestrator = None # Set separately if needed for classification + + def set_event_bus(self, event_bus: EventBus) -> None: + self._event_bus = event_bus + + def set_thinker(self, thinker: AsyncOrchestratorAdapter) -> None: + self._thinker = thinker + + def set_json_compiler(self, json_compiler: AsyncOrchestratorAdapter) -> None: + self._json_compiler = json_compiler + + def set_sys_util(self, sys_util: AsyncOrchestratorAdapter) -> None: + self._sys_util = sys_util + + def set_orchestrator(self, orchestrator: AsyncOrchestratorAdapter) -> None: + self._orchestrator = orchestrator + + def set_tool_registry(self, tool_registry) -> None: + self._tool_registry = tool_registry + + async def decide( + self, + state: dict[str, Any], + context: dict[str, Any], + task_id: str | None = None, + session_id: str | None = None, + ) -> ExecutionDirective: + task_context = context.get("task_context", {}) + requested_tool = task_context.get("requested_tool") + task_summary = str(context.get("task_summary", "")) + + if requested_tool: + self._emit_event( + ORCHESTRATOR_RESULT, + {"reason": "explicit_tool_request", "tool": requested_tool}, + task_id, + session_id, + ) + return ExecutionDirective( + type="tool", + payload={ + "tool": requested_tool, + "args": task_context.get("tool_args", {}), + }, + requires_permission=requested_tool in {"shell_exec", "file_write"}, + confidence=0.9, + reason="Task context explicitly requested a tool execution.", + ) + + if self._thinker is None: + fallback = self._fallback_directive(task_summary) + self._emit_event( + ORCHESTRATOR_FALLBACK_USED, + {"reason": "thinker_unavailable", "directive": fallback.model_dump(mode="json")}, + task_id, + session_id, + ) + return fallback + + if self._json_compiler is None: + fallback = 
self._fallback_directive(task_summary) + self._emit_event( + ORCHESTRATOR_FALLBACK_USED, + {"reason": "json_compiler_unavailable", "directive": fallback.model_dump(mode="json")}, + task_id, + session_id, + ) + return fallback + + mode_hint = await self._classify_intent(task_summary) + thinker_prompt = self._build_thinker_prompt(task_summary, context, mode_hint) + + for thinker_attempt in range(self._retry_limit + 1): + if thinker_attempt > 0: + self._emit_event( + ORCHESTRATOR_RETRY, + {"attempt": thinker_attempt, "prompt": thinker_prompt}, + task_id, + session_id, + ) + thinker_prompt = self._add_thinker_feedback(thinker_prompt, last_thinker_error, thinker_attempt) + + self._emit_event( + THINKER_CALLED, + {"attempt": thinker_attempt, "mode": mode_hint}, + task_id, + session_id, + ) + + try: + thinker_result = await self._thinker.generate(thinker_prompt) + except Exception as e: + logger.warning(f"Thinker generate failed: {e}") + last_thinker_error = str(e) + continue + + logger.info(f"Thinker result (attempt {thinker_attempt + 1}): {thinker_result}") + self._emit_event( + THINKER_RESULT, + {"result": thinker_result, "attempt": thinker_attempt}, + task_id, + session_id, + ) + + # If mode_hint is conversation, only allow respond type + if mode_hint == "conversation" and not self._is_simple_response(thinker_result): + # Check if Thinker is trying to create an execution plan instead + if any(word in thinker_result.lower() for word in ["шаг", "step", "выполнить", "execute", "shell", "команда"]): + # Override to conversation-only response + respond_text = self._extract_conversation_response(thinker_result) + self._emit_event( + ORCHESTRATOR_RESULT, + {"directive": {"type": "respond", "payload": {"text": respond_text}}, "mode_violation": True}, + task_id, + session_id, + ) + return ExecutionDirective( + type="respond", + payload={"text": respond_text}, + requires_permission=False, + reason="Mode violation: conversation only", + ) + + if 
self._is_simple_response(thinker_result): + json_compiler_prompt = self._build_json_compiler_prompt(thinker_result) + else: + json_compiler_prompt = self._build_json_compiler_prompt(thinker_result) + + for compiler_attempt in range(self._json_fix_retry_limit + 1): + self._emit_event( + JSON_COMPILER_CALLED, + {"attempt": compiler_attempt, "plan": thinker_result}, + task_id, + session_id, + ) + + try: + compiler_result = await self._json_compiler.generate(json_compiler_prompt) + except Exception as e: + logger.warning(f"JSON Compiler generate failed: {e}") + compiler_result = None + + if compiler_result: + logger.info(f"JSON Compiler result (attempt {compiler_attempt + 1}): {compiler_result}") + self._emit_event( + JSON_COMPILER_RESULT, + {"result": compiler_result, "attempt": compiler_attempt}, + task_id, + session_id, + ) + + directive = self._validate_directive(compiler_result, mode_hint) if compiler_result else None + if directive is not None: + directive = self._guard_rail_check(directive) + self._emit_event( + ORCHESTRATOR_RESULT, + {"directive": directive.model_dump(mode="json"), "thinker_attempt": thinker_attempt, "compiler_attempt": compiler_attempt}, + task_id, + session_id, + ) + return directive + + if compiler_result: + logger.warning(f"JSON Compiler validation failed, attempting fix (attempt {compiler_attempt + 1})") + fix_result = await self._fix_invalid_json(compiler_result, compiler_attempt, task_id, session_id) + if fix_result: + fixed_directive = self._validate_directive(fix_result, mode_hint) + if fixed_directive is not None: + fixed_directive = self._guard_rail_check(fixed_directive) + self._emit_event( + ORCHESTRATOR_RESULT, + {"directive": fixed_directive.model_dump(mode="json"), "fixed": True}, + task_id, + session_id, + ) + return fixed_directive + + last_thinker_error = f"JSON Compiler failed after {self._json_fix_retry_limit + 1} attempts" + + self._emit_event( + ORCHESTRATOR_UNAVAILABLE, + {"reason": "retry_exhausted", "last_error": 
last_thinker_error}, + task_id, + session_id, + ) + raise RuntimeError(f"Thinker/Compiler pipeline failed after {self._retry_limit + 1} attempts") + + def _fallback_directive(self, task_summary: str) -> ExecutionDirective: + parsed = self._intent_parser.parse(task_summary) + if parsed: + return parsed + + return ExecutionDirective( + type="respond", + payload={"text": f"Runtime accepted task: {task_summary}"}, + requires_permission=False, + confidence=0.4, + reason="Fallback response because local orchestration models are not loaded.", + ) + + def _is_simple_response(self, thinker_result: str) -> bool: + result_lower = thinker_result.lower().strip() + return result_lower.startswith("ответ:") or result_lower.startswith("response:") or "не нужно" in result_lower + + def _extract_conversation_response(self, thinker_result: str) -> str: + """Extract text response from thinker result for conversation mode.""" + result_lower = thinker_result.lower() + + # Skip the ПЛАН lines, just get the ОТВЕТ part + lines = thinker_result.split('\n') + response_lines = [] + capture = False + + for line in lines: + if line.strip().lower().startswith('ответ:') or line.strip().lower().startswith('response:'): + capture = True + response_lines.append(line) + elif capture and line.strip(): + # Check if this is a new ПЛАН or step + if line.strip().lower().startswith('план') or line.strip().lower().startswith('step'): + break + response_lines.append(line) + + if response_lines: + return '\n'.join(response_lines).replace('ответ:', '').replace('response:', '').strip() + + # Fallback: return first few sentences + sentences = thinker_result.split('.')[:3] + return '. 
'.join(sentences).strip() + + def _build_thinker_prompt( + self, task_summary: str, context: dict[str, Any], mode_hint: str + ) -> str: + base_prompt = self._prompts.get("thinker", "") + memory_context = context.get("memory_context", []) + + tools_json = "[]" + if self._tool_registry: + schemas = self._tool_registry.list_schemas() + tools_json = json.dumps(schemas, ensure_ascii=False, indent=2) + + prompt_lines = [ + base_prompt, + "", + f"Task: {task_summary}", + f"Mode hint: {mode_hint}", + ] + + if memory_context: + memory_text = "\n".join([f"- {m.get('text', '')}" for m in memory_context[:5]]) + prompt_lines.append(f"\nRelevant memory:\n{memory_text}") + + session_history = context.get("session_history", []) + if session_history: + history_text = "\n".join([f"- {h.get('text', '')}" for h in session_history[:3]]) + prompt_lines.append(f"\nPrevious requests in this session:\n{history_text}") + + prompt_lines.extend([ + "", + f"AVAILABLE TOOLS (JSON):", + tools_json, + "", + ]) + + return "\n".join(prompt_lines) + + def _build_json_compiler_prompt(self, thinker_result: str) -> str: + base_prompt = self._prompts.get("json_compiler", "") + + prompt_lines = [ + base_prompt, + "", + "Thinker's plan:", + thinker_result, + "", + ] + + return "\n".join(prompt_lines) + + def _determine_mode_from_context(self, context: dict[str, Any]) -> str: + """Legacy method - kept for compatibility""" + task_summary = str(context.get("task_summary", "")).lower() + keywords = ["запусти", "выполни", "создай", "напиши", "удали", "run", "execute", "create"] + for kw in keywords: + if kw in task_summary: + return "execution" + return "conversation" + + async def _classify_intent(self, task_summary: str) -> str: + """LLM-based intent classification""" + if self._intent_classifier == "orchestrator" and self._orchestrator: + classifier_model = self._orchestrator + else: + classifier_model = self._thinker + + if not classifier_model: + logger.warning("No classifier model available, using 
default") + return "conversation" + + classification_prompt = f"""Классифицируй запрос пользователя: "{task_summary}" + +Правила: +- execution: пользователь ХОЧЕТ выполнить действие (проверить, запустить, создать, удалить, найти, прочитать, записать) +- conversation: пользователь просто отвечает, задаёт вопрос или хочет информацию +- clarification_needed: непонятно что делать + +Ответь ОДНИМ словом: execution / conversation / clarification_needed""" + + try: + result = await classifier_model.generate(classification_prompt) + result = result.strip().lower() + + # Extract first word - LLM often adds explanation + first_word = result.split()[0] if result.split() else "" + + # Validate result is one of allowed values + allowed = {"execution", "conversation", "clarification_needed"} + if first_word in allowed: + logger.info(f"Intent classified: {first_word} for task: {task_summary}") + return first_word + + if result in allowed: + logger.info(f"Intent classified: {result} for task: {task_summary}") + return result + + logger.warning(f"Invalid classification result: {result}, defaulting to conversation") + return "conversation" + except Exception as e: + logger.warning(f"Intent classification failed: {e}, defaulting to conversation") + return "conversation" + + def _validate_directive(self, output: str, mode_hint: str) -> ExecutionDirective | None: + if not output: + return None + + try: + json_start = output.find("{") + json_end = output.rfind("}") + 1 + if json_start < 0 or json_end <= 0: + return None + + json_str = output[json_start:json_end] + data = json.loads(json_str) + + if "type" not in data: + return None + + msg_type = data.get("type", "") + payload = data.get("payload", {}) + + if msg_type == "step" and "tool" in payload: + tool = payload.get("tool", "") + args = payload.get("args", {}) + payload = {"tool": tool, "args": args} + + if msg_type == "plan": + payload = {"steps": payload.get("steps", [])} + + return ExecutionDirective( + type=msg_type, + 
payload=payload, + confidence=data.get("confidence", 0.9), + reason=data.get("reason", ""), + ) + except (json.JSONDecodeError, ValueError, TypeError) as e: + logger.warning(f"Directive JSON validation failed: {e}") + return None + + def _guard_rail_check(self, directive: ExecutionDirective) -> ExecutionDirective: + tool_name = directive.payload.get("tool", "") + if tool_name in {"shell_exec", "file_write", "file_delete"}: + return ExecutionDirective( + type=directive.type, + payload=directive.payload, + requires_permission=True, + confidence=directive.confidence, + reason=directive.reason, + ) + return directive + + def _add_thinker_feedback(self, prompt: str, error: str, attempt: int) -> str: + feedback = f"\n[ATTEMPT {attempt + 1} FAILED: {error}]\n" + feedback += "Provide a valid semantic plan.\n" + return prompt + feedback + + def _emit_event( + self, + event_type: str, + payload: dict[str, Any], + task_id: str | None, + session_id: str | None, + ) -> None: + if self._event_bus and task_id: + from app.core.contracts import RuntimeEvent + event = RuntimeEvent( + task_id=task_id, + session_id=session_id or "unknown", + sequence=self._event_bus.next_sequence(task_id), + type=event_type, + payload=payload, + ) + self._event_bus.publish(event) + + SYS_UTIL_PROMPT = None + + async def _fix_invalid_json(self, invalid_result: str, attempt: int, task_id: str | None, session_id: str | None) -> str | None: + """Try to fix invalid JSON using sys_util model.""" + if not self._sys_util: + return None + + first_brace = invalid_result.find('{') + last_brace = invalid_result.rfind('}') + if first_brace < 0 or last_brace <= first_brace: + return None + + truncated_json = invalid_result[first_brace:last_brace + 1] + + error_msg = "" + try: + json.loads(truncated_json) + except json.JSONDecodeError as e: + error_msg = str(e) + + sys_util_prompt = ( + self._prompts.get("sys_util") + if self._prompts + else self.SYS_UTIL_PROMPT or ( + "You are a STRICT JSON repair engine. 
" + "Your job is ONLY to fix invalid JSON syntax. " + "You MUST output valid JSON or nothing else." + ) + ) + fix_prompt = f"""{sys_util_prompt} + + {error_msg} + + Fixed JSON:""" + + try: + logger.info(f"JSON fix using sys_util model (attempt {attempt + 1})") + fixed_result = await self._sys_util.generate(fix_prompt) + + fixed_first = fixed_result.find('{') + fixed_last = fixed_result.rfind('}') + if fixed_first >= 0 and fixed_last > fixed_first: + return fixed_result[fixed_first:fixed_last + 1] + + return None + + except Exception as e: + logger.warning(f"JSON fix failed: {e}") + return None diff --git a/app/core/config.py b/app/core/config.py new file mode 100644 index 0000000..831d094 --- /dev/null +++ b/app/core/config.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from pydantic import BaseModel, Field + + +class ModelsConfig(BaseModel): + orchestrator_path: str = "models/llama.gguf" + coder_path: str = "models/xcoder.gguf" + critic_path: str = "models/gemma.gguf" + embeddings_path: str = "models/all-MiniLM-L6-v2" + inference: dict[str, Any] = Field(default_factory=dict) + thinker: dict[str, Any] = Field(default_factory=dict) + json_compiler: dict[str, Any] = Field(default_factory=dict) + orchestrator: dict[str, Any] = Field(default_factory=dict) + coder: dict[str, Any] = Field(default_factory=dict) + critic: dict[str, Any] = Field(default_factory=dict) + sys_util: dict[str, Any] = Field(default_factory=dict) + embeddings: dict[str, Any] = Field(default_factory=dict) + + +class PromptsConfig(BaseModel): + orchestration_prompt: str = "" + planning_prompt: str = "" + coder_prompt: str = "" + critic_prompt: str = "" + + +class PermissionsConfig(BaseModel): + dangerous_commands: dict[str, str] = Field(default_factory=dict) + sensitive_paths: list[str] = Field(default_factory=list) + default_approval_behavior: str = "ask_always" + + +class RuntimeConfig(BaseModel): + step_timeout_ms: int = 
30_000 + task_timeout_ms: int = 300_000 + planner_retry_limit: int = 2 + tool_retry_limit: int = 1 + replan_limit: int = 1 + max_execution_steps: int = 20 + retrieval_top_k: int = 5 + max_context_tokens: int = 8192 + context_budgets: dict[str, int] = Field(default_factory=lambda: { + "system": 512, + "task": 512, + "memory": 2048, + "execution": 2048, + "tools": 1024, + "safety": 512, + }) + reserve_for_generation_pct: int = 25 + orchestrator_retry_limit: int = 2 + intent_classifier: str = "thinker" + memory_thresholds: dict[str, float] = Field(default_factory=dict) + critic_fallback_policy: str = "continue_without_critic" + checkpoint_policy: dict[str, Any] = Field(default_factory=dict) + event_retention_policy: dict[str, Any] = Field(default_factory=dict) + streaming_settings: dict[str, Any] = Field(default_factory=dict) + debug: bool = False + debug_orchestrator_log_length: int = 500 + json_fix_retry_limit: int = 2 + json_fix_use_sys_util: bool = True + + +class AppConfig(BaseModel): + models: ModelsConfig + prompts: PromptsConfig + permissions: PermissionsConfig + runtime: RuntimeConfig + + +def _load_json(path: Path) -> dict[str, Any]: + with path.open("r", encoding="utf-8") as handle: + return json.load(handle) + + +def load_app_config(config_dir: str | Path) -> AppConfig: + config_path = Path(config_dir) + return AppConfig( + models=ModelsConfig.model_validate(_load_json(config_path / "models.json")), + prompts=PromptsConfig.model_validate(_load_json(config_path / "prompts.json")), + permissions=PermissionsConfig.model_validate(_load_json(config_path / "permissions.json")), + runtime=RuntimeConfig.model_validate(_load_json(config_path / "runtime.json")), + ) + diff --git a/app/core/context_builder.py b/app/core/context_builder.py new file mode 100644 index 0000000..77f4b32 --- /dev/null +++ b/app/core/context_builder.py @@ -0,0 +1,172 @@ +from __future__ import annotations + +import logging +from typing import Any + +from app.core.contracts import 
TaskCheckpoint, UserTask + +logger = logging.getLogger(__name__) + +DEFAULT_BUDGETS = { + "system": 512, + "task": 512, + "memory": 2048, + "execution": 2048, + "tools": 1024, + "safety": 512, +} + + +class ContextBuilder: + def __init__( + self, + memory_interface=None, + tool_registry=None, + config: dict[str, Any] | None = None, + ) -> None: + self._memory = memory_interface + self._tool_registry = tool_registry + self._config = config or {} + self._max_tokens = self._config.get("max_context_tokens", 8192) + self._budgets = self._config.get("context_budgets", DEFAULT_BUDGETS) + self._reserve_pct = self._config.get("reserve_for_generation_pct", 25) + + def build( + self, + task: UserTask, + checkpoint: TaskCheckpoint | None = None, + query: str | None = None, + ) -> dict[str, Any]: + task_summary = task.input + search_query = query or task_summary + session_id = task.session_id + + memory_context = [] + if self._memory: + memory_context = self._retrieve_memory(search_query, session_id=session_id) + + budgets = self._calculate_budgets() + reserved = self._reserve_for_generation() + + system_budget = budgets.get("system", 512) + task_budget = budgets.get("task", 512) + safety_budget = budgets.get("safety", 512) + memory_budget = budgets.get("memory", 2048) + + truncated_memory = self._truncate_memory( + memory_context, memory_budget + ) + + # Get session history for follow-up context + session_history = self._get_session_history(session_id) + + context = { + "system_prompt": "", + "task_summary": task_summary[:task_budget], + "task_context": task.context, + "memory_context": truncated_memory, + "session_history": session_history, + "execution_context": checkpoint.model_dump() if checkpoint else {}, + "tool_context": self._get_tool_context(), + "safety_context": {}, + "constraints": { + "budgets": budgets, + "reserved_for_generation": reserved, + "original_memory_count": len(memory_context), + "truncated_memory_count": len(truncated_memory), + }, + } + + return 
context + + def _get_tool_context(self) -> list[dict[str, Any]]: + """Expose available tools to orchestrator.""" + if not self._tool_registry: + return [] + + tools = [] + for name in self._tool_registry.list_names(): + tool = self._tool_registry.get(name) + tools.append({ + "name": name, + "description": getattr(tool, "description", ""), + }) + return tools + + def _calculate_budgets(self) -> dict[str, int]: + return dict(self._budgets) + + def _reserve_for_generation(self) -> int: + return int(self._max_tokens * self._reserve_pct / 100) + + def _retrieve_memory( + self, + query: str, + session_id: str | None = None, + top_k: int = 5, + ) -> list[dict[str, Any]]: + if not self._memory: + return [] + + try: + results = self._memory.search(query, top_k=top_k, session_id=session_id) + return [ + { + "id": entry.id, + "text": entry.text, + "kind": entry.kind, + "source": entry.source, + "weight": entry.weight, + "score": score, + } + for entry, score in results + ] + except Exception as e: + logger.warning(f"Memory retrieval failed: {e}") + return [] + + def _get_session_history(self, session_id: str | None = None) -> list[dict[str, Any]]: + """Get previous task summaries from the same session for context.""" + if not self._memory or not session_id: + return [] + + try: + # Get recent entries from same session + entries = self._memory.get_by_session(session_id, limit=5) + # Filter to only task summaries + summaries = [ + { + "id": entry.id, + "text": entry.text, + "kind": entry.kind, + "source": entry.source, + "weight": entry.weight, + } + for entry in entries + if entry.kind in ("summary", "tool_result") + ] + return summaries + except Exception as e: + logger.warning(f"Session history retrieval failed: {e}") + return [] + + def _truncate_memory( + self, + memory_context: list[dict[str, Any]], + budget: int, + ) -> list[dict[str, Any]]: + if not memory_context: + return [] + + estimated_per_entry = 50 + max_entries = max(budget // estimated_per_entry, 1) + + if 
len(memory_context) > max_entries: + return memory_context[:max_entries] + + return memory_context + + def estimate_tokens(self, text: str) -> int: + if not text: + return 0 + return len(text.split()) * 4 // 3 \ No newline at end of file diff --git a/app/core/contracts.py b/app/core/contracts.py new file mode 100644 index 0000000..2a1baa8 --- /dev/null +++ b/app/core/contracts.py @@ -0,0 +1,148 @@ +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any, Literal +from uuid import uuid4 + +from pydantic import BaseModel, Field + + +def utc_now() -> datetime: + return datetime.now(timezone.utc) + + +class UserTask(BaseModel): + task_id: str = Field(default_factory=lambda: str(uuid4())) + session_id: str = Field(default_factory=lambda: str(uuid4())) + input: str + context: dict[str, Any] = Field(default_factory=dict) + created_at: datetime = Field(default_factory=utc_now) + + +class PlanStep(BaseModel): + id: str + kind: Literal["tool", "coder", "memory", "respond"] + tool: str | None = None + args: dict[str, Any] = Field(default_factory=dict) + description: str + requires_confirmation: bool = False + depends_on: list[str] = Field(default_factory=list) + + +class ToolCall(BaseModel): + tool: str + args: dict[str, Any] = Field(default_factory=dict) + task_id: str + step_id: str + + +class ToolResult(BaseModel): + tool: str + ok: bool + output: Any = None + error: str | None = None + metadata: dict[str, Any] = Field(default_factory=dict) + + +class CoderRequest(BaseModel): + mode: Literal["generate", "fix", "refactor"] + instruction: str + context: dict[str, Any] = Field(default_factory=dict) + task_id: str + + +class CriticScore(BaseModel): + correctness: float = Field(ge=0.0, le=1.0) + usefulness: float = Field(ge=0.0, le=1.0) + safety: float = Field(ge=0.0, le=1.0) + memory_store: bool + weight: float = Field(ge=0.0, le=1.0) + explanation: str + + +class MemoryEntry(BaseModel): + id: str = Field(default_factory=lambda: 
str(uuid4())) + text: str + kind: Literal["tool_result", "plan", "critique", "fact", "summary", "user_preference"] + source: Literal["tool", "critic", "user", "system"] + weight: float = Field(ge=0.0, le=1.0) + task_id: str | None = None + session_id: str | None = None + metadata: dict[str, Any] = Field(default_factory=dict) + created_at: datetime = Field(default_factory=utc_now) + embedding_model: str + embedding_dim: int + + +class PermissionDecision(BaseModel): + action_type: str + pattern: str + decision: Literal["allow_once", "allow_always", "deny", "ask_always"] + created_at: datetime = Field(default_factory=utc_now) + + +class RuntimeEvent(BaseModel): + event_id: str = Field(default_factory=lambda: str(uuid4())) + task_id: str + session_id: str + sequence: int + type: str + timestamp: datetime = Field(default_factory=utc_now) + payload: dict[str, Any] = Field(default_factory=dict) + causation_id: str | None = None + correlation_id: str = Field(default_factory=lambda: str(uuid4())) + + +class TaskCheckpoint(BaseModel): + task_id: str + status: str + active_step_id: str | None = None + plan_snapshot: dict[str, Any] = Field(default_factory=dict) + context_snapshot: dict[str, Any] = Field(default_factory=dict) + updated_at: datetime = Field(default_factory=utc_now) + + +class PermissionRequest(BaseModel): + task_id: str + session_id: str + action_type: str + pattern: str + command: str | None = None + path: str | None = None + requires_password: bool = False + + +class SecretRequest(BaseModel): + task_id: str + session_id: str + kind: str + prompt: str + command: str | None = None + + +class PasswordRequest(BaseModel): + task_id: str + session_id: str + command: str + reason: str + attempts: int = 0 + max_attempts: int = 3 + + +class ExecutionDirective(BaseModel): + type: Literal[ + "plan", + "tool", + "coder", + "respond", + "replan", + "store_memory", + "request_permission", + "complete", + "fail", + "noop", + ] + payload: dict[str, Any] = 
Field(default_factory=dict) + requires_permission: bool = False + confidence: float = Field(ge=0.0, le=1.0, default=0.0) + reason: str = "" diff --git a/app/core/execution_engine.py b/app/core/execution_engine.py new file mode 100644 index 0000000..7bbbcba --- /dev/null +++ b/app/core/execution_engine.py @@ -0,0 +1,591 @@ +from __future__ import annotations + +import asyncio +import json +import logging +from typing import Any + +from app.core.contracts import ( + CriticScore, + ExecutionDirective, + PermissionDecision, + PermissionRequest, + RuntimeEvent, + SecretRequest, + ToolCall, + UserTask, +) +from app.core.execution_scheduler import ExecutionScheduler +from app.events.event_bus import EventBus +from app.events.event_types import ( + CRITIC_CALLED, + CRITIC_RESULT, + PERMISSION_REQUESTED, + PERMISSION_RESOLVED, + PLAN_FAILED, + PLAN_STARTED, + SECRET_REQUESTED, + STEP_STARTED, + STEPPED_COMPLETED, + TOOL_CALLED, + TOOL_COMPLETED, +) +from app.models.async_adapters import AsyncCriticAdapter, AsyncCoderAdapter +from app.memory.write_policy import MemoryWritePolicy +from app.memory.interface import MemoryInterface + +logger = logging.getLogger(__name__) + + +class ExecutionEngine: + def __init__( + self, + event_bus: EventBus, + tool_registry, + permission_service, + scheduler: ExecutionScheduler | None = None, + critic: AsyncCriticAdapter | None = None, + memory_policy: MemoryWritePolicy | None = None, + memory_interface: MemoryInterface | None = None, + prompts: dict[str, str] | None = None, + ) -> None: + self._event_bus = event_bus + self._tool_registry = tool_registry + self._permission_service = permission_service + self._scheduler = scheduler or ExecutionScheduler() + self._critic = critic + self._coder: AsyncCoderAdapter | None = None + self._memory_policy = memory_policy + self._memory_interface = memory_interface + self._prompts = prompts or {} + + def set_critic(self, critic: AsyncCriticAdapter) -> None: + self._critic = critic + + def 
def _execute_plan(
    self,
    task: UserTask,
    directive: ExecutionDirective,
    permission_override: PermissionDecision | None = None,
    secret_override: str | None = None,
    password_override: str | None = None,
) -> dict[str, Any]:
    """Execute every step of a plan directive, one ready step at a time.

    The directive payload is expected in the unified format
    ``{"steps": [...]}``; a JSON string of the form
    ``{"type": "plan", "payload": {"steps": [...]}}`` is also accepted.

    Args:
        task: Task the plan belongs to; used for events and tool calls.
        directive: Plan directive whose payload carries the steps.
        permission_override: Forwarded unchanged to ``_execute_tool``.
        secret_override: Forwarded unchanged to ``_execute_tool``.
        password_override: Forwarded unchanged to ``_execute_tool``.

    Returns:
        A dict with ``status`` (``completed``, ``failed`` or one of the
        ``awaiting_*`` states) and a ``result`` payload. When the plan
        pauses on an ``awaiting_*`` step, the results of the steps that
        already ran are returned under ``step_results``.
    """
    payload = directive.payload
    steps_data: Any = []

    if isinstance(payload, dict):
        steps_data = payload.get("steps") or []
    elif isinstance(payload, str) and payload.strip().startswith("{"):
        # Plan delivered as raw JSON text: unwrap the nested payload.
        try:
            parsed = json.loads(payload)
            steps_data = parsed.get("payload", {}).get("steps", [])
        except (json.JSONDecodeError, AttributeError):
            steps_data = []

    if steps_data:
        plan_json = json.dumps({"type": "plan", "payload": {"steps": steps_data}})
    else:
        plan_json = json.dumps(payload)

    plan_steps = self._scheduler.parse_plan_steps(plan_json, task.task_id)

    if not plan_steps:
        return {
            "status": "failed",
            "result": {"error": "Failed to parse plan steps from directive"},
        }

    if not self._scheduler.validate_no_cycles(plan_steps):
        self._publish(task, PLAN_FAILED, {"error": "Cycle detected in plan"})
        return {
            "status": "failed",
            "result": {"error": "Cycle detected in plan"},
        }

    graph = self._scheduler.build_task_graph(plan_steps)
    self._publish(task, PLAN_STARTED, {"steps": len(plan_steps)})

    # The critic only runs when the plan did real work and a critic is wired in.
    requires_execution = True
    if isinstance(payload, dict):
        requires_execution = payload.get("requires_execution", True)

    # Any of these means the step did NOT finish: the plan must pause and
    # hand control back to the caller instead of marking the step done.
    paused_statuses = {"awaiting_permission", "awaiting_input", "awaiting_password"}

    completed_steps: set[str] = set()
    step_results: list[dict[str, Any]] = []

    ready_steps = self._get_ready_steps(graph, completed_steps)

    while ready_steps:
        step = ready_steps[0]

        if step.kind == "respond":
            # Respond steps carry their text directly; no tool is involved.
            result = {
                "status": "completed",
                "result": {
                    "message": step.args.get("text", step.description),
                },
            }
        else:
            step_directive = ExecutionDirective(
                type=step.kind,
                payload={
                    "tool": step.tool,
                    "args": step.args,
                },
                requires_permission=step.requires_confirmation,
                reason=step.description,
            )

            result = self._execute_tool(
                task=task,
                directive=step_directive,
                permission_override=permission_override,
                secret_override=secret_override,
                password_override=password_override,
            )

        status = result.get("status")

        if status in paused_statuses:
            return {
                "status": status,
                "result": result.get("result", {}),
                "step_results": step_results,
            }

        step_results.append({
            "step_id": step.id,
            "result": result,
        })

        completed_steps.add(step.id)
        self._publish(task, STEPPED_COMPLETED, {
            "step_id": step.id,
            "status": status,
        })

        # A failed step aborts the whole plan.
        if status == "failed":
            return {
                "status": "failed",
                "result": {
                    "error": f"Step {step.id} failed",
                    "failed_step": step.id,
                    "step_results": step_results,
                },
            }

        if requires_execution and self._critic:
            critic_result = self._evaluate_with_critic(task, step, result)
            if critic_result:
                # Convert to a plain dict so the result stays JSON-serializable.
                if hasattr(critic_result, "model_dump"):
                    result["critic_score"] = critic_result.model_dump(mode="json")
                else:
                    result["critic_score"] = dict(critic_result)
                self._save_critique_to_memory(task, step, critic_result)

        ready_steps = self._get_ready_steps(graph, completed_steps)

    # Steps whose dependencies were never satisfied (e.g. a dependency on an
    # unknown step id) must not be reported as a successful plan.
    pending = [s.id for s in plan_steps if s.id not in completed_steps]
    if pending:
        self._publish(task, PLAN_FAILED, {
            "error": "Unresolved step dependencies",
            "pending_steps": pending,
        })
        return {
            "status": "failed",
            "result": {
                "error": "Unresolved step dependencies",
                "pending_steps": pending,
                "step_results": step_results,
            },
        }

    return {
        "status": "completed",
        "result": {
            "message": f"Plan executed: {len(completed_steps)} steps completed",
            "step_results": step_results,
        },
    }
asyncio.run(self._critic.generate(critic_prompt)) + score = self._parse_critic_score(critic_output) + + self._publish(task, CRITIC_RESULT, { + "step_id": step.id, + "score": score.model_dump(mode="json") if score else None, + }) + + if score: + result["critic_score"] = { + "correctness": score.correctness, + "usefulness": score.usefulness, + "safety": score.safety, + "memory_store": score.memory_store, + "weight": score.weight, + "explanation": score.explanation, + } + + return score + + except Exception as e: + logger.warning(f"Critic evaluation failed: {e}") + self._publish(task, CRITIC_RESULT, { + "step_id": step.id, + "error": str(e), + }) + return None + + def _save_critique_to_memory( + self, + task: UserTask, + step, + score: CriticScore, + ) -> None: + """Save critic evaluation as critique entry in memory.""" + if not self._memory_interface: + return + + try: + tool_name = step.tool + tool_args = step.args or {} + args_str = ", ".join([f"{k}={v}" for k, v in tool_args.items()]) + + critique_text = f"Tool: {tool_name}({args_str}) | Task: {task.input[:100]} | Scores: correctness={score.correctness}, usefulness={score.usefulness}, safety={score.safety} | {score.explanation}" + + metadata = { + "task_input": task.input, + "tool": tool_name, + "args": tool_args, + "step_id": step.id, + "scores": { + "correctness": score.correctness, + "usefulness": score.usefulness, + "safety": score.safety, + }, + } + + self._memory_interface.insert( + text=critique_text, + kind="critique", + source="critic", + task_id=task.task_id, + session_id=task.session_id, + weight=score.weight, + metadata=metadata, + ) + logger.info(f"Saved critique to memory: {tool_name} task_id={task.task_id}") + + except Exception as e: + logger.warning(f"Failed to save critique to memory: {e}") + + def _build_critic_prompt(self, step, result: dict[str, Any]) -> str: + base_prompt = self._prompts.get("critic", "") + tool_result = result.get("result", {}) + + return f"""{base_prompt} + +Step: 
def _execute_tool(
    self,
    task: UserTask,
    directive: ExecutionDirective,
    permission_override: PermissionDecision | None = None,
    secret_override: str | None = None,
    password_override: str | None = None,
) -> dict[str, Any]:
    """Run a single tool directive behind the permission gate.

    Args:
        task: Task on whose behalf the tool runs.
        directive: Directive whose payload holds ``tool`` and ``args``.
        permission_override: Accepted for interface compatibility; this
            method does not consult it.
        secret_override: Sudo secret; when given, a ``sudo `` shell command
            is rewritten to read the secret from stdin.
        password_override: When given, injected into the tool arguments
            under the ``password`` key.

    Returns:
        A dict with ``status`` (``completed``, ``failed``,
        ``awaiting_permission``, ``awaiting_input`` or
        ``awaiting_password``) and a ``result`` payload.
    """
    import posixpath  # local import: only needed for the write-path check

    tool_name = str(directive.payload.get("tool") or "").strip()
    # ``args`` may be present but null in model-generated plans.
    tool_args = dict(directive.payload.get("args") or {})

    if password_override:
        tool_args["password"] = password_override

    if not tool_name:
        return {"status": "failed", "result": {"error": "Missing tool name"}}

    # Tool-first: the tool must exist in the registry.
    available_tools = self._tool_registry.list_names()
    if tool_name not in available_tools:
        return {"status": "failed", "result": {"error": f"Unknown tool: {tool_name}. Available tools: {available_tools}"}}

    permission_result = None

    # Only shell_exec and file_write go through the permission service.
    if tool_name == "shell_exec":
        permission_result = self._permission_service.check_shell_command(
            task_id=task.task_id,
            session_id=task.session_id,
            command=str(tool_args.get("command", "")),
        )
    elif tool_name == "file_write":
        write_path = str(tool_args.get("path", ""))
        # Normalize first so ".." segments cannot smuggle a path past the
        # runtime-directory shortcut, and match whole path components so
        # "/data/runtime_evil" does not qualify.
        normalized_path = posixpath.normpath(write_path.replace("\\", "/"))
        is_runtime_write = (
            posixpath.basename(normalized_path) == "allowed_commands.json"
            or "/data/runtime/" in f"{normalized_path}/"
        )
        # NOTE(review): this shortcut lets a tool call rewrite the permission
        # cache (allowed_commands.json) without asking the user - confirm
        # that this is really intended.
        if is_runtime_write:
            permission_result = {"decision": "allowed", "path": write_path}
        else:
            permission_result = self._permission_service.check_write_path(
                task_id=task.task_id,
                session_id=task.session_id,
                path=write_path,
            )

    if permission_result:
        decision = permission_result.get("decision", "unknown")

        # Hard stop: never executed.
        if decision == "hard_stop":
            self._publish(task, PERMISSION_REQUESTED, permission_result)
            return {
                "status": "failed",
                "result": {
                    "error": f"Command blocked: {permission_result.get('reason', 'Hard stop command')}",
                    "command": permission_result.get("command", ""),
                },
            }

        granted = (
            decision in ("allowed", "allowed_always", "allowed_once")
            or permission_result.get("cached")
        )

        if granted:
            self._publish(task, PERMISSION_RESOLVED, permission_result)

        # The user must confirm: stop here, do not execute.
        elif decision == "prompt":
            self._publish(task, PERMISSION_REQUESTED, permission_result)
            return {
                "status": "awaiting_permission",
                "result": {
                    "error": "Permission required before execution.",
                    "permission_request": permission_result,
                },
            }

        # Explicit deny, or a decision this gate does not know: fail closed
        # rather than run a tool nobody approved.
        else:
            self._publish(task, PERMISSION_RESOLVED, permission_result)
            return {
                "status": "failed",
                "result": {
                    "error": "Permission denied",
                    "command": permission_result.get("command", ""),
                },
            }

    if tool_name == "shell_exec":
        command = str(tool_args.get("command", ""))
        if command.startswith("sudo "):
            if secret_override is None:
                secret_request = SecretRequest(
                    task_id=task.task_id,
                    session_id=task.session_id,
                    kind="sudo_password",
                    prompt="Sudo password required",
                    command=command,
                )
                self._publish(task, SECRET_REQUESTED, secret_request.model_dump(mode="json"))
                return {
                    "status": "awaiting_input",
                    "result": {
                        "error": "Secret required",
                        "secret_request": secret_request.model_dump(mode="json"),
                    },
                }
            # Feed the secret through stdin with an empty prompt.
            tool_args["command"] = f"sudo -S -p '' {command[len('sudo '):]}"
            tool_args["stdin_secret"] = f"{secret_override}\n"

    tool_call = ToolCall(
        tool=tool_name,
        args=tool_args,
        task_id=task.task_id,
        step_id="step-1",
    )
    self._publish(task, TOOL_CALLED, tool_call.model_dump(mode="json"))
    tool_result = self._tool_registry.get(tool_name).execute(task=task, args=tool_args)
    self._publish(task, TOOL_COMPLETED, tool_result.model_dump(mode="json"))

    needs_sudo = tool_result.metadata.get("needs_sudo", False) if tool_result.metadata else False

    if not tool_result.ok and needs_sudo:
        return {
            "status": "awaiting_password",
            "result": {
                "task_id": task.task_id,
                "needs_sudo": True,
                "command": tool_args.get("command", ""),
                "error": tool_result.error or "Permission denied",
                "tool_result": tool_result.model_dump(mode="json"),
            },
        }

    return {
        "status": "completed" if tool_result.ok else "failed",
        "result": tool_result.model_dump(mode="json"),
    }
msg_type == "plan": + payload = data.get("payload", {}) + steps_data = payload.get("steps", []) + + # Normalize steps: handle {"type": "step", "payload": {"tool": ...}} + normalized = [] + for step in steps_data: + if isinstance(step, dict) and step.get("type") == "step": + inner = step.get("payload", {}) + normalized.append({ + "tool": inner.get("tool"), + "args": inner.get("args", {}), + "description": inner.get("description", ""), + "depends_on": inner.get("depends_on", []), + }) + else: + normalized.append(step) + steps_data = normalized + + data = steps_data if steps_data else [] + + # Old format compatibility + elif "steps" in data: + data = data["steps"] + elif "plan" in data: + data = data["plan"] + else: + data = [data] + elif isinstance(data, str): + data = json.loads(data) + if isinstance(data, dict): + data = [data] + + steps = [] + for i, step_data in enumerate(data): + if isinstance(step_data, str): + step_data = {"id": f"step-{i}", "kind": "respond", "text": step_data} + + if not isinstance(step_data, dict): + continue + + step_data.setdefault("id", f"step-{i}") + + # Tool-first: scheduler получает tool напрямую, без трансформаций + # kind определяется по наличию tool name + # args передаются напрямую + if step_data.get("tool"): + step_data["kind"] = "tool" + + step_data.setdefault("kind", step_data.get("kind", "respond")) + step_data.setdefault("tool", step_data.get("tool")) + step_data.setdefault("args", step_data.get("args", {})) + step_data.setdefault("description", step_data.get("description", "")) + step_data.setdefault("requires_confirmation", False) + step_data.setdefault("depends_on", []) + + if "description" not in step_data: + step_data["description"] = f"Step {i}" + + steps.append(PlanStep(**step_data)) + + return steps + + except (json.JSONDecodeError, ValueError, TypeError) as e: + logger.warning(f"Plan parsing failed: {e}") + return [] + + def validate_no_cycles(self, steps: list[PlanStep]) -> bool: + if not steps: + return True + + 
def build_task_graph(
    self,
    steps: list[PlanStep],
) -> dict[str, Any]:
    """Build the node/edge description of a plan.

    Args:
        steps: Parsed plan steps.

    Returns:
        ``{"nodes": [...], "edges": [...], "step_map": {...}}``. Each node
        carries ``id``, ``kind``, ``tool``, ``args``, ``depends_on`` and
        ``ready`` (true when the step has no dependencies). For an empty
        plan only empty ``nodes``/``edges`` are returned; for a cyclic plan
        an ``error`` key is added as well.
    """
    if not steps:
        return {"nodes": [], "edges": []}

    if not self.validate_no_cycles(steps):
        return {"nodes": [], "edges": [], "error": "Cycle detected in plan"}

    nodes: list[dict[str, Any]] = []
    edges: list[dict[str, str]] = []

    for step in steps:
        dependencies = list(step.depends_on)
        nodes.append({
            "id": step.id,
            "kind": step.kind,
            "tool": step.tool,
            "args": step.args,
            # Ready-step selection reads this key from the node; without it
            # every step looks dependency-free and ordering is lost.
            "depends_on": dependencies,
            "ready": len(dependencies) == 0,
        })
        edges.extend({"from": dep_id, "to": step.id} for dep_id in dependencies)

    return {
        "nodes": nodes,
        "edges": edges,
        "step_map": {step.id: step for step in steps},
    }
SHELL_PREFIXES = (
    "run ",
    "execute ",
    "launch ",
    "запусти ",
    "выполни ",
    "выполнить ",
)

MEMORY_STORE_PATTERNS = (
    r"запомни\s+(.+)",
    r"сохрани\s+(.+)",
    r"запиши\s+(.+)",
    r"remember\s+(.+)",
    r"save\s+(.+)",
)

MEMORY_SEARCH_PATTERNS = (
    r"найди\s+(.+)",
    r"вспомни\s+(.+)",
    r"search\s+(.+)",
    r"find\s+(.+)",
)


class IntentParser:
    """Extracts explicit tool intents from natural-language task text."""

    def __init__(self) -> None:
        # DOTALL lets "(.+)" span line breaks; without it everything after
        # the first line of a multi-line request is silently dropped.
        flags = re.IGNORECASE | re.DOTALL
        self._store_patterns = [re.compile(p, flags) for p in MEMORY_STORE_PATTERNS]
        self._search_patterns = [re.compile(p, flags) for p in MEMORY_SEARCH_PATTERNS]

    def parse(self, task_input: str) -> ExecutionDirective | None:
        """Map task text onto a tool directive.

        Checks, in order: memory-store phrases, memory-search phrases,
        shell prefixes such as ``run ``, and a fully backticked command.

        Args:
            task_input: Raw task text from the user.

        Returns:
            A ``tool`` directive, or ``None`` when no explicit intent is found.
        """
        normalized = task_input.strip()

        if matched := self._match_patterns(self._store_patterns, normalized):
            return ExecutionDirective(
                type="tool",
                payload={
                    "tool": "memory_insert",
                    "args": {
                        "text": matched.group(1).strip(),
                        "kind": "fact",
                        "source": "user",
                    },
                },
                requires_permission=False,
                confidence=0.85,
                reason="User explicitly requested to store in memory.",
            )

        if matched := self._match_patterns(self._search_patterns, normalized):
            return ExecutionDirective(
                type="tool",
                payload={
                    "tool": "memory_search",
                    "args": {"query": matched.group(1).strip()},
                },
                requires_permission=False,
                confidence=0.85,
                reason="User explicitly requested to search memory.",
            )

        for prefix in SHELL_PREFIXES:
            # Compare on a slice of the original text so the cut position is
            # always measured in the string that is actually sliced.
            if normalized[: len(prefix)].lower() != prefix:
                continue
            command = normalized[len(prefix):].strip()
            if command:
                return ExecutionDirective(
                    type="tool",
                    payload={
                        "tool": "shell_exec",
                        "args": {"command": command},
                    },
                    requires_permission=True,
                    confidence=0.92,
                    reason="Natural-language task explicitly requested shell execution.",
                )

        quoted = re.match(r"^`(.+)`$", normalized)
        if quoted:
            return ExecutionDirective(
                type="tool",
                payload={
                    "tool": "shell_exec",
                    "args": {"command": quoted.group(1)},
                },
                requires_permission=True,
                confidence=0.75,
                reason="Backticked input treated as direct shell command.",
            )

        return None

    def _match_patterns(self, patterns: list[re.Pattern], text: str) -> re.Match | None:
        """Return the first match of ``text`` against ``patterns`` (anchored at the start), or ``None``."""
        for pattern in patterns:
            if match := pattern.match(text):
                return match
        return None
def check_shell_command(
    self,
    task_id: str,
    session_id: str,
    command: str,
) -> dict[str, Any]:
    """Decide whether a shell command may run, must be confirmed, or is blocked.

    Args:
        task_id: Task requesting the command.
        session_id: Session the task belongs to.
        command: Raw shell command.

    Returns:
        A dict whose ``decision`` is one of ``hard_stop``,
        ``allowed_always``, ``allowed_once``, ``allowed`` or ``prompt``,
        together with the normalized ``command`` and decision-specific keys.
    """
    normalized = self._normalize_command(command)

    # Hard stops are checked before the cache so that a cached approval can
    # never override them.
    if self._is_hard_stop(normalized):
        return {
            "decision": "hard_stop",
            "command": normalized,
            "reason": "Hard stop command - execution denied",
        }

    command_hash = self._hash_command(normalized)
    cache = self._load_cache()

    if command_hash in cache.get("allowed_always", {}):
        return {
            "decision": "allowed_always",
            "command": normalized,
            "cached": True,
        }

    # A one-time approval is only honoured for the task it was granted to.
    once_entry = cache.get("allowed_once", {}).get(command_hash)
    if once_entry is not None and once_entry.get("task_id") == task_id:
        return {
            "decision": "allowed_once",
            "command": normalized,
            "cached": True,
        }

    # Legacy configuration: only a flat list of dangerous commands exists.
    if not self._categories and self._legacy_dangerous_commands:
        if self._matches_legacy_dangerous(normalized):
            return {
                "decision": "prompt",
                "command": normalized,
                "category": "legacy_dangerous",
                "allow_always": False,
                "task_id": task_id,
                "session_id": session_id,
            }
        return {
            "decision": "allowed",
            "command": normalized,
            "category": "legacy_safe",
            "task_id": task_id,
            "session_id": session_id,
        }

    # The category decides whether "allow always" may be offered.
    category = self._get_category(normalized)
    can_always = self._categories.get(category, {}).get("allow_always", True)

    # Everything else needs user confirmation.
    return {
        "decision": "prompt",
        "command": normalized,
        "category": category,
        "allow_always": can_always,
        "task_id": task_id,
        "session_id": session_id,
    }
def _normalize_command(self, command: str) -> str:
    """Normalize a command so equivalent spellings hash identically.

    With ``normalize_commands`` disabled in the settings the command is
    only stripped. Otherwise chain operators ``;`` and ``||`` are rewritten
    to ``&&`` (when ``split_chained`` is enabled), environment variables
    are expanded, a standalone leading ``~`` of a word is replaced by the
    home directory, and runs of whitespace are collapsed.

    Args:
        command: Raw shell command.

    Returns:
        The normalized command text.
    """
    if not self._settings.get("normalize_commands", True):
        return command.strip()

    normalized = command.strip()

    if self._settings.get("split_chained", True):
        normalized = normalized.replace(";", " && ")
        normalized = normalized.replace("||", " && ")

    normalized = os.path.expandvars(normalized)

    # Expand "~" only where a shell would: at the start of a word and
    # followed by "/", whitespace or end of text. Tildes inside words
    # ("HEAD~1", "backup~", "~user") are left alone. A function is used as
    # the replacement so backslashes in the home path are taken literally.
    home = os.path.expanduser("~")
    normalized = re.sub(r"(?<!\S)~(?=/|\s|$)", lambda _match: home, normalized)

    return " ".join(normalized.split())
def _get_category(self, command: str) -> str:
    """Return the permission category of a command.

    Args:
        command: Normalized shell command.

    Returns:
        ``"no_always"`` when any entry configured under the ``no_always``
        category occurs in the command (case-insensitive), else ``"normal"``.
    """
    cmd_lower = command.lower()

    # Lower-case the configured entries as well, mirroring the hard-stop
    # check; otherwise an entry written in upper case can never match.
    no_always = self._categories.get("no_always", {}).get("commands", [])
    if any(entry.lower() in cmd_lower for entry in no_always):
        return "no_always"

    return "normal"
"allow_with_password", "label": "Разрешить с паролем"}) + else: + buttons.insert(0, {"action": "allow_once", "label": "Разрешить"}) + + return buttons + + +class PermissionDecision: + """Permission decision.""" + + def __init__( + self, + decision: str, + command: str | None = None, + cached: bool = False, + ) -> None: + self.decision = decision + self.command = command + self.cached = cached diff --git a/app/events/__init__.py b/app/events/__init__.py new file mode 100644 index 0000000..b89dc5d --- /dev/null +++ b/app/events/__init__.py @@ -0,0 +1,2 @@ +"""Event bus and event store.""" + diff --git a/app/events/event_bus.py b/app/events/event_bus.py new file mode 100644 index 0000000..3bf4c80 --- /dev/null +++ b/app/events/event_bus.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from typing import Callable + +from app.core.contracts import RuntimeEvent +from app.events.event_store import SQLiteEventStore + + +Subscriber = Callable[[RuntimeEvent], None] + + +class EventBus: + """Per-task ordered event publishing with durable storage.""" + + def __init__(self, event_store: SQLiteEventStore) -> None: + self._store = event_store + self._subscribers: list[Subscriber] = [] + + def next_sequence(self, task_id: str) -> int: + return self._store.get_latest_sequence(task_id) + 1 + + def publish(self, event: RuntimeEvent) -> RuntimeEvent: + self._store.append(event) + for subscriber in self._subscribers: + subscriber(event) + return event + + def subscribe(self, subscriber: Subscriber) -> None: + self._subscribers.append(subscriber) + + def list_for_task(self, task_id: str) -> list[RuntimeEvent]: + return self._store.list_for_task(task_id) diff --git a/app/events/event_store.py b/app/events/event_store.py new file mode 100644 index 0000000..853e94a --- /dev/null +++ b/app/events/event_store.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +import json +import sqlite3 +from pathlib import Path + +from app.core.contracts import RuntimeEvent + + +class 
SQLiteEventStore: + """Append-only event store with per-task ordered history.""" + + def __init__(self, db_path: str | Path) -> None: + self._db_path = Path(db_path) + self._db_path.parent.mkdir(parents=True, exist_ok=True) + self._initialize() + + def append(self, event: RuntimeEvent) -> None: + with sqlite3.connect(self._db_path) as conn: + conn.execute( + """ + INSERT INTO events ( + event_id, task_id, session_id, sequence, type, timestamp, + payload_json, causation_id, correlation_id + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + event.event_id, + event.task_id, + event.session_id, + event.sequence, + event.type, + event.timestamp.isoformat(), + json.dumps(event.payload), + event.causation_id, + event.correlation_id, + ), + ) + conn.commit() + + def list_for_task(self, task_id: str) -> list[RuntimeEvent]: + with sqlite3.connect(self._db_path) as conn: + rows = conn.execute( + """ + SELECT event_id, task_id, session_id, sequence, type, timestamp, + payload_json, causation_id, correlation_id + FROM events + WHERE task_id = ? 
# Event-type identifiers carried in the ``type`` field of runtime events.
TASK_RECEIVED = "task_received"
CONTEXT_BUILT = "context_built"
STEP_STARTED = "step_started"
TOOL_CALLED = "tool_called"
TOOL_COMPLETED = "tool_completed"
PERMISSION_REQUESTED = "permission_requested"
PERMISSION_RESOLVED = "permission_resolved"
TASK_AWAITING_PERMISSION = "task_awaiting_permission"
SECRET_REQUESTED = "secret_requested"
TASK_AWAITING_INPUT = "task_awaiting_input"
CHECKPOINT_SAVED = "checkpoint_saved"
TASK_COMPLETED = "task_completed"
TASK_FAILED = "task_failed"
ORCHESTRATOR_CALLED = "orchestrator_called"
ORCHESTRATOR_RESULT = "orchestrator_result"
ORCHESTRATOR_UNAVAILABLE = "orchestrator_unavailable"
ORCHESTRATOR_FALLBACK_USED = "orchestrator_fallback_used"
ORCHESTRATOR_RETRY = "orchestrator_retry"
PLANNER_CALLED = "planner_called"
PLANNER_RETRY = "planner_retry"
CRITIC_CALLED = "critic_called"
CRITIC_RESULT = "critic_result"
MEMORY_WRITE_DECIDED = "memory_write_decided"
PLAN_STARTED = "plan_started"
PLAN_FAILED = "plan_failed"
PLAN_COMPLETED = "plan_completed"
STEP_COMPLETED = "step_completed"
# Misspelled original name, kept as an alias so existing imports keep working.
STEPPED_COMPLETED = STEP_COMPLETED
THINKER_CALLED = "thinker_called"
THINKER_RESULT = "thinker_result"
JSON_COMPILER_CALLED = "json_compiler_called"
JSON_COMPILER_RESULT = "json_compiler_result"
Literal["tool_result", "plan", "critique", "fact", "summary", "user_preference"], + source: Literal["tool", "critic", "user", "system"], + task_id: str | None = None, + session_id: str | None = None, + weight: float = 0.5, + metadata: dict[str, Any] | None = None, + ) -> MemoryEntry: + entry = MemoryEntry( + text=text, + kind=kind, + source=source, + weight=weight, + task_id=task_id, + session_id=session_id, + metadata=metadata or {}, + embedding_model=self._embeddings.__class__.__name__, + embedding_dim=self._embeddings.embedding_dim, + ) + + embedding = self._embeddings.encode(text) + embedding_bytes = embedding.astype("float32").tobytes() + + self._store.insert(entry, embedding_bytes) + self._vector_index.insert(entry.id, embedding) + self._vector_index.save() + + self.cleanup() + + return entry + + def search( + self, + query: str, + top_k: int = 5, + kind: str | None = None, + session_id: str | None = None, + ) -> list[tuple[MemoryEntry, float]]: + query_embedding = self._embeddings.encode(query) + memory_ids, scores = self._vector_index.search(query_embedding, k=top_k) + + results: list[tuple[MemoryEntry, float]] = [] + for memory_id, score in zip(memory_ids, scores): + entry = self._store.get(memory_id) + if entry: + if kind and entry.kind != kind: + continue + if session_id and entry.session_id != session_id: + continue + results.append((entry, score)) + + return results[:top_k] + + def get(self, memory_id: str) -> MemoryEntry | None: + return self._store.get(memory_id) + + def delete(self, memory_id: str) -> bool: + entry = self._store.get(memory_id) + if entry: + self._vector_index.delete(memory_id) + return self._store.delete(memory_id) + return False + + def get_by_task(self, task_id: str) -> list[MemoryEntry]: + return self._store.get_by_task(task_id) + + def get_by_session(self, session_id: str, limit: int = 100) -> list[MemoryEntry]: + return self._store.get_by_session(session_id, limit) + + def get_recent(self, limit: int = 10) -> list[MemoryEntry]: 
def utc_now() -> datetime:
    """Return the current moment as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class MemoryStore:
    """SQLite persistence for memory entries and their raw embedding blobs.

    One connection is opened in the constructor with
    ``check_same_thread=False`` and reused by every method until ``close``.
    """

    def __init__(self, db_path: str | Path) -> None:
        self._db_path = Path(db_path)
        self._db_path.parent.mkdir(parents=True, exist_ok=True)
        self._conn = sqlite3.connect(str(self._db_path), check_same_thread=False)
        self._conn.row_factory = sqlite3.Row
        self._init_tables()

    def _init_tables(self) -> None:
        """Create the tables and indexes if they do not exist yet."""
        self._conn.executescript("""
            CREATE TABLE IF NOT EXISTS memory_items (
                id TEXT PRIMARY KEY,
                text TEXT NOT NULL,
                kind TEXT NOT NULL,
                source TEXT NOT NULL,
                weight REAL NOT NULL DEFAULT 0.5,
                task_id TEXT,
                session_id TEXT,
                metadata_json TEXT,
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL
            );

            CREATE TABLE IF NOT EXISTS memory_embeddings (
                memory_id TEXT PRIMARY KEY,
                embedding BLOB NOT NULL,
                embedding_model TEXT NOT NULL,
                embedding_dim INTEGER NOT NULL,
                created_at TEXT NOT NULL,
                FOREIGN KEY (memory_id) REFERENCES memory_items(id) ON DELETE CASCADE
            );

            CREATE INDEX IF NOT EXISTS idx_memory_items_task ON memory_items(task_id);
            CREATE INDEX IF NOT EXISTS idx_memory_items_session ON memory_items(session_id);
            CREATE INDEX IF NOT EXISTS idx_memory_items_kind ON memory_items(kind);
            CREATE INDEX IF NOT EXISTS idx_memory_embeddings_model ON memory_embeddings(embedding_model);
        """)
        self._conn.commit()

    def _select(self, sql: str, params: tuple) -> list[MemoryEntry]:
        """Run a SELECT over ``memory_items`` and convert every row to an entry."""
        rows = self._conn.execute(sql, params).fetchall()
        return [self._row_to_entry(row) for row in rows]

    def insert(self, entry: MemoryEntry, embedding: bytes) -> None:
        """Store *entry* and its embedding as one atomic unit.

        Raises:
            sqlite3.Error: if either row cannot be written; neither row is
                kept in that case.
        """
        now = utc_now().isoformat()
        # The connection context manager commits on success and rolls back on
        # error, so a failed embedding insert cannot leave the item row pending
        # on the shared connection (where a later commit would persist it).
        with self._conn:
            self._conn.execute(
                """
                INSERT INTO memory_items (id, text, kind, source, weight, task_id, session_id, metadata_json, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    entry.id,
                    entry.text,
                    entry.kind,
                    entry.source,
                    entry.weight,
                    entry.task_id,
                    entry.session_id,
                    json.dumps(entry.metadata) if entry.metadata else None,
                    entry.created_at.isoformat(),
                    now,
                ),
            )
            self._conn.execute(
                """
                INSERT INTO memory_embeddings (memory_id, embedding, embedding_model, embedding_dim, created_at)
                VALUES (?, ?, ?, ?, ?)
                """,
                (
                    entry.id,
                    embedding,
                    entry.embedding_model,
                    entry.embedding_dim,
                    now,
                ),
            )

    def get(self, memory_id: str) -> MemoryEntry | None:
        """Return the entry with *memory_id*, or ``None`` when it is unknown."""
        row = self._conn.execute(
            "SELECT * FROM memory_items WHERE id = ?", (memory_id,)
        ).fetchone()
        if not row:
            return None
        return self._row_to_entry(row)

    def get_embedding(self, memory_id: str) -> bytes | None:
        """Return the stored embedding blob, or ``None`` when there is none."""
        row = self._conn.execute(
            "SELECT embedding FROM memory_embeddings WHERE memory_id = ?", (memory_id,)
        ).fetchone()
        return bytes(row["embedding"]) if row else None

    def get_all(self, limit: int = 1000) -> list[MemoryEntry]:
        """Return up to *limit* entries, newest first."""
        return self._select(
            "SELECT * FROM memory_items ORDER BY created_at DESC LIMIT ?", (limit,)
        )

    def get_by_task(self, task_id: str) -> list[MemoryEntry]:
        """Return every entry of *task_id*, newest first."""
        return self._select(
            "SELECT * FROM memory_items WHERE task_id = ? ORDER BY created_at DESC", (task_id,)
        )

    def get_by_session(self, session_id: str, limit: int = 100) -> list[MemoryEntry]:
        """Return up to *limit* entries of *session_id*, newest first."""
        return self._select(
            "SELECT * FROM memory_items WHERE session_id = ? ORDER BY created_at DESC LIMIT ?",
            (session_id, limit),
        )

    def get_by_kind(self, kind: str, limit: int = 100) -> list[MemoryEntry]:
        """Return up to *limit* entries of the given *kind*, newest first."""
        return self._select(
            "SELECT * FROM memory_items WHERE kind = ? ORDER BY created_at DESC LIMIT ?", (kind, limit)
        )

    def delete(self, memory_id: str) -> bool:
        """Delete the entry and its embedding; return True if an entry existed."""
        # Both deletes share one transaction so the embedding is never removed
        # without its item (or the other way round).
        with self._conn:
            self._conn.execute("DELETE FROM memory_embeddings WHERE memory_id = ?", (memory_id,))
            cursor = self._conn.execute("DELETE FROM memory_items WHERE id = ?", (memory_id,))
        return cursor.rowcount > 0

    def update_weight(self, memory_id: str, weight: float) -> bool:
        """Set the weight of an entry; return True if the entry existed."""
        with self._conn:
            cursor = self._conn.execute(
                "UPDATE memory_items SET weight = ?, updated_at = ? WHERE id = ?",
                (weight, utc_now().isoformat(), memory_id),
            )
        return cursor.rowcount > 0

    def search_text(self, query: str, limit: int = 10) -> list[MemoryEntry]:
        """Return up to *limit* entries whose text contains *query*, newest first."""
        # Escape LIKE metacharacters so "%" and "_" in the query match
        # themselves instead of acting as wildcards.
        escaped = query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        return self._select(
            "SELECT * FROM memory_items WHERE text LIKE ? ESCAPE '\\' ORDER BY created_at DESC LIMIT ?",
            (f"%{escaped}%", limit),
        )

    def count(self) -> int:
        """Return the number of stored entries."""
        row = self._conn.execute("SELECT COUNT(*) FROM memory_items").fetchone()
        return row[0] if row else 0

    def close(self) -> None:
        """Close the underlying connection."""
        self._conn.close()

    def _row_to_entry(self, row: sqlite3.Row) -> MemoryEntry:
        """Build a MemoryEntry from a ``memory_items`` row."""
        metadata = json.loads(row["metadata_json"]) if row["metadata_json"] else {}
        return MemoryEntry(
            id=row["id"],
            text=row["text"],
            kind=row["kind"],
            source=row["source"],
            weight=row["weight"],
            task_id=row["task_id"],
            session_id=row["session_id"],
            metadata=metadata,
            created_at=datetime.fromisoformat(row["created_at"]),
            # Embedding columns live in memory_embeddings and are not read here.
            embedding_model="",
            embedding_dim=0,
        )
logger = logging.getLogger(__name__)


class VectorIndex:
    """hnswlib-backed nearest-neighbour index addressed by string memory ids.

    hnswlib identifies items by integer label, so the index keeps a
    label <-> memory-id mapping. Labels are allocated sequentially and the
    mapping is persisted in a JSON sidecar next to the index file, which keeps
    ids stable across processes (the built-in ``hash`` of a string is not).
    """

    def __init__(
        self,
        index_path: str | Path | None = None,
        embedding_dim: int = 384,
        max_elements: int = 10000,
    ) -> None:
        self._embedding_dim = embedding_dim
        self._index_path = Path(index_path) if index_path else None
        self._index: hnswlib.Index | None = None
        self._max_elements = max_elements
        self._id_to_label: dict[str, int] = {}
        self._label_to_id: dict[int, str] = {}
        self._next_label = 0

        self._init_index()

    def _init_index(self) -> None:
        """Load the persisted index if possible, otherwise start an empty one."""
        if self._load():
            return
        self._id_to_label = {}
        self._label_to_id = {}
        self._next_label = 0
        try:
            self._index = self._new_index()
        except Exception as e:
            logger.warning(f"VectorIndex init failed: {e}")
            self._index = None

    def _new_index(self) -> "hnswlib.Index":
        """Create an empty index with the configured dimension and capacity."""
        index = hnswlib.Index(space="l2", dim=self._embedding_dim)
        index.init_index(
            max_elements=self._max_elements,
            ef_construction=200,
            M=16,
        )
        return index

    def _ids_path(self) -> Path:
        """Return the sidecar file that stores the label mapping."""
        return self._index_path.with_name(self._index_path.name + ".ids.json")

    def _ensure_capacity(self) -> None:
        """Double the capacity when the index is full; hnswlib rejects inserts otherwise."""
        capacity = self._index.get_max_elements()
        if self._index.get_current_count() >= capacity:
            self._index.resize_index(max(capacity * 2, 1))

    def _forget(self, memory_id: str) -> None:
        """Drop *memory_id* from the label mapping, if present."""
        label = self._id_to_label.pop(memory_id, None)
        if label is not None:
            self._label_to_id.pop(label, None)

    def insert(self, memory_id: str, embedding: np.ndarray) -> None:
        """Add (or update) the vector stored for *memory_id*.

        Failures are logged and swallowed, matching the best-effort contract
        of the rest of this class.
        """
        if self._index is None:
            self._init_index()
        if self._index is None:
            return

        is_new = memory_id not in self._id_to_label
        try:
            vector = self._normalize(embedding)
            internal_id = self._get_internal_id(memory_id)
            if is_new:
                self._ensure_capacity()
            self._index.add_items(vector, ids=np.array([internal_id]))
        except Exception as e:
            logger.warning(f"VectorIndex insert failed: {e}")
            if is_new:
                # Do not keep a mapping for a vector that never made it in.
                self._forget(memory_id)

    def search(
        self,
        query_embedding: np.ndarray,
        k: int = 5,
    ) -> tuple[list[str], list[float]]:
        """Return the memory ids nearest to *query_embedding* and their scores.

        Scores are cosine similarities: vectors are unit-normalised and the
        "l2" space reports squared distance d, for which cos = 1 - d / 2.
        """
        if self._index is None:
            return [], []

        try:
            live = len(self._id_to_label)
            if live == 0 or k <= 0:
                return [], []

            # hnswlib raises when asked for more neighbours than it can return.
            k_eff = min(k, live)
            self._index.set_ef(max(k_eff * 2, 50))

            vector = self._normalize(query_embedding)
            labels, distances = self._index.knn_query(vector, k=k_eff)

            memory_ids: list[str] = []
            scores: list[float] = []
            for label, dist in zip(labels[0], distances[0]):
                memory_id = self._label_to_id.get(int(label))
                if memory_id is None:
                    continue
                memory_ids.append(memory_id)
                scores.append(1.0 - float(dist) / 2.0)
            return memory_ids, scores
        except Exception as e:
            logger.warning(f"VectorIndex search failed: {e}")
            return [], []

    def delete(self, memory_id: str) -> bool:
        """Remove *memory_id* from search results; return True if it was indexed."""
        label = self._id_to_label.get(memory_id)
        if label is None or self._index is None:
            return False
        try:
            self._index.mark_deleted(label)
        except Exception as e:
            logger.warning(f"VectorIndex delete failed: {e}")
            return False
        self._forget(memory_id)
        return True

    def get_items(self, memory_ids: list[str]) -> np.ndarray:
        """Return the stored vectors for *memory_ids*.

        Raises:
            RuntimeError: if the index is not initialised or an id is unknown.
        """
        if self._index is None:
            raise RuntimeError("Index not initialized")
        internal_ids = []
        for mid in memory_ids:
            if mid not in self._id_to_label:
                raise RuntimeError(f"Unknown memory id: {mid}")
            internal_ids.append(self._id_to_label[mid])
        return self._index.get_items(np.array(internal_ids))

    def save(self) -> None:
        """Persist the index and its label mapping; failures are logged."""
        if self._index is None or self._index_path is None:
            return
        import json

        try:
            self._index_path.parent.mkdir(parents=True, exist_ok=True)
            self._index.save_index(str(self._index_path))
            payload = {"next_label": self._next_label, "labels": self._id_to_label}
            self._ids_path().write_text(json.dumps(payload), encoding="utf-8")
        except Exception as e:
            logger.warning(f"VectorIndex save failed: {e}")

    def _load(self) -> bool:
        """Load index and label mapping from disk; return True on success."""
        import json

        if not (self._index_path and self._index_path.exists()):
            return False
        ids_path = self._ids_path()
        if not ids_path.exists():
            # Without the mapping the stored labels cannot be tied back to
            # memory ids, so the file is unusable and a fresh index is built.
            logger.warning(f"VectorIndex load failed: missing id mapping {ids_path}")
            return False
        try:
            payload = json.loads(ids_path.read_text(encoding="utf-8"))
            id_to_label = {str(k): int(v) for k, v in payload["labels"].items()}
            next_label = int(payload.get("next_label", 0))
            index = hnswlib.Index(space="l2", dim=self._embedding_dim)
            index.load_index(
                str(self._index_path),
                max_elements=self._max_elements,
            )
        except Exception as e:
            logger.warning(f"VectorIndex load failed: {e}")
            return False

        self._index = index
        self._id_to_label = id_to_label
        self._label_to_id = {label: mid for mid, label in id_to_label.items()}
        self._next_label = max(next_label, max(self._label_to_id, default=-1) + 1)
        return True

    def _normalize(self, vector: np.ndarray) -> np.ndarray:
        """Return *vector* as a unit-length row of shape (1, n); zero stays zero."""
        vec = vector.flatten()
        norm = np.linalg.norm(vec)
        if norm > 0:
            vec = vec / norm
        return vec.reshape(1, -1)

    def _get_internal_id(self, memory_id: str) -> int:
        """Return the label of *memory_id*, allocating the next free one if new."""
        label = self._id_to_label.get(memory_id)
        if label is None:
            label = self._next_label
            self._next_label += 1
            self._id_to_label[memory_id] = label
            self._label_to_id[label] = memory_id
        return label

    def _get_memory_id(self, internal_id: int) -> str:
        """Return the memory id mapped to *internal_id* (its string form if unmapped)."""
        return self._label_to_id.get(internal_id, str(internal_id))

    @property
    def embedding_dim(self) -> int:
        return self._embedding_dim

    @property
    def element_count(self) -> int:
        return self._index.get_current_count() if self._index else 0
class MemoryWritePolicy:
    """Deterministic rule set that turns a critic score into a storage verdict."""

    def __init__(
        self,
        store_threshold: float = 0.7,
        min_usefulness: float = 0.3,
        max_entries_per_session: int = 50,
    ) -> None:
        self._store_threshold = store_threshold
        self._min_usefulness = min_usefulness
        self._max_entries_per_session = max_entries_per_session

    def decide(
        self,
        critic_score: CriticScore,
        memory_type: MemoryEntry.Kind,
        session_id: str | None = None,
        has_duplicate: bool = False,
        current_session_count: int = 0,
    ) -> Literal["store", "store_with_weight", "skip", "merge"]:
        """Return the verdict for one candidate memory.

        Unsafe content is skipped first, duplicates are merged next, and only
        then are the critic flag, usefulness floor and session quota applied.
        """
        if critic_score.safety < 0.5:
            return "skip"
        if has_duplicate:
            return "merge"

        blocked = (
            not critic_score.memory_store
            or critic_score.usefulness < self._min_usefulness
            or (session_id and current_session_count >= self._max_entries_per_session)
        )
        if blocked:
            return "skip"

        verdict = self._evaluate_scores(critic_score, memory_type)

        # A "store" verdict whose raw weight is under the threshold is
        # downgraded to "store_with_weight" only if the boosted weight clears it.
        underweight = verdict == "store" and critic_score.weight < self._store_threshold
        if underweight and self._adjust_weight(critic_score, memory_type) >= self._store_threshold:
            return "store_with_weight"
        return verdict

    def _evaluate_scores(
        self,
        critic_score: CriticScore,
        memory_type: MemoryEntry.Kind,
    ) -> Literal["store", "store_with_weight", "skip", "merge"]:
        """Map the mean of correctness, usefulness and safety to a verdict."""
        mean = (critic_score.correctness + critic_score.usefulness + critic_score.safety) / 3.0

        # Each tier is (minimum mean, verdict), checked from strictest down.
        if memory_type in ("fact", "plan", "summary"):
            tiers = ((0.8, "store"), (0.6, "store_with_weight"))
        elif memory_type in ("tool_result", "critique"):
            tiers = ((self._store_threshold, "store"), (0.5, "store_with_weight"))
        elif memory_type == "user_preference":
            tiers = ((0.5, "store"),)
        else:
            tiers = ()

        for floor, verdict in tiers:
            if mean >= floor:
                return verdict
        return "skip"

    def _adjust_weight(
        self,
        critic_score: CriticScore,
        memory_type: MemoryEntry.Kind,
    ) -> float:
        """Return the critic weight plus type and safety boosts, capped at 1.0."""
        boosts = {
            "fact": 0.15,
            "plan": 0.1,
            "summary": 0.1,
            "user_preference": 0.2,
            "tool_result": 0.05,
            "critique": 0.05,
        }
        type_boost = boosts.get(memory_type, 0.0)
        safety_boost = 0.1 if critic_score.safety >= 0.9 else 0.0

        boosted = critic_score.weight + type_boost + safety_boost
        return min(boosted, 1.0)
class AsyncOrchestratorAdapter:
    """Async wrapper for orchestrator - runs in executor to avoid blocking event loop."""

    def __init__(self, sync_adapter: SyncOrchestrator) -> None:
        self._sync = sync_adapter

    async def generate(self, prompt: str, max_tokens: int | None = None) -> str:
        """Run the blocking ``generate`` in the default executor and return its text."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._sync.generate, prompt, max_tokens)

    async def stream(self, prompt: str, max_tokens: int | None = None) -> AsyncIterator[str]:
        """Yield the chunks produced by the blocking ``stream``.

        The synchronous generator is drained in the executor first, so chunks
        are delivered only after generation has finished.
        """
        loop = asyncio.get_running_loop()

        # This must be a plain callable: handing a coroutine function to the
        # executor only returns an un-awaited coroutine object.
        def drain() -> list[str]:
            return list(self._sync.stream(prompt, max_tokens))

        chunks = await loop.run_in_executor(None, drain)
        for chunk in chunks:
            yield chunk


class AsyncCoderAdapter:
    """Async wrapper for coder."""

    def __init__(self, sync_adapter) -> None:
        self._sync = sync_adapter

    async def generate(self, prompt: str, max_tokens: int | None = None) -> str:
        """Run the blocking ``generate`` in the default executor and return its text."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._sync.generate, prompt, max_tokens)


class AsyncCriticAdapter:
    """Async wrapper for critic."""

    def __init__(self, sync_adapter) -> None:
        self._sync = sync_adapter

    async def generate(self, prompt: str, max_tokens: int | None = None) -> str:
        """Run the blocking ``generate`` in the default executor and return its text."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._sync.generate, prompt, max_tokens)
class CriticAdapter:
    """Chat-completion wrapper that prompts the model as a critic."""

    def __init__(self, llm: Llama, system_prompt: str | None = None, lock: RLock | None = None) -> None:
        self._llm = llm
        self._lock = lock or RLock()
        self._system_prompt = system_prompt or (
            "You are a critic model. Evaluate tool results and respond with JSON."
        )
        self._temperature = 0.1

    def _messages(self, prompt: str) -> list[dict[str, str]]:
        """Build the system + user message list sent with every request."""
        return [
            {"role": "system", "content": self._system_prompt},
            {"role": "user", "content": prompt},
        ]

    def generate(self, prompt: str, max_tokens: int | None = None) -> str:
        """Return the full completion text for *prompt* (512 tokens by default)."""
        with self._lock:
            output = self._llm.create_chat_completion(
                messages=self._messages(prompt),
                max_tokens=max_tokens or 512,
                temperature=self._temperature,
            )
        # The message content may be None; honour the ``-> str`` contract
        # instead of leaking None to callers.
        return output["choices"][0]["message"].get("content") or ""

    def stream(self, prompt: str, max_tokens: int | None = None) -> Iterator[str]:
        """Yield non-empty content deltas as they are produced.

        The lock stays held until the generator is exhausted or closed.
        """
        with self._lock:
            for chunk in self._llm.create_chat_completion(
                messages=self._messages(prompt),
                max_tokens=max_tokens or 512,
                temperature=self._temperature,
                stream=True,
            ):
                content = chunk["choices"][0].get("delta", {}).get("content")
                if content:
                    yield content
class OrchestratorAdapter:
    """Chat-completion wrapper that prompts the model as the orchestrator."""

    def __init__(self, llm: Llama, system_prompt: str | None = None, lock: RLock | None = None) -> None:
        self._llm = llm
        self._lock = lock or RLock()
        self._system_prompt = system_prompt or (
            "You are an expert orchestrator for a local AI agent system. "
            "Your role is to analyze the user's task, decide whether planning is needed."
        )
        self._temperature = 0.2

    def _messages(self, prompt: str) -> list[dict[str, str]]:
        """Build the system + user message list sent with every request."""
        return [
            {"role": "system", "content": self._system_prompt},
            {"role": "user", "content": prompt},
        ]

    def generate(self, prompt: str, max_tokens: int | None = None) -> str:
        """Return the full completion text for *prompt* (512 tokens by default)."""
        with self._lock:
            output = self._llm.create_chat_completion(
                messages=self._messages(prompt),
                max_tokens=max_tokens or 512,
                temperature=self._temperature,
            )
        # The message content may be None; honour the ``-> str`` contract
        # instead of leaking None to callers.
        return output["choices"][0]["message"].get("content") or ""

    def stream(self, prompt: str, max_tokens: int | None = None) -> Iterator[str]:
        """Yield non-empty content deltas as they are produced.

        The lock stays held until the generator is exhausted or closed.
        """
        with self._lock:
            for chunk in self._llm.create_chat_completion(
                messages=self._messages(prompt),
                max_tokens=max_tokens or 512,
                temperature=self._temperature,
                stream=True,
            ):
                content = chunk["choices"][0].get("delta", {}).get("content")
                if content:
                    yield content
class SQLiteApprovalStore:
    """Stores persistent user approval decisions."""

    def __init__(self, db_path: str | Path) -> None:
        self._db_path = Path(db_path)
        self._db_path.parent.mkdir(parents=True, exist_ok=True)
        self._initialize()

    def _connect(self):
        """Return a context manager yielding a connection that is closed on exit.

        ``with sqlite3.connect(...)`` only commits or rolls back; it never
        closes the connection, so each call would otherwise leak one.
        """
        from contextlib import closing

        return closing(sqlite3.connect(self._db_path))

    def save(self, decision: PermissionDecision) -> PermissionDecision:
        """Insert or replace the decision for its (action_type, pattern) key."""
        # ``closing`` closes the connection; the inner ``conn`` context commits
        # on success and rolls back on error.
        with self._connect() as conn, conn:
            conn.execute(
                """
                INSERT OR REPLACE INTO approvals (action_type, pattern, decision, created_at)
                VALUES (?, ?, ?, ?)
                """,
                (
                    decision.action_type,
                    decision.pattern,
                    decision.decision,
                    decision.created_at.isoformat(),
                ),
            )
        return decision

    def load(self, action_type: str, pattern: str) -> PermissionDecision | None:
        """Return the stored decision for the key, or ``None`` when absent."""
        with self._connect() as conn:
            row = conn.execute(
                """
                SELECT action_type, pattern, decision, created_at
                FROM approvals
                WHERE action_type = ? AND pattern = ?
                """,
                (action_type, pattern),
            ).fetchone()
        if not row:
            return None
        return PermissionDecision(
            action_type=row[0],
            pattern=row[1],
            decision=row[2],
            # Stored as ISO-8601 text and passed through as text, as before.
            created_at=row[3],
        )

    def _initialize(self) -> None:
        """Create the approvals table if it does not exist yet."""
        with self._connect() as conn, conn:
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS approvals (
                    action_type TEXT NOT NULL,
                    pattern TEXT NOT NULL,
                    decision TEXT NOT NULL,
                    created_at TEXT NOT NULL,
                    PRIMARY KEY (action_type, pattern)
                )
                """
            )
async def run_task(self, task: UserTask) -> dict[str, object]:
    """Run one task through a single route-and-execute pass.

    Steps, in order: create the task state, publish TASK_RECEIVED, save and
    publish an initial checkpoint, build the context, await the router's
    directive, execute it in a worker thread, persist the resulting status,
    publish a terminal event for ``completed``/``failed``, save the final
    checkpoint and write a memory record.

    Args:
        task: The user task to process.

    Returns:
        A dict with ``task_id``, ``status``, ``directive`` (JSON dump),
        ``result`` and ``events`` (empty when no event bus is configured).
    """
    state = self._task_state_store.create_task(
        task.task_id,
        {
            "status": "received",
            "session_id": task.session_id,
            "plan": None,
            "task_input": task.input,
            "task_context": task.context,
        },
    )
    self._publish(task, TASK_RECEIVED, {"status": "received"})

    checkpoint = TaskCheckpoint(task_id=task.task_id, status="received")
    self._checkpoint_store.save(checkpoint)
    self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json"))

    context = self._context_builder.build(task=task, checkpoint=checkpoint)
    self._publish(task, CONTEXT_BUILT, {"keys": sorted(context.keys())})

    directive = await self._router.decide(
        state=state,
        context=context,
        task_id=task.task_id,
        session_id=task.session_id,
    )

    # The execution engine is synchronous; run it off the event loop thread.
    execution_result = await asyncio.to_thread(
        self._execution_engine.execute,
        task=task,
        directive=directive,
    )

    status = execution_result["status"]
    state_patch = {"status": status, "last_directive": directive.model_dump(mode="json")}
    if status == "awaiting_permission":
        state_patch["pending_permission_request"] = execution_result["result"].get("permission_request")
    self._task_state_store.update_task(task.task_id, state_patch)

    # NOTE(review): no event is published for "awaiting_permission" here,
    # although TASK_AWAITING_PERMISSION is imported by this module - confirm intent.
    if status == "completed":
        self._publish(
            task,
            TASK_COMPLETED,
            {"directive": directive.model_dump(mode="json"), "execution_result": execution_result["result"]},
        )
    elif status == "failed":
        self._publish(task, TASK_FAILED, {"error": execution_result.get("result", {}).get("error")})

    checkpoint.status = status
    self._checkpoint_store.save(checkpoint)
    self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json"))

    # Save task and result to memory for session context
    self._save_to_memory(task, execution_result, status)

    # _publish() tolerates a missing event bus, so the return value must too.
    # NOTE(review): the synchronous RuntimeLoop reads events through
    # list_for_task(); confirm EventBus also exposes get_task_events().
    events = list(self._event_bus.get_task_events(task.task_id)) if self._event_bus else []

    return {
        "task_id": task.task_id,
        "status": status,
        "directive": directive.model_dump(mode="json"),
        "result": execution_result.get("result"),
        "events": events,
    }
def __init__(self, base_dir: str | Path | None = None) -> None:
    """Build and wire every runtime component.

    Creates the event bus and SQLite-backed stores under ``base_dir / "data"``,
    initialises the memory policy and memory interface, the tool sandbox and
    registry, the context builder, the router (without models - they are
    attached later by ``load_models_at_startup``), the permission service,
    the execution engine and the runtime loop.

    Args:
        base_dir: Project root. Defaults to
            ``Path(__file__).resolve().parents[2]``.
    """
    self.base_dir = Path(base_dir or Path(__file__).resolve().parents[2])
    self.config: AppConfig = load_app_config(self.base_dir / "config")

    self.event_bus = EventBus(
        SQLiteEventStore(self.base_dir / "data" / "events" / "events.sqlite3")
    )
    self.task_state_store = SQLiteTaskStateStore(
        self.base_dir / "data" / "state" / "task_state.sqlite3"
    )
    self.checkpoint_store = SQLiteCheckpointStore(
        self.base_dir / "data" / "state" / "checkpoints.sqlite3"
    )
    self.approval_store = SQLiteApprovalStore(
        self.base_dir / "data" / "permissions" / "approvals.sqlite3"
    )

    # Model adapters start empty and are filled in on demand.
    self._thinker: OrchestratorAdapter | None = None
    self._json_compiler: OrchestratorAdapter | None = None
    self._orchestrator: OrchestratorAdapter | None = None
    self._coder: CoderAdapter | None = None
    self._critic: CriticAdapter | None = None
    self._sys_util: OrchestratorAdapter | None = None
    # Maps a model cache key to its (llm, lock) pair so identical configs share one instance.
    self._model_cache: dict[tuple[object, ...], tuple[object, RLock]] = {}
    self._memory_interface: MemoryInterface | None = None
    self._memory_policy: MemoryWritePolicy | None = None
    self.tool_registry = None
    self.tool_sandbox = None

    self._init_models()
    self._init_memory()

    runtime_config = self.config.runtime

    self.tool_sandbox = ToolSandbox(
        allowed_root=self.base_dir,
        timeout_ms=runtime_config.step_timeout_ms,
    )

    self.tool_registry = self._create_tool_registry()

    context_config = {
        "max_context_tokens": runtime_config.max_context_tokens,
        "context_budgets": runtime_config.context_budgets,
        "reserve_for_generation_pct": runtime_config.reserve_for_generation_pct,
    }

    self._prompts = self._load_prompts()
    # Ensure a sys_util prompt is available to the router (prompts.json may have a "sys_util" key).
    # NOTE(review): this treats self.config like a mapping (`in`, `.get`);
    # confirm AppConfig supports both, otherwise this branch never runs or raises.
    if "sys_util" not in self._prompts and "prompts" in self.config:
        self._prompts["sys_util"] = self.config.get("sys_util")

    # Built once; the original constructed an identical ContextBuilder twice.
    self.context_builder = ContextBuilder(
        memory_interface=self._memory_interface,
        tool_registry=self.tool_registry,
        config=context_config,
    )

    # Models are not loaded yet, so the router starts without them.
    self.router = AsyncRouter(
        thinker=None,
        json_compiler=None,
        intent_parser=None,
        prompts=self._prompts,
        event_bus=self.event_bus,
        tool_registry=self.tool_registry,
        retry_limit=runtime_config.orchestrator_retry_limit,
        debug=getattr(runtime_config, "debug", False),
        log_length=getattr(runtime_config, "debug_orchestrator_log_length", 500),
        json_fix_retry_limit=getattr(runtime_config, "json_fix_retry_limit", 2),
        json_fix_use_sys_util=getattr(runtime_config, "json_fix_use_sys_util", True),
        intent_classifier=getattr(runtime_config, "intent_classifier", "thinker"),
    )

    self.permission_service = PermissionService(
        config=self._load_permissions_config(),
    )

    self.execution_engine = ExecutionEngine(
        event_bus=self.event_bus,
        tool_registry=self.tool_registry,
        permission_service=self.permission_service,
        scheduler=ExecutionScheduler(
            retry_limit=runtime_config.planner_retry_limit
        ),
        critic=self._critic,
        memory_policy=self._memory_policy,
        memory_interface=self._memory_interface,
        prompts=self._prompts,
    )

    self.runtime_loop = RuntimeLoop(
        event_bus=self.event_bus,
        task_state_store=self.task_state_store,
        checkpoint_store=self.checkpoint_store,
        context_builder=self.context_builder,
        router=self.router,
        execution_engine=self.execution_engine,
        permission_service=self.permission_service,
        memory_interface=self._memory_interface,
    )
def load_models_at_startup(self) -> None:
    """Load all LLM models synchronously and attach them to the runtime.

    For each role (thinker, json_compiler, coder, critic, sys_util) a model
    is loaded only when its config has a ``path``. Afterwards the async
    wrappers are handed to the router and, when present, the critic and
    coder are handed to the execution engine.

    Raises:
        RuntimeError: If loading or wiring fails; the original exception is
            chained as the cause.
    """
    import os

    # NOTE(review): this changes the working directory of the whole process;
    # presumably the model backend resolves relative paths - confirm.
    os.chdir(str(self.base_dir / "models"))

    try:
        # Inner quotes are single quotes throughout: reusing the enclosing
        # double quote inside an f-string is a SyntaxError before Python 3.12.
        print("Loading thinker model...")
        thinker_config = self.config.models.thinker or {}
        if thinker_config.get("path"):
            llm, lock = self._get_or_create_llm("thinker", thinker_config)
            self._thinker = OrchestratorAdapter(llm, system_prompt=self._prompts.get("thinker"), lock=lock)
            print(f"Thinker loaded: {self._thinker} (model: {thinker_config.get('path')})")

        print("Loading json_compiler model...")
        compiler_config = self.config.models.json_compiler or {}
        if compiler_config.get("path"):
            llm, lock = self._get_or_create_llm("json_compiler", compiler_config)
            self._json_compiler = OrchestratorAdapter(llm, system_prompt=self._prompts.get("json_compiler"), lock=lock)
            print(f"JSON Compiler loaded: {self._json_compiler} (model: {compiler_config.get('path')})")

        print("Loading coder model...")
        coder_config = self.config.models.coder or {}
        if coder_config.get("path"):
            llm, lock = self._get_or_create_llm("coder", coder_config)
            self._coder = CoderAdapter(llm, system_prompt=self._prompts.get("coder"), lock=lock)
            print(f"Coder loaded: {self._coder} (model: {coder_config.get('path')})")

        print("Loading critic model...")
        critic_config = self.config.models.critic or {}
        if critic_config.get("path"):
            llm, lock = self._get_or_create_llm("critic", critic_config)
            self._critic = CriticAdapter(llm, system_prompt=self._prompts.get("critic"), lock=lock)
            print(f"Critic loaded: {self._critic} (model: {critic_config.get('path')})")

        print("Loading sys_util model...")
        sys_util_config = self.config.models.sys_util or {}
        if sys_util_config.get("path"):
            llm, lock = self._get_or_create_llm("sys_util", sys_util_config)
            self._sys_util = OrchestratorAdapter(llm, system_prompt=self._prompts.get("sys_util"), lock=lock)
            print(f"Sys_util loaded: {self._sys_util} (model: {sys_util_config.get('path')})")

        print("All models loaded successfully")

        # Wrap whichever models were loaded; roles without a model stay None.
        async_thinker = AsyncOrchestratorAdapter(self._thinker) if self._thinker else None
        async_compiler = AsyncOrchestratorAdapter(self._json_compiler) if self._json_compiler else None
        async_coder = AsyncCoderAdapter(self._coder) if self._coder else None
        async_critic = AsyncCriticAdapter(self._critic) if self._critic else None
        async_sys_util = AsyncOrchestratorAdapter(self._sys_util) if self._sys_util else None

        # The router setters are always called, even with None.
        self.router.set_thinker(async_thinker)
        self.router.set_json_compiler(async_compiler)
        self.router.set_sys_util(async_sys_util)
        self.router.set_tool_registry(self.tool_registry)
        # The execution engine is only updated for models that actually loaded.
        if async_critic:
            self.execution_engine.set_critic(async_critic)
        if async_coder:
            self.execution_engine.set_coder(async_coder)

    except Exception as e:
        print(f"Failed to load models at startup: {e}")
        raise RuntimeError(f"Model loading failed: {e}") from e
def _create_tool_registry(self) -> ToolRegistry:
    """Discover tool plugins and register those that have a known constructor.

    Every discovered tool whose name has a factory below is instantiated,
    registered, and given a schema entry taken from its manifest
    (``description``, ``args_schema``, ``requires_permission``). Discovered
    tools without a factory are reported and skipped.

    Returns:
        The populated registry.
    """
    from app.tools.registry import ToolRegistry
    from app.tools.plugins.shell_exec import Tool as ShellExecTool
    from app.tools.plugins.file_read import Tool as FileReadTool
    from app.tools.plugins.file_write import Tool as FileWriteTool
    from app.tools.plugins.memory_tools import Tool as MemoryTool
    from app.tools.discover import ToolDiscovery

    # Each factory receives the tool manifest, which is currently unused.
    factories = {
        "shell_exec": lambda _manifest: ShellExecTool(self.tool_sandbox),
        "file_read": lambda _manifest: FileReadTool(self.tool_sandbox),
        "file_write": lambda _manifest: FileWriteTool(self.tool_sandbox),
        "memory": lambda _manifest: MemoryTool(self._memory_interface),
    }

    registry = ToolRegistry()

    for name, data in ToolDiscovery().discover().items():
        factory = factories.get(name)
        if not factory:
            print(f"No init mapping for tool: {name} - skipping")
            continue

        manifest = data.get("manifest", {})
        registry.register(factory(manifest))
        # NOTE(review): writes to the registry's private _schemas mapping directly.
        registry._schemas[name] = {
            "description": manifest.get("description", ""),
            "args_schema": manifest.get("args_schema", {}),
            "requires_permission": manifest.get("requires_permission", False),
        }
        print(f"Registered tool: {name}")

    return registry
def handle_critic_feedback(
    self,
    feedback: str,
    task_id: str | None = None,
    session_id: str | None = None,
    correctness_override: float | None = None,
    usefulness_override: float | None = None,
    safety_override: float | None = None,
) -> dict[str, object]:
    """Store user feedback about a task as a ``critique`` memory record.

    At least one of ``task_id`` / ``session_id`` is required. A missing
    session id is looked up from the task's stored state; a missing task id
    is taken from the most recent task of the session. Either may stay
    unresolved if the lookup finds nothing.

    Returns:
        ``{"status": "ok", ...}`` with the resolved ids on success, or
        ``{"status": "error", "message": ...}`` when memory is unavailable,
        no id was given, or the insert raised.
    """
    memory = self._memory_interface
    if not memory:
        return {"status": "error", "message": "Memory not available"}

    if not session_id and not task_id:
        return {
            "status": "error",
            "message": "Either task_id or session_id must be provided",
        }

    resolved_task = task_id
    resolved_session = session_id

    if not resolved_session:
        # Only the task id is known: recover the session from its state.
        task_state = self.task_state_store.get_task(resolved_task)
        if task_state:
            resolved_session = task_state.get("session_id")
    elif not resolved_task:
        # Only the session is known: attach to its most recent task.
        latest = self.task_state_store.get_session_tasks(resolved_session, limit=1)
        if latest:
            resolved_task = latest[0]["task_id"]

    # User feedback weight, clamped into the allowed range.
    lower_bound, upper_bound, requested = 0.3, 0.95, 0.9
    weight = max(lower_bound, min(upper_bound, requested))

    pieces = [f"User feedback: {feedback}"]
    for label, override in (
        ("Correctness", correctness_override),
        ("Usefulness", usefulness_override),
        ("Safety", safety_override),
    ):
        if override is not None:
            pieces.append(f"{label} corrected to: {override}")
    text = " | ".join(pieces)

    record_metadata = {
        "feedback_text": feedback,
        "overrides": {
            "correctness": correctness_override,
            "usefulness": usefulness_override,
            "safety": safety_override,
        },
        "source": "user",
    }

    try:
        memory.insert(
            text=text,
            kind="critique",
            source="user",
            task_id=resolved_task,
            session_id=resolved_session,
            weight=weight,
            metadata=record_metadata,
        )
    except Exception as e:
        return {"status": "error", "message": str(e)}

    return {
        "status": "ok",
        "message": "Feedback saved",
        "task_id": resolved_task,
        "session_id": resolved_session,
    }
def run_task(self, task: UserTask) -> dict[str, object]:
    """Run one task through a single route-and-execute pass.

    The raw task input is first checked by the permission service; a
    ``hard_stop`` decision fails the task immediately without creating task
    state. Otherwise the method creates the task state, checkpoints it,
    builds the context, asks the router for a directive, executes it,
    records any pending permission/secret/password request in the task
    state, saves a final checkpoint, publishes one terminal event and
    writes a memory record.

    Args:
        task: The user task to process.

    Returns:
        A dict with ``task_id``, ``status``, ``directive``, ``result`` and
        ``events`` (JSON dumps of the events listed for this task).
    """
    # Check input for hard-stop commands BEFORE processing
    hard_stop_check = self._permission_service.check_shell_command(
        task_id=task.task_id,
        session_id=task.session_id,
        command=task.input,
    )
    if hard_stop_check.get("decision") == "hard_stop":
        # Immediately reject hard-stop commands
        self._publish(task, TASK_RECEIVED, {"status": "received"})
        checkpoint = TaskCheckpoint(task_id=task.task_id, status="received")
        self._checkpoint_store.save(checkpoint)
        self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json"))

        error_msg = f"⚠️ BLOCKED: {hard_stop_check.get('reason', 'Hard stop command')}"
        self._publish(task, TASK_FAILED, {
            "directive": {},
            "execution_result": {"error": error_msg},
        })
        return {
            "task_id": task.task_id,
            "status": "failed",
            "directive": {},
            "result": {"error": error_msg},
            "events": [e.model_dump(mode="json") for e in self._event_bus.list_for_task(task.task_id)],
        }

    state = self._task_state_store.create_task(
        task.task_id,
        {
            "status": "received",
            "session_id": task.session_id,
            "plan": None,
            "task_input": task.input,
            "task_context": task.context,
        },
    )
    self._publish(task, TASK_RECEIVED, {"status": "received"})

    checkpoint = TaskCheckpoint(task_id=task.task_id, status="received")
    self._checkpoint_store.save(checkpoint)
    self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json"))

    context = self._context_builder.build(task=task, checkpoint=checkpoint)
    self._publish(task, CONTEXT_BUILT, {"keys": sorted(context.keys())})

    # The router is async; asyncio.run() cannot be called while another event
    # loop is running in this thread, so this method must be called from
    # synchronous code.
    directive = asyncio.run(
        self._router.decide(state=state, context=context, task_id=task.task_id, session_id=task.session_id)
    )
    execution_result = self._execution_engine.execute(task=task, directive=directive)
    # Record the outcome and exactly one kind of pending request (or none),
    # clearing the others so stale requests cannot be resolved later.
    state_patch = {"status": execution_result["status"], "last_directive": directive.model_dump(mode="json")}
    if execution_result["status"] == "awaiting_permission":
        state_patch["pending_permission_request"] = execution_result["result"]["permission_request"]
        state_patch["pending_secret_request"] = None
        state_patch["resolved_permission_decision"] = None
    elif execution_result["status"] == "awaiting_input":
        state_patch["pending_permission_request"] = None
        state_patch["pending_secret_request"] = execution_result["result"]["secret_request"]
        state_patch["resolved_permission_decision"] = None
    elif execution_result["status"] == "awaiting_password":
        state_patch["pending_permission_request"] = None
        state_patch["pending_secret_request"] = None
        state_patch["resolved_permission_decision"] = None
        # Attempt counter starts at zero; resolve_password() increments it.
        state_patch["pending_password_request"] = {
            "command": execution_result["result"].get("command", ""),
            "reason": "Permission denied - требуется sudo пароль",
            "attempts": 0,
        }
    else:
        state_patch["pending_permission_request"] = None
        state_patch["pending_secret_request"] = None
        state_patch["resolved_permission_decision"] = None
    self._task_state_store.update_task(task.task_id, state_patch)
    final_status = str(execution_result["status"])

    # For awaiting states - do NOT mark task as completed, keep it in pending state
    if final_status in ("awaiting_permission", "awaiting_input", "awaiting_password"):
        # Task stays in pending state, don't update to completed
        pass
    else:
        # NOTE(review): state_patch above already stored this status, so this
        # second update looks redundant - confirm before removing.
        self._task_state_store.update_task(task.task_id, {"status": final_status})

    # NOTE(review): unlike the initial checkpoint, no CHECKPOINT_SAVED event
    # is published for this final one - confirm whether that is intended.
    final_checkpoint = TaskCheckpoint(
        task_id=task.task_id,
        status=final_status,
        context_snapshot=context,
    )
    self._checkpoint_store.save(final_checkpoint)

    # Generate response after plan execution
    if final_status == "completed" and execution_result.get("result", {}).get("step_results"):
        # Format tool results into response
        step_results = execution_result["result"]["step_results"]
        response_parts = []
        for step in step_results:
            result_data = step.get("result", {})
            # Tool output may be nested under "result" or be the step result itself.
            tool_result = result_data.get("result", result_data)
            if tool_result.get("ok") and tool_result.get("output"):
                response_parts.append(tool_result["output"])

        if response_parts:
            # Create respond directive
            response_text = "\n\n".join(response_parts)
            respond_directive = ExecutionDirective(
                type="respond",
                payload={"text": response_text},
            )
            # Add to execution result
            # NOTE(review): stored at the top level of execution_result, but
            # the event payload and the return value below only expose
            # execution_result["result"], so this directive is not surfaced
            # by this method - confirm where it is consumed.
            execution_result["response_directive"] = respond_directive.model_dump(mode="json")

    # Map status to terminal event type
    if final_status == "completed":
        terminal_event_type = TASK_COMPLETED
    elif final_status == "failed":
        terminal_event_type = TASK_FAILED
    elif final_status == "awaiting_permission":
        terminal_event_type = TASK_AWAITING_PERMISSION
    elif final_status == "awaiting_input":
        terminal_event_type = TASK_AWAITING_INPUT
    elif final_status == "awaiting_password":
        # Password prompts reuse the permission event type.
        terminal_event_type = TASK_AWAITING_PERMISSION
    else:
        # Unknown statuses are reported as failures.
        terminal_event_type = TASK_FAILED
    self._publish(
        task,
        terminal_event_type,
        {
            "directive": directive.model_dump(mode="json"),
            "execution_result": execution_result["result"],
        },
    )

    # Save task and result to memory for session context
    self._save_to_memory(task, execution_result, final_status)

    return {
        "task_id": task.task_id,
        "status": final_status,
        "directive": directive.model_dump(mode="json"),
        "result": execution_result["result"],
        "events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
    }
return {"task_id": task_id, "status": "failed", "result": {"error": "No pending permission request"}} + + task = UserTask( + task_id=task_id, + session_id=state["session_id"], + input=state["task_input"], + context=state.get("task_context", {}), + ) + # Get command from pending request + command = pending_request_payload.get("command", "") + + # Resolve permission using new service + resolved = self._permission_service.resolve_permission( + task_id=task_id, + session_id=state["session_id"], + command=command, + decision=decision, + ) + + if decision == "deny": + execution_result = { + "status": "failed", + "result": { + "error": "Permission denied by user.", + "permission_decision": resolved, + }, + } + elif decision == "allow_with_password": + directive = ExecutionDirective.model_validate(last_directive_payload) + self._task_state_store.update_task( + task.task_id, + { + "status": "awaiting_password", + "pending_password_request": { + "command": command, + "reason": pending_request_payload.get("reason", "Требуется пароль для выполнения команды"), + "attempts": 0, + }, + "pending_permission_request": None, + }, + ) + self._publish(task, TASK_AWAITING_PERMISSION, { + "password_required": True, + "command": command, + }) + return { + "task_id": task_id, + "status": "awaiting_password", + "result": {"message": "Требуется ввод пароля"}, + } + else: + directive = ExecutionDirective.model_validate(last_directive_payload) + execution_result = self._execution_engine.execute( + task=task, + directive=directive, + ) + + final_status = str(execution_result["status"]) + if decision != "allow_with_password": + self._task_state_store.update_task( + task.task_id, + { + "status": final_status, + "pending_permission_request": None, + "pending_secret_request": execution_result["result"].get("secret_request") + if final_status == "awaiting_input" + else None, + "resolved_permission_decision": resolved, + }, + ) + checkpoint = TaskCheckpoint(task_id=task.task_id, status=final_status) 
+ self._checkpoint_store.save(checkpoint) + self._publish(task, CHECKPOINT_SAVED, checkpoint.model_dump(mode="json")) + if final_status == "completed": + terminal_event_type = TASK_COMPLETED + elif final_status == "awaiting_input": + terminal_event_type = TASK_AWAITING_INPUT + elif final_status == "awaiting_permission": + terminal_event_type = TASK_AWAITING_PERMISSION + else: + terminal_event_type = TASK_FAILED + self._publish( + task, + terminal_event_type, + { + "permission_resolution": resolved.model_dump(mode="json") if hasattr(resolved, 'model_dump') else resolved, + "execution_result": execution_result["result"], + }, + ) + + # Save to memory after permission resolution + self._save_to_memory(task, execution_result, final_status) + + return { + "task_id": task.task_id, + "status": final_status, + "result": execution_result["result"], + "events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)], + } + + def resolve_secret(self, task_id: str, secret: str) -> dict[str, object]: + state = self._task_state_store.get_task(task_id) + if not state: + return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}} + pending_secret_payload = state.get("pending_secret_request") + last_directive_payload = state.get("last_directive") + resolved_permission_payload = state.get("resolved_permission_decision") + if not pending_secret_payload or not last_directive_payload: + return {"task_id": task_id, "status": "failed", "result": {"error": "No pending secret request"}} + if not resolved_permission_payload: + return {"task_id": task_id, "status": "failed", "result": {"error": "No resolved permission available"}} + + task = UserTask( + task_id=task_id, + session_id=state["session_id"], + input=state["task_input"], + context=state.get("task_context", {}), + ) + _secret_request = SecretRequest.model_validate(pending_secret_payload) + directive = ExecutionDirective.model_validate(last_directive_payload) + 
def resolve_password(self, task_id: str, password: str) -> dict[str, object]:
    """Resume a task that is waiting for a password and re-run its last directive.

    The stored ``last_directive`` is executed again with ``password_override``
    set to *password*. A failure whose error text looks like a rejected
    password puts the task back into ``awaiting_password`` until three
    attempts have been used; after that the task is failed. Any other
    outcome (including failures unrelated to the password) is recorded with
    the status reported by the execution engine.

    Args:
        task_id: Identifier of the task with a pending password request.
        password: Password supplied by the user for this attempt.

    Returns:
        A dict with ``task_id``, ``status`` and ``result``; terminal outcomes
        that reach the common path also carry the task's ``events``.
    """
    max_attempts = 3

    state = self._task_state_store.get_task(task_id)
    if not state:
        return {"task_id": task_id, "status": "failed", "result": {"error": "Unknown task_id"}}

    pending_password_payload = state.get("pending_password_request")
    last_directive_payload = state.get("last_directive")
    if not pending_password_payload or not last_directive_payload:
        return {"task_id": task_id, "status": "failed", "result": {"error": "No pending password request"}}

    current_attempt = pending_password_payload.get("attempts", 0) + 1

    task = UserTask(
        task_id=task_id,
        session_id=state["session_id"],
        input=state["task_input"],
        context=state.get("task_context", {}),
    )
    directive = ExecutionDirective.model_validate(last_directive_payload)

    execution_result = self._execution_engine.execute(
        task=task,
        directive=directive,
        password_override=password,
    )
    final_status = str(execution_result["status"])

    if final_status == "failed":
        # The error may be missing, None or not a string; normalise before matching.
        result_payload = execution_result.get("result")
        raw_error = result_payload.get("error") if isinstance(result_payload, dict) else None
        error_msg = str(raw_error or "").lower()
        is_password_error = "permission denied" in error_msg or "incorrect password" in error_msg

        if is_password_error and current_attempt < max_attempts:
            # Keep the request pending so the user can try again.
            self._task_state_store.update_task(
                task.task_id,
                {
                    "status": "awaiting_password",
                    "pending_password_request": {
                        "command": pending_password_payload.get("command"),
                        "reason": pending_password_payload.get("reason"),
                        "attempts": current_attempt,
                    },
                },
            )
            self._publish(task, TASK_AWAITING_PERMISSION, {
                "password_attempt_failed": True,
                "attempts": current_attempt,
                "max_attempts": max_attempts,
                "message": "Неверный пароль. Попробуйте снова.",
            })
            return {
                "task_id": task_id,
                "status": "awaiting_password",
                "result": {"error": "Неверный пароль", "attempts": current_attempt, "max_attempts": max_attempts},
            }

        if is_password_error:
            # Attempts exhausted: fail the task and hand the decision back.
            self._task_state_store.update_task(
                task.task_id,
                {
                    "status": "failed",
                    "pending_password_request": None,
                    "password_attempts": current_attempt,
                },
            )
            self._publish(task, TASK_FAILED, {
                "password_failed": True,
                "attempts": current_attempt,
                "message": "Неверный пароль (3 попытки). Передаю решение модели.",
                "execution_result": execution_result["result"],
            })
            return {
                "task_id": task_id,
                "status": "failed",
                "result": {
                    "error": "Password failed after 3 attempts",
                    "attempts": current_attempt,
                    "message": "Пользователь 3 раза ввёл неверный пароль. Решение за вами.",
                },
            }
        # A failure unrelated to the password falls through and is reported
        # with the engine's own result instead of a fabricated password error.

    self._task_state_store.update_task(
        task.task_id,
        {
            "status": final_status,
            "pending_password_request": None,
        },
    )
    checkpoint = TaskCheckpoint(task_id=task.task_id, status=final_status)
    self._checkpoint_store.save(checkpoint)

    # Same status -> event mapping as the other resolve_* entry points.
    if final_status == "completed":
        terminal_event_type = TASK_COMPLETED
    elif final_status == "awaiting_input":
        terminal_event_type = TASK_AWAITING_INPUT
    elif final_status == "awaiting_permission":
        terminal_event_type = TASK_AWAITING_PERMISSION
    else:
        terminal_event_type = TASK_FAILED
    self._publish(task, terminal_event_type, {"execution_result": execution_result["result"]})

    # Save to memory after password resolution
    self._save_to_memory(task, execution_result, final_status)

    return {
        "task_id": task.task_id,
        "status": final_status,
        "result": execution_result["result"],
        "events": [event.model_dump(mode="json") for event in self._event_bus.list_for_task(task.task_id)],
    }
{status}{result_text}", + kind="tool_result", + source="system", + task_id=task.task_id, + session_id=task.session_id, + weight=0.7, + metadata={"status": status}, + ) + except Exception as e: + # Log but don't fail the task + import logging + logging.getLogger(__name__).warning(f"Failed to save to memory: {e}") diff --git a/app/services/__init__.py b/app/services/__init__.py new file mode 100644 index 0000000..6f66849 --- /dev/null +++ b/app/services/__init__.py @@ -0,0 +1,2 @@ +"""Shared services.""" + diff --git a/app/state/__init__.py b/app/state/__init__.py new file mode 100644 index 0000000..5cc321c --- /dev/null +++ b/app/state/__init__.py @@ -0,0 +1,2 @@ +"""Task state and checkpoints.""" + diff --git a/app/state/checkpoint_store.py b/app/state/checkpoint_store.py new file mode 100644 index 0000000..277d8b8 --- /dev/null +++ b/app/state/checkpoint_store.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +import json +import sqlite3 +from pathlib import Path + +from app.core.contracts import TaskCheckpoint + + +class SQLiteCheckpointStore: + """Durable checkpoint store for resumable runtime state.""" + + def __init__(self, db_path: str | Path) -> None: + self._db_path = Path(db_path) + self._db_path.parent.mkdir(parents=True, exist_ok=True) + self._initialize() + + def save(self, checkpoint: TaskCheckpoint) -> TaskCheckpoint: + with sqlite3.connect(self._db_path) as conn: + conn.execute( + """ + INSERT OR REPLACE INTO checkpoints ( + task_id, status, active_step_id, plan_snapshot_json, + context_snapshot_json, updated_at + ) VALUES (?, ?, ?, ?, ?, ?) 
class SQLiteTaskStateStore:
    """Durable task state store for runtime lifecycle state.

    Every task is one row in ``task_states``: the JSON-encoded state plus a
    ``session_id`` column that ``get_session_tasks`` filters on.
    """

    def __init__(self, db_path: str | Path) -> None:
        """Create the parent directory and the table if they do not exist."""
        self._db_path = Path(db_path)
        self._db_path.parent.mkdir(parents=True, exist_ok=True)
        self._initialize()

    def create_task(self, task_id: str, initial_state: dict[str, Any]) -> dict[str, Any]:
        """Insert (or overwrite) the row for *task_id* and return a copy of the state."""
        state = dict(initial_state)
        conn = sqlite3.connect(self._db_path)
        try:
            with conn:  # commits on success, rolls back on error
                conn.execute(
                    """
                    INSERT OR REPLACE INTO task_states (task_id, state_json, session_id)
                    VALUES (?, ?, ?)
                    """,
                    (task_id, json.dumps(state), state.get("session_id")),
                )
        finally:
            # The connection context manager does not close the connection.
            conn.close()
        return state

    def get_task(self, task_id: str) -> dict[str, Any] | None:
        """Return the stored state for *task_id*, or ``None`` when unknown."""
        conn = sqlite3.connect(self._db_path)
        try:
            row = conn.execute(
                "SELECT state_json FROM task_states WHERE task_id = ?",
                (task_id,),
            ).fetchone()
        finally:
            conn.close()
        return json.loads(row[0]) if row else None

    def update_task(self, task_id: str, patch: dict[str, Any]) -> dict[str, Any]:
        """Merge *patch* into the stored state and return the merged state.

        An existing row is updated in place so that its ``session_id`` column
        and rowid survive; REPLACE would delete and re-insert the row, which
        drops the session link and changes the ordering used by
        ``get_session_tasks``. An unknown *task_id* is created from the patch.
        """
        conn = sqlite3.connect(self._db_path)
        try:
            with conn:  # read-modify-write inside one transaction
                row = conn.execute(
                    "SELECT state_json FROM task_states WHERE task_id = ?",
                    (task_id,),
                ).fetchone()
                state: dict[str, Any] = json.loads(row[0]) if row else {}
                state.update(patch)
                state_json = json.dumps(state)
                session_id = state.get("session_id")
                if row:
                    # COALESCE keeps the stored session when the state has none.
                    conn.execute(
                        """
                        UPDATE task_states
                        SET state_json = ?, session_id = COALESCE(?, session_id)
                        WHERE task_id = ?
                        """,
                        (state_json, session_id, task_id),
                    )
                else:
                    conn.execute(
                        """
                        INSERT INTO task_states (task_id, state_json, session_id)
                        VALUES (?, ?, ?)
                        """,
                        (task_id, state_json, session_id),
                    )
        finally:
            conn.close()
        return state

    def _initialize(self) -> None:
        """Create the table and add the ``session_id`` column when it is missing."""
        conn = sqlite3.connect(self._db_path)
        try:
            with conn:
                conn.execute(
                    """
                    CREATE TABLE IF NOT EXISTS task_states (
                        task_id TEXT PRIMARY KEY,
                        state_json TEXT NOT NULL
                    )
                    """
                )
                # Check the schema explicitly instead of swallowing every
                # OperationalError raised by a blind ALTER TABLE.
                columns = {info[1] for info in conn.execute("PRAGMA table_info(task_states)")}
                if "session_id" not in columns:
                    conn.execute("ALTER TABLE task_states ADD COLUMN session_id TEXT")
        finally:
            conn.close()

    def get_session_tasks(self, session_id: str, limit: int = 10) -> list[dict[str, Any]]:
        """Return up to *limit* task states of *session_id*, newest row first."""
        conn = sqlite3.connect(self._db_path)
        try:
            rows = conn.execute(
                "SELECT state_json FROM task_states WHERE session_id = ? ORDER BY rowid DESC LIMIT ?",
                (session_id, limit),
            ).fetchall()
        finally:
            conn.close()
        return [json.loads(row[0]) for row in rows]
class ToolDiscovery:
    """Decentralized tool discovery system.

    Scans a plugins directory for sub-folders containing a ``manifest.json``
    and reports them keyed by the tool name declared in the manifest.
    """

    def __init__(self, plugins_dir: Path | None = None) -> None:
        """Use *plugins_dir* when given, otherwise the module default."""
        self._plugins_dir = plugins_dir or PLUGINS_DIR

    def discover(self) -> dict[str, Any]:
        """Discover all tools from plugins directory.

        Returns:
            Mapping of tool name to ``{"manifest": ..., "tool_class": ...}``
            where ``tool_class`` is the plugin folder name. Folders without a
            readable, object-shaped manifest are logged and skipped.
        """
        tools: dict[str, Any] = {}

        if not self._plugins_dir.exists():
            logger.warning("Plugins directory not found: %s", self._plugins_dir)
            return tools

        # Sort so the discovery order does not depend on the filesystem.
        for folder in sorted(self._plugins_dir.iterdir()):
            if not folder.is_dir():
                continue

            manifest_file = folder / "manifest.json"
            if not manifest_file.exists():
                logger.warning("Missing manifest.json in %s", folder.name)
                continue

            try:
                manifest = self._load_manifest(manifest_file)
                tool_name = manifest.get("name", folder.name)
                tools[tool_name] = {
                    "manifest": manifest,
                    "tool_class": folder.name,
                }
            except (OSError, ValueError, TypeError) as exc:
                # ValueError covers invalid JSON, bad UTF-8 and non-object
                # manifests; TypeError covers an unhashable "name" value.
                logger.error("Failed to load tool %s: %s", folder.name, exc)
                continue
            logger.info("Discovered tool: %s", tool_name)

        return tools

    def _load_manifest(self, manifest_file: Path) -> dict[str, Any]:
        """Read and parse one manifest file.

        Raises:
            OSError: The file cannot be read.
            ValueError: The content is not valid UTF-8 JSON or is not an object.
        """
        # Manifests are JSON, so decode as UTF-8 rather than the locale default.
        with open(manifest_file, encoding="utf-8") as f:
            manifest = json.load(f)
        if not isinstance(manifest, dict):
            raise ValueError(f"manifest must be a JSON object, got {type(manifest).__name__}")
        return manifest

    def _load_tool_class(self, tool_name: str, manifest: dict[str, Any]) -> Any:
        """Import ``app.tools.plugins.<tool_name>`` and return its entrypoint attribute."""
        entrypoint = manifest.get("entrypoint", "Tool")
        module = importlib.import_module(f"app.tools.plugins.{tool_name}")
        return getattr(module, entrypoint)

    def get_tool_schemas(self) -> list[dict[str, Any]]:
        """Get schemas for all discovered tools."""
        schemas = []
        for name, data in self.discover().items():
            manifest = data.get("manifest", {})
            schemas.append({
                "name": name,
                "description": manifest.get("description", ""),
                "args_schema": manifest.get("args_schema", {}),
                "requires_permission": manifest.get("requires_permission", False),
            })
        return schemas
class FileWriteTool(BaseTool):
    """Tool that writes UTF-8 text to a path resolved through the sandbox."""

    name = "file_write"

    def __init__(self, sandbox: ToolSandbox) -> None:
        self._sandbox = sandbox

    def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
        """Write ``args["content"]`` to ``args["path"]``.

        Missing parent directories are created. A missing or ``None`` content
        writes an empty file. Filesystem errors are returned as a failed
        ``ToolResult`` with the same messages the ``file_write`` plugin uses.
        """
        path = args.get("path")
        if not path:
            return ToolResult(tool=self.name, ok=False, error="Missing path")

        # str(None) would write the literal text "None" into the file.
        raw_content = args.get("content")
        content = "" if raw_content is None else str(raw_content)

        try:
            resolved = self._sandbox.ensure_path_allowed(str(path))
            resolved.parent.mkdir(parents=True, exist_ok=True)
            resolved.write_text(content, encoding="utf-8")
        except PermissionError as exc:
            return ToolResult(tool=self.name, ok=False, error=f"Access denied: {exc}")
        except OSError as exc:
            return ToolResult(tool=self.name, ok=False, error=f"Error: {exc}")

        # Report encoded bytes; len(content) counts characters and is wrong
        # for non-ASCII text.
        size = len(content.encode("utf-8"))
        return ToolResult(
            tool=self.name,
            ok=True,
            output=f"Wrote {size} bytes",
            metadata={"path": str(resolved), "size": size},
        )
class MemorySearchTool(BaseTool):
    """Tool that queries the memory interface and formats the scored hits."""

    _name = "memory_search"
    _description = "Search memory for information"

    def __init__(self, sandbox: ToolSandbox, memory_interface=None) -> None:
        super().__init__()
        self._sandbox = sandbox
        self._memory = memory_interface

    def execute(self, task: UserTask, args: dict[str, Any]) -> ToolResult:
        """Search memory for ``args["query"]`` and return one line per hit.

        ``args["top_k"]`` (default 5) is forwarded to the memory interface.
        Each output line is the score followed by the first 100 characters of
        the entry text.
        """
        query = args.get("query", "")
        top_k = args.get("top_k", 5)

        if not query:
            return ToolResult(tool="memory_search", ok=False, output="", error="query is required")
        if not self._memory:
            return ToolResult(tool="memory_search", ok=False, output="", error="Memory not available")

        try:
            hits = self._memory.search(query, top_k=top_k)
            if hits:
                rendered = "\n".join(f"[{score:.2f}] {entry.text[:100]}" for entry, score in hits)
                return ToolResult(
                    tool="memory_search",
                    ok=True,
                    output=rendered,
                    metadata={"count": len(hits)},
                )
            return ToolResult(tool="memory_search", ok=True, output="No results found", metadata={"count": 0})
        except Exception as e:
            logger.warning(f"Memory search failed: {e}")
            return ToolResult(tool="memory_search", ok=False, output="", error=str(e))
class Tool(BaseTool):
    """Plugin tool that returns the UTF-8 text of a file."""

    name = "file_read"
    description = "Read file contents"

    def __init__(self, sandbox: ToolSandbox) -> None:
        self._sandbox = sandbox

    def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
        """Read ``args["path"]`` and return its content with path and size metadata.

        Every error is reported as a failed ``ToolResult`` instead of raising.
        """
        requested = args.get("path")
        if not requested:
            return ToolResult(tool=self.name, ok=False, error="Missing path")

        def failure(message: str) -> ToolResult:
            # Shared shape of every unsuccessful result below.
            return ToolResult(tool=self.name, ok=False, error=message)

        try:
            target = self._sandbox.ensure_path_allowed(str(requested))
            if not target.exists():
                return failure(f"File not found: {requested}")
            text = target.read_text(encoding="utf-8")
            return ToolResult(
                tool=self.name,
                ok=True,
                output=text,
                metadata={"path": str(target), "size": len(text)},
            )
        except PermissionError as exc:
            return failure(f"Access denied: {exc}")
        except FileNotFoundError:
            return failure(f"File not found: {requested}")
        except Exception as exc:
            return failure(f"Error: {exc}")
class Tool(BaseTool):
    """Plugin tool that exposes memory insert, search and list as one tool."""

    name = "memory"
    description = "Memory operations: insert, search, list"

    def __init__(self, memory_interface=None) -> None:
        self._memory = memory_interface

    def execute(self, task: UserTask, args: dict[str, Any]) -> ToolResult:
        """Dispatch on ``args["action"]`` (default ``"search"``)."""
        action = args.get("action", "search")
        handlers = (
            ("insert", self._insert),
            ("search", self._search),
            ("list", self._list),
        )
        for known_action, handler in handlers:
            if action == known_action:
                return handler(task, args)
        return ToolResult(tool=self.name, ok=False, error=f"Unknown action: {action}")

    def _insert(self, task: UserTask, args: dict[str, Any]) -> ToolResult:
        """Store ``args["text"]`` in memory, tagged with the task and session ids."""
        text = args.get("text", "")
        kind = args.get("kind", "fact")
        source = args.get("source", "user")
        weight = args.get("weight", 0.5)

        if not text:
            return ToolResult(tool=self.name, ok=False, output="", error="text is required")
        if not self._memory:
            return ToolResult(tool=self.name, ok=False, output="", error="Memory not available")

        try:
            stored = self._memory.insert(
                text=text,
                kind=kind,
                source=source,
                task_id=task.task_id,
                session_id=task.session_id,
                weight=weight,
            )
            return ToolResult(
                tool=self.name,
                ok=True,
                output=f"Stored: {stored.id}",
                metadata={"entry_id": stored.id},
            )
        except Exception as e:
            logger.warning(f"Memory insert failed: {e}")
            return ToolResult(tool=self.name, ok=False, output="", error=str(e))

    def _search(self, task: UserTask, args: dict[str, Any]) -> ToolResult:
        """Search memory for ``args["query"]`` and render one scored line per hit."""
        query = args.get("query", "")
        top_k = args.get("top_k", 5)

        if not query:
            return ToolResult(tool=self.name, ok=False, output="", error="query is required")
        if not self._memory:
            return ToolResult(tool=self.name, ok=False, output="", error="Memory not available")

        try:
            hits = self._memory.search(query, top_k=top_k)
            if hits:
                rendered = "\n".join(f"[{score:.2f}] {entry.text[:100]}" for entry, score in hits)
                return ToolResult(
                    tool=self.name,
                    ok=True,
                    output=rendered,
                    metadata={"count": len(hits)},
                )
            return ToolResult(tool=self.name, ok=True, output="No results found", metadata={"count": 0})
        except Exception as e:
            logger.warning(f"Memory search failed: {e}")
            return ToolResult(tool=self.name, ok=False, output="", error=str(e))

    def _list(self, task: UserTask, args: dict[str, Any]) -> ToolResult:
        """List recent memory entries, at most ``args["limit"]`` (default 10)."""
        limit = args.get("limit", 10)

        if not self._memory:
            return ToolResult(tool=self.name, ok=False, output="", error="Memory not available")

        try:
            recent = self._memory.get_recent(limit=limit)
            if recent:
                rendered = "\n".join(f"{entry.kind}: {entry.text[:80]}" for entry in recent)
                return ToolResult(
                    tool=self.name,
                    ok=True,
                    output=rendered,
                    metadata={"count": len(recent)},
                )
            return ToolResult(tool=self.name, ok=True, output="No memories", metadata={"count": 0})
        except Exception as e:
            logger.warning(f"Memory list failed: {e}")
            return ToolResult(tool=self.name, ok=False, output="", error=str(e))
class ToolRegistry:
    """In-memory registry of tool instances and their manifest-derived schemas."""

    def __init__(self) -> None:
        self._tools: dict[str, BaseTool] = {}
        self._schemas: dict[str, dict[str, Any]] = {}

    def register(self, tool: BaseTool) -> None:
        """Register *tool* under its own ``name``; no schema is recorded."""
        self._tools[tool.name] = tool

    def discover_and_init(
        self,
        init_factory: Callable[[dict], BaseTool] | None = None,
        *,
        discovery: ToolDiscovery | None = None,
    ) -> None:
        """Discover tools from plugins and initialize them.

        Args:
            init_factory: Called with ``{"name": ..., "manifest": ...}`` for
                every discovered tool; the instance it returns is registered.
                Without a factory, a pre-built ``"instance"`` entry in the
                discovery data is used when present.
            discovery: Object whose ``discover()`` returns the tool mapping.
                Defaults to a ``ToolDiscovery()`` over the default plugins
                directory.
        """
        if discovery is None:
            discovery = ToolDiscovery()

        for name, data in discovery.discover().items():
            manifest = data.get("manifest", {})

            if init_factory is not None:
                # The factory result used to be dropped, leaving the registry empty.
                tool = init_factory({"name": name, "manifest": manifest})
                if tool is None:
                    logger.warning("init_factory returned no tool for %s", name)
                    continue
            else:
                tool = data.get("instance")
                if not tool:
                    logger.warning("No init_factory provided for %s", name)
                    continue

            self._tools[name] = tool
            self._schemas[name] = {
                "description": manifest.get("description", ""),
                "args_schema": manifest.get("args_schema", {}),
                "requires_permission": manifest.get("requires_permission", False),
            }
            logger.info("Registered tool: %s", name)

    def get(self, name: str) -> BaseTool:
        """Return the tool registered as *name*.

        Raises:
            KeyError: No tool with that name is registered.
        """
        if name not in self._tools:
            raise KeyError(f"Tool {name} is not registered")
        return self._tools[name]

    def list_names(self) -> list[str]:
        """Return the names of all registered tools."""
        return list(self._tools.keys())

    def get_schema(self, name: str) -> dict[str, Any]:
        """Return the schema recorded for *name*, or an empty dict."""
        return self._schemas.get(name, {})

    def list_schemas(self) -> list[dict[str, Any]]:
        """Return every recorded schema with its tool ``name`` merged in."""
        return [
            {"name": name, **schema}
            for name, schema in self._schemas.items()
        ]
class ShellExecTool(BaseTool):
    """Tool that runs a shell command through the configured sandbox."""

    name = "shell_exec"

    # Lower-cased output fragments treated as "this command needs (valid)
    # sudo credentials" when the command exits non-zero.
    _SUDO_ERROR_MARKERS = (
        "permission denied",
        "incorrect password",
        "sudo: password incorrect",
        "wrong password",
    )

    def __init__(self, sandbox: ToolSandbox) -> None:
        self._sandbox = sandbox

    def execute(self, task: UserTask, args: dict[str, object]) -> ToolResult:
        """Run ``args["command"]`` in the sandbox and report the outcome.

        Args:
            task: The task on whose behalf the command runs (not read here).
            args: Tool arguments. ``command`` is required; ``cwd`` selects the
                working directory; ``stdin_secret`` is passed to the process
                on stdin; ``password`` makes the command run through
                ``sudo -S`` with the password supplied on stdin.

        Returns:
            A ``ToolResult`` whose ``ok`` reflects a zero exit code. Metadata
            carries ``exit_code`` and, when a command was run, ``needs_sudo``.
        """
        raw_command = args.get("command")
        # An explicit None must count as missing; str(None) would otherwise
        # execute the literal command "None".
        command = "" if raw_command is None else str(raw_command).strip()
        if not command:
            return ToolResult(tool=self.name, ok=False, error="Missing command", metadata={"exit_code": -1})

        cwd = args.get("cwd")
        stdin_secret = args.get("stdin_secret")
        password = args.get("password")

        stdin_data = None if stdin_secret is None else str(stdin_secret)
        if password:
            # Never interpolate the password into the shell string: that
            # allows shell injection through the secret and puts it on the
            # command line. sudo -S reads the password from the first stdin
            # line; any stdin_secret follows for the command itself.
            command = f"sudo -S {command}"
            stdin_data = f"{password}\n{stdin_data or ''}"

        completed = self._sandbox.run_shell(
            command=command,
            cwd=str(cwd) if cwd else None,
            stdin_data=stdin_data,
        )

        succeeded = completed.returncode == 0
        error_output = completed.stderr or completed.stdout
        # On failure prefer stderr, falling back to stdout when stderr is empty.
        output = completed.stdout if succeeded else error_output

        lowered = (error_output or "").lower()
        is_sudo_error = not succeeded and any(
            marker in lowered for marker in self._SUDO_ERROR_MARKERS
        )

        return ToolResult(
            tool=self.name,
            ok=succeeded,
            output=output,
            error=None if succeeded else f"Command failed with exit code {completed.returncode}",
            metadata={"exit_code": completed.returncode, "needs_sudo": is_sudo_error},
        )
stop", + "allow_once": false, + "allow_always": false, + "commands": [ + "rm -rf /", + "rm -rf /*", + "dd if=/dev/zero of=/dev/sd*", + "dd if=/dev/zero of=/dev/hd*", + "mkfs", + "> /dev/sd*", + "> /dev/hd*" + ] + }, + "no_always": { + "description": "Dangerous commands - allow once only", + "allow_once": true, + "allow_always": false, + "commands": [ + "rm -rf *", + "rm -rf .*", + "curl |", + "wget -O- |", + ":(){:|:&};:", + "fork", + "chmod -R 000", + "chmod -R 777", + "chown -R", + "shutdown", + "reboot", + "halt", + "init 0", + "init 6", + "telinit", + "systemctl stop", + "systemctl start", + "systemctl restart", + "service stop", + "service start", + "kill -9 -1", + "killall", + "pkill -9", + "reboot -f", + "shutdown -h now", + "poweroff", + "echo .* > /proc/", + "echo .* > /sys/" + ] + }, + "normal": { + "description": "Normal commands - allow once or always", + "allow_once": true, + "allow_always": true, + "commands": [ + "shell_exec", + "file_write" + ], + "file_extensions": [ + ".py", + ".txt", + ".json", + ".md", + ".yaml", + ".yml", + ".sh", + ".bash" + ] + } + }, + "path_settings": { + "allow_read_outside": true, + "allow_write_paths": [ + "/home/mirivlad/git/ducklm", + "/tmp" + ], + "require_confirmation_for_write": true, + "require_confirmation_for_shell": true + } +} \ No newline at end of file diff --git a/config/prompts.json b/config/prompts.json new file mode 100644 index 0000000..c909c0b --- /dev/null +++ b/config/prompts.json @@ -0,0 +1,8 @@ +{ + "orchestrator": "You are an expert orchestrator for a local AI agent system. 
Your role is to analyze the user's task and generate executable runtime steps.\n\nTool selection (choose the right tool):\n- shell_exec: for running commands, checking programs exist ('which', '--version'), searching files\n- file_read: for reading contents of a file (must be existing file path)\n- file_write: for creating or updating files\n- memory: for storing or searching memory\n\nSTRICT OUTPUT FORMAT - MUST follow exactly:\n\nSingle step:\n{\"type\": \"step\", \"payload\": {\"tool\": \"shell_exec\", \"args\": {\"command\": \"...\"}}}\n{\"type\": \"step\", \"payload\": {\"tool\": \"file_read\", \"args\": {\"path\": \"...\"}}}\n{\"type\": \"step\", \"payload\": {\"tool\": \"file_write\", \"args\": {\"path\": \"...\", \"content\": \"...\"}}}\n\nMulti-step plan:\n{\"type\": \"plan\", \"payload\": {\"steps\": [{\"tool\": \"file_read\", \"args\": {\"path\": \"...\"}, \"description\": \"...\", \"depends_on\": []}]}}\n\nDirect response:\n{\"type\": \"respond\", \"payload\": {\"text\": \"...\"}}\n\nIMPORTANT:\n- Use exactly {\"type\": \"step|plan|respond\", \"payload\": {...}} format\n- Do NOT output array alone\n- Do NOT use \"kind\" - use \"type\"\n- Respond ONLY with valid JSON\n- Your response MUST be complete valid JSON - the closing brace } MUST be present\n- Do NOT truncate your response - if you cannot fit all steps, use a single step\n\nTool selection:\n- For checking if a program/command exists: use shell_exec with 'which ' or ' --version'\n- For reading file contents: use file_read with path to file (NOT command)\n- For executing any command: use shell_exec\n- Previous experience (from memory) may help - consider it but YOU decide how to proceed", + "planning": "You are a planning specialist. 
Generate execution plans.\n\nOutput MUST be:\n{\"type\": \"plan\", \"version\": \"1.0\", \"payload\": {\"steps\": [{\"tool\": \"\", \"args\": {}, \"description\": \"...\", \"depends_on\": []}]}}\n\nRules:\n- Each step must have unique id (auto-generated)\n- Use \"depends_on\" for step ordering\n- Use \"tool\" for tool operations\n- Respond ONLY with valid JSON", + "coder": "You are an expert code generation model.\n\nOutput format:\n{\"type\": \"code\", \"payload\": {\"language\": \"python\", \"content\": \"...\"}}\n\nOR for completion:\n{\"type\": \"respond\", \"payload\": {\"text\": \"...\"}}\n\nGenerate clean, working code. Respond ONLY with valid JSON.", + "critic": "You are a critic model. Evaluate tool execution results.\n\nScoring criteria:\n- correctness: 0-1 (does result accomplish task?)\n- usefulness: 0-1 (is result useful?)\n- safety: 0-1 (is result safe?)\n- suggest_memory: boolean (should this be stored in memory?)\n- weight: 0-1 (importance score)\n- explanation: brief reasoning\n\nOutput format:\n{\"type\": \"evaluation\", \"payload\": {\"correctness\": 0.0-1.0, \"usefulness\": 0.0-1.0, \"safety\": 0.0-1.0, \"suggest_memory\": true|false, \"weight\": 0.0-1.0, \"explanation\": \"...\"}}\n\nRespond ONLY with valid JSON.", + "system": "You are ducklm, a local AI agent runtime.\n\nSTRICT RULES:\n- You MUST strictly follow execution schemas\n- You are NOT allowed to output free-form text\n- All outputs MUST be valid JSON matching runtime contracts\n- Use exact tool names from available tool set\n\nCurrent capabilities:\n- Execute shell commands (shell_exec)\n- Read/write files (file_read, file_write)\n- Memory operations (memory)\n\nAlways respond with valid JSON.", + "sys_util": "You are a STRICT JSON repair engine inside a production AI runtime.\nYour job is ONLY to fix invalid JSON syntax.\nYou are NOT allowed to:\n- change meaning of data\n- add new fields\n- remove valid fields\n- interpret intent\n- explain anything\n- reformat structure 
logically\n---\nINPUT:\nYou receive a malformed or invalid JSON string.\n---\nOUTPUT RULES:\n- Output ONLY valid JSON\n- No markdown\n- No comments\n- No explanations\n- No extra text\n---\nREPAIR RULES (STRICT):\nFix ONLY syntax issues:\n- missing or extra commas\n- missing quotes\n- incorrect brackets\n- trailing commas\n- invalid escaping\n- broken strings\n- unbalanced braces\nDO NOT:\n- rename keys\n- reorder fields intentionally\n- guess missing semantic data\n- \"improve\" structure\n---\nIMPORTANT:\nIf multiple valid repairs exist:\n\u2192 choose the minimal change that makes JSON valid\n---\nOUTPUT MUST BE VALID JSON OR NOTHING ELSE\nInvalid JSON:" +} \ No newline at end of file diff --git a/config/prompts/coder.md b/config/prompts/coder.md new file mode 100644 index 0000000..4dde8b6 --- /dev/null +++ b/config/prompts/coder.md @@ -0,0 +1,9 @@ +You are an expert code generation model. + +Output format: +{"type": "code", "payload": {"language": "python", "content": "..."}} + +OR for completion: +{"type": "respond", "payload": {"text": "..."}} + +Generate clean, working code. Respond ONLY with valid JSON. \ No newline at end of file diff --git a/config/prompts/critic.md b/config/prompts/critic.md new file mode 100644 index 0000000..f3b986e --- /dev/null +++ b/config/prompts/critic.md @@ -0,0 +1,14 @@ +You are a critic model. Evaluate tool execution results. + +Scoring criteria: +- correctness: 0-1 (does result accomplish task?) +- usefulness: 0-1 (is result useful?) +- safety: 0-1 (is result safe?) +- suggest_memory: boolean (should this be stored in memory?) +- weight: 0-1 (importance score) +- explanation: brief reasoning + +Output format: +{"type": "evaluation", "payload": {"correctness": 0.0-1.0, "usefulness": 0.0-1.0, "safety": 0.0-1.0, "suggest_memory": true|false, "weight": 0.0-1.0, "explanation": "..."}} + +Respond ONLY with valid JSON. 
\ No newline at end of file diff --git a/config/prompts/json_compiler.md b/config/prompts/json_compiler.md new file mode 100644 index 0000000..b944a93 --- /dev/null +++ b/config/prompts/json_compiler.md @@ -0,0 +1,25 @@ +You are a JSON Compiler. Convert semantic plan to strict JSON. + +INPUT: Semantic plan from Thinker +OUTPUT: Valid JSON only + +RULES: +- Convert ONLY, do not make decisions +- Do not invent tools +- Do not modify plan logic +- Do not skip steps +- Output ONLY valid JSON + +AVAILABLE TOOLS: +- file_write (requires permission) +- shell_exec (execute shell commands, requires permission) +- memory (no permission needed) +- file_read (no permission needed) +- respond (just return text to user, no execution) + +IMPORTANT: Use exactly "shell_exec" (not "shell") for shell commands! + +OUTPUT FORMAT: +{"type": "plan", "payload": {"steps": [{"id": "1", "tool": "shell_exec", "args": {"command": "..."}, "depends_on": []}]}} +OR +{"type": "respond", "payload": {"text": "..."}} diff --git a/config/prompts/orchestrator.md b/config/prompts/orchestrator.md new file mode 100644 index 0000000..e8eeb95 --- /dev/null +++ b/config/prompts/orchestrator.md @@ -0,0 +1,34 @@ +You are an expert orchestrator for a local AI agent system. Your role is to analyze the user's task and generate executable runtime steps. 
+ +Tool selection (choose the right tool): +- shell_exec: for running commands, checking programs exist ('which', '--version'), searching files +- file_read: for reading contents of a file (must be existing file path) +- file_write: for creating or updating files +- memory: for storing or searching memory + +STRICT OUTPUT FORMAT - MUST follow exactly: + +Single step: +{"type": "step", "payload": {"tool": "shell_exec", "args": {"command": "..."}}} +{"type": "step", "payload": {"tool": "file_read", "args": {"path": "..."}}} +{"type": "step", "payload": {"tool": "file_write", "args": {"path": "...", "content": "..."}}} + +Multi-step plan: +{"type": "plan", "payload": {"steps": [{"tool": "file_read", "args": {"path": "..."}, "description": "...", "depends_on": []}]}} + +Direct response: +{"type": "respond", "payload": {"text": "..."}} + +IMPORTANT: +- Use exactly {"type": "step|plan|respond", "payload": {...}} format +- Do NOT output array alone +- Do NOT use "kind" - use "type" +- Respond ONLY with valid JSON +- Your response MUST be complete valid JSON - the closing brace } MUST be present +- Do NOT truncate your response - if you cannot fit all steps, use a single step + +Tool selection: +- For checking if a program/command exists: use shell_exec with 'which ' or ' --version' +- For reading file contents: use file_read with path to file (NOT command) +- For executing any command: use shell_exec +- Previous experience (from memory) may help - consider it but YOU decide how to proceed \ No newline at end of file diff --git a/config/prompts/planning.md b/config/prompts/planning.md new file mode 100644 index 0000000..186cdf6 --- /dev/null +++ b/config/prompts/planning.md @@ -0,0 +1,10 @@ +You are a planning specialist. Generate execution plans. 
+ +Output MUST be: +{"type": "plan", "version": "1.0", "payload": {"steps": [{"tool": "", "args": {}, "description": "...", "depends_on": []}]}} + +Rules: +- Each step must have unique id (auto-generated) +- Use "depends_on" for step ordering +- Use "tool" for tool operations +- Respond ONLY with valid JSON \ No newline at end of file diff --git a/config/prompts/sys_util.md b/config/prompts/sys_util.md new file mode 100644 index 0000000..daeefb2 --- /dev/null +++ b/config/prompts/sys_util.md @@ -0,0 +1,41 @@ +You are a STRICT JSON repair engine inside a production AI runtime. +Your job is ONLY to fix invalid JSON syntax. +You are NOT allowed to: +- change meaning of data +- add new fields +- remove valid fields +- interpret intent +- explain anything +- reformat structure logically +--- +INPUT: +You receive a malformed or invalid JSON string. +--- +OUTPUT RULES: +- Output ONLY valid JSON +- No markdown +- No comments +- No explanations +- No extra text +--- +REPAIR RULES (STRICT): +Fix ONLY syntax issues: +- missing or extra commas +- missing quotes +- incorrect brackets +- trailing commas +- invalid escaping +- broken strings +- unbalanced braces +DO NOT: +- rename keys +- reorder fields intentionally +- guess missing semantic data +- "improve" structure +--- +IMPORTANT: +If multiple valid repairs exist: +→ choose the minimal change that makes JSON valid +--- +OUTPUT MUST BE VALID JSON OR NOTHING ELSE +Invalid JSON: \ No newline at end of file diff --git a/config/prompts/system.md b/config/prompts/system.md new file mode 100644 index 0000000..6e5c00f --- /dev/null +++ b/config/prompts/system.md @@ -0,0 +1,14 @@ +You are ducklm, a local AI agent runtime. 
+ +STRICT RULES: +- You MUST strictly follow execution schemas +- You are NOT allowed to output free-form text +- All outputs MUST be valid JSON matching runtime contracts +- Use exact tool names from available tool set + +Current capabilities: +- Execute shell commands (shell_exec) +- Read/write files (file_read, file_write) +- Memory operations (memory) + +Always respond with valid JSON. \ No newline at end of file diff --git a/config/prompts/thinker.md b/config/prompts/thinker.md new file mode 100644 index 0000000..4c3fc89 --- /dev/null +++ b/config/prompts/thinker.md @@ -0,0 +1,30 @@ +You are a Thinker. Analyze user task and create execution plan. + +CONTEXT: +{task_summary} +{memory_context} + +AVAILABLE TOOLS (injected at runtime): +{tools_json} + +INSTRUCTIONS: +1. Understand what user wants +2. Create step-by-step plan in natural language +3. Choose appropriate tools from available + +MODE: {mode_hint} +- If mode is "execution": create a plan with TOOL STEPS (shell_exec, file_write, etc) +- If mode is "conversation": just respond with text, NO tool execution +- If mode is "clarification_needed": ask user for clarification + +OUTPUT FORMAT (SEMANTIC PLAN - NOT JSON): +For execution mode: +ПЛАН: +Шаг 1: [use tool - e.g., shell_exec] +Шаг 2: [use tool] + +For conversation mode: +ОТВЕТ: [just text, no tools needed] + +For clarification: +ОТВЕТ: [вопрос пользователю для уточнения] diff --git a/config/runtime.json b/config/runtime.json new file mode 100644 index 0000000..a442b8c --- /dev/null +++ b/config/runtime.json @@ -0,0 +1,38 @@ +{ + "step_timeout_ms": 30000, + "task_timeout_ms": 300000, + "planner_retry_limit": 2, + "tool_retry_limit": 1, + "replan_limit": 1, + "max_execution_steps": 20, + "retrieval_top_k": 5, + "max_context_tokens": 8192, + "context_budgets": { + "system": 512, + "task": 512, + "memory": 2048, + "execution": 2048, + "tools": 1024, + "safety": 512 + }, + "reserve_for_generation_pct": 25, + "orchestrator_retry_limit": 2, + 
"memory_thresholds": { + "default_store_weight": 0.8 + }, + "critic_fallback_policy": "continue_without_critic", + "checkpoint_policy": { + "save_on_transition": true + }, + "event_retention_policy": { + "keep_all": true + }, + "streaming_settings": { + "enabled": true + }, + "debug": true, + "debug_orchestrator_log_length": 500, + "json_fix_retry_limit": 2, + "json_fix_use_sys_util": true, + "intent_classifier": "thinker" +} \ No newline at end of file diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/data/.gitkeep @@ -0,0 +1 @@ + diff --git a/data/events/.gitkeep b/data/events/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/data/events/.gitkeep @@ -0,0 +1 @@ + diff --git a/data/memory/.gitkeep b/data/memory/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/data/memory/.gitkeep @@ -0,0 +1 @@ + diff --git a/data/permissions/.gitkeep b/data/permissions/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/data/permissions/.gitkeep @@ -0,0 +1 @@ + diff --git a/data/state/.gitkeep b/data/state/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/data/state/.gitkeep @@ -0,0 +1 @@ + diff --git a/main.py b/main.py new file mode 100644 index 0000000..f11b955 --- /dev/null +++ b/main.py @@ -0,0 +1,5 @@ +from app.api.server import app + + +__all__ = ["app"] + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a9f2e9b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,29 @@ +[project] +name = "ducklm" +version = "0.1.0" +description = "Local event-driven multi-model execution runtime" +requires-python = ">=3.11" +dependencies = [ + "fastapi>=0.115", + "httpx>=0.28", + "pydantic>=2.7", + "uvicorn>=0.30", + "websockets>=15.0", + "llama-cpp-python>=0.2.0", + "hnswlib>=0.8.0", + "sentence-transformers>=3.0", + "numpy>=1.26", +] + +[build-system] +requires = ["setuptools>=68"] +build-backend = 
"setuptools.build_meta" + +[tool.setuptools] +packages = ["app"] + +[tool.setuptools.package-dir] +"" = "." + +[tool.pytest.ini_options] +pythonpath = ["."] diff --git a/scripts/server.sh b/scripts/server.sh new file mode 100755 index 0000000..21a03bb --- /dev/null +++ b/scripts/server.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +VENV_PYTHON="$ROOT_DIR/.venv/bin/python" +PID_FILE="$ROOT_DIR/data/runtime/server.pid" +LOG_FILE="$ROOT_DIR/data/runtime/server.log" +HOST="${HOST:-127.0.0.1}" +PORT="${PORT:-8000}" + +mkdir -p "$ROOT_DIR/data/runtime" + +export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}$ROOT_DIR/.venv/lib/python3.13/site-packages/llama_cpp/lib" +export GGML_VULKAN=1 + +is_running() { + if [[ -f "$PID_FILE" ]]; then + local pid + pid="$(cat "$PID_FILE")" + if kill -0 "$pid" >/dev/null 2>&1; then + return 0 + fi + fi + return 1 +} + +start_server() { + if is_running; then + echo "Server already running with PID $(cat "$PID_FILE")" + exit 0 + fi + nohup "$VENV_PYTHON" -m uvicorn main:app --host "$HOST" --port "$PORT" >"$LOG_FILE" 2>&1 & + echo $! >"$PID_FILE" + echo "Started server on http://$HOST:$PORT with PID $(cat "$PID_FILE")" + echo "Log: $LOG_FILE" +} + +stop_server() { + if ! 
is_running; then + echo "Server is not running" + rm -f "$PID_FILE" + exit 0 + fi + local pid + pid="$(cat "$PID_FILE")" + kill "$pid" + rm -f "$PID_FILE" + echo "Stopped server PID $pid" +} + +status_server() { + if is_running; then + echo "Server is running with PID $(cat "$PID_FILE") on http://$HOST:$PORT" + else + echo "Server is not running" + fi +} + +case "${1:-}" in + start) + start_server + ;; + stop) + stop_server + ;; + restart) + stop_server || true + start_server + ;; + status) + status_server + ;; + logs) + touch "$LOG_FILE" + tail -n 50 -f "$LOG_FILE" + ;; + *) + echo "Usage: $0 {start|stop|restart|status|logs}" + exit 1 + ;; +esac diff --git a/tests/test_api_handlers.py b/tests/test_api_handlers.py new file mode 100644 index 0000000..658d9e9 --- /dev/null +++ b/tests/test_api_handlers.py @@ -0,0 +1,27 @@ +from app.api.server import chat, health, resolve_permission, resolve_secret +from app.core.permission_resolution import PermissionResolutionRequest, SecretResolutionRequest +from app.core.contracts import UserTask + + +def test_health_handler() -> None: + assert health() == {"status": "ok"} + + +def test_chat_handler_returns_runtime_events() -> None: + body = chat(UserTask(input="hello from handler test")) + assert body["status"] == "completed" + assert body["events"][0]["type"] == "task_received" + + +def test_resolve_permission_handler_allows_completion() -> None: + initial = chat(UserTask(input="запусти pwd")) + if initial["status"] == "awaiting_permission": + body = resolve_permission( + PermissionResolutionRequest(task_id=initial["task_id"], decision="allow_once") + ) + assert body["status"] in {"completed", "failed"} + + +def test_resolve_secret_handler_requires_pending_request() -> None: + body = resolve_secret(SecretResolutionRequest(task_id="missing", secret="x")) + assert body["status"] == "failed" diff --git a/tests/test_contracts.py b/tests/test_contracts.py new file mode 100644 index 0000000..8bf8ce3 --- /dev/null +++ 
b/tests/test_contracts.py @@ -0,0 +1,37 @@ +from app.core.contracts import CriticScore, ExecutionDirective, PlanStep, UserTask + + +def test_user_task_defaults() -> None: + task = UserTask(input="hello") + assert task.task_id + assert task.session_id + + +def test_plan_step_supports_dependencies() -> None: + step = PlanStep( + id="step-1", + kind="tool", + tool="shell_exec", + description="run command", + depends_on=[], + ) + assert step.tool == "shell_exec" + + +def test_critic_score_bounds() -> None: + score = CriticScore( + correctness=1.0, + usefulness=0.5, + safety=0.0, + memory_store=False, + weight=0.2, + explanation="ok", + ) + assert score.weight == 0.2 + + +def test_execution_directive_defaults() -> None: + directive = ExecutionDirective(type="noop") + assert directive.payload == {} + assert directive.confidence == 0.0 + diff --git a/tests/test_runtime_loop.py b/tests/test_runtime_loop.py new file mode 100644 index 0000000..8bd69f2 --- /dev/null +++ b/tests/test_runtime_loop.py @@ -0,0 +1,25 @@ +from app.core.contracts import UserTask +from app.runtime.runtime_controller import RuntimeController + + +def test_runtime_loop_emits_basic_events() -> None: + controller = RuntimeController() + result = controller.handle_task(UserTask(input="hello runtime")) + event_types = [event["type"] for event in result["events"]] + assert result["status"] == "completed" + assert "message" in result["result"] + assert "task_received" in event_types + assert "context_built" in event_types + assert "task_completed" in event_types + + +def test_runtime_loop_routes_natural_language_shell_request_to_permission_flow() -> None: + controller = RuntimeController() + result = controller.handle_task(UserTask(input="запусти sudo apt update")) + event_types = [event["type"] for event in result["events"]] + assert result["status"] == "awaiting_permission" + assert result["directive"]["type"] == "tool" + assert result["directive"]["payload"]["tool"] == "shell_exec" + assert 
"permission_requested" in event_types + assert "task_awaiting_permission" in event_types + assert result["result"]["error"] == "Permission required before execution." diff --git a/tests/test_tools_flow.py b/tests/test_tools_flow.py new file mode 100644 index 0000000..5cc781d --- /dev/null +++ b/tests/test_tools_flow.py @@ -0,0 +1,147 @@ +import json +from pathlib import Path + +from app.core.contracts import UserTask +from app.runtime.runtime_controller import RuntimeController + + +def _write_config_tree(base_dir: Path) -> None: + (base_dir / "config").mkdir() + (base_dir / "data" / "events").mkdir(parents=True, exist_ok=True) + (base_dir / "data" / "state").mkdir(parents=True, exist_ok=True) + (base_dir / "data" / "permissions").mkdir(parents=True, exist_ok=True) + (base_dir / "models").mkdir(exist_ok=True) + + configs = { + "models.json": { + "orchestrator_path": "models/llama.gguf", + "coder_path": "models/xcoder.gguf", + "critic_path": "models/gemma.gguf", + "embeddings_path": "models/all-MiniLM-L6-v2", + "inference": {}, + }, + "prompts.json": { + "orchestration_prompt": "", + "planning_prompt": "", + "coder_prompt": "", + "critic_prompt": "", + }, + "permissions.json": { + "dangerous_commands": {"rm": "ask_always", "sudo": "ask_always"}, + "sensitive_paths": ["/etc", "/usr", "/var"], + "default_approval_behavior": "ask_always", + }, + "runtime.json": { + "step_timeout_ms": 5000, + "task_timeout_ms": 30000, + "planner_retry_limit": 1, + "tool_retry_limit": 0, + "replan_limit": 0, + "max_execution_steps": 5, + "retrieval_top_k": 3, + "memory_thresholds": {}, + "critic_fallback_policy": "continue_without_critic", + "checkpoint_policy": {"save_on_transition": True}, + "event_retention_policy": {"keep_all": True}, + "streaming_settings": {"enabled": True}, + }, + } + for name, payload in configs.items(): + (base_dir / "config" / name).write_text(json.dumps(payload), encoding="utf-8") + + +def test_file_write_and_read_tool_flow(tmp_path: Path) -> None: + 
_write_config_tree(tmp_path) + controller = RuntimeController(base_dir=tmp_path) + target = tmp_path / "notes" / "test.txt" + + write_result = controller.handle_task( + UserTask( + input="write a file", + context={ + "requested_tool": "file_write", + "tool_args": {"path": str(target), "content": "hello from ducklm"}, + }, + ) + ) + assert write_result["status"] == "completed" + assert target.read_text(encoding="utf-8") == "hello from ducklm" + + read_result = controller.handle_task( + UserTask( + input="read the file", + context={ + "requested_tool": "file_read", + "tool_args": {"path": str(target)}, + }, + ) + ) + assert read_result["status"] == "completed" + assert read_result["result"]["output"] == "hello from ducklm" + + +def test_shell_exec_requires_permission_for_dangerous_command(tmp_path: Path) -> None: + _write_config_tree(tmp_path) + controller = RuntimeController(base_dir=tmp_path) + result = controller.handle_task( + UserTask( + input="run dangerous shell command", + context={ + "requested_tool": "shell_exec", + "tool_args": {"command": "rm -rf /tmp/nonexistent"}, + }, + ) + ) + assert result["status"] == "awaiting_permission" + assert "permission_request" in result["result"] + + +def test_shell_exec_allows_safe_command(tmp_path: Path) -> None: + _write_config_tree(tmp_path) + controller = RuntimeController(base_dir=tmp_path) + result = controller.handle_task( + UserTask( + input="run safe shell command", + context={ + "requested_tool": "shell_exec", + "tool_args": {"command": "pwd"}, + }, + ) + ) + assert result["status"] == "completed" + assert str(tmp_path) in result["result"]["output"] + + +def test_permission_resolution_can_resume_task(tmp_path: Path) -> None: + _write_config_tree(tmp_path) + controller = RuntimeController(base_dir=tmp_path) + initial = controller.handle_task( + UserTask( + input="запусти sudo apt update", + ) + ) + assert initial["status"] == "awaiting_permission" + resumed = 
controller.resolve_permission(task_id=initial["task_id"], decision="deny") + assert resumed["status"] == "failed" + assert resumed["result"]["error"] == "Permission denied by user." + + +def test_sudo_permission_resolution_requests_secret_input(tmp_path: Path) -> None: + _write_config_tree(tmp_path) + controller = RuntimeController(base_dir=tmp_path) + initial = controller.handle_task(UserTask(input="запусти sudo apt update")) + assert initial["status"] == "awaiting_permission" + resumed = controller.resolve_permission(task_id=initial["task_id"], decision="allow_once") + assert resumed["status"] == "awaiting_input" + assert resumed["result"]["secret_request"]["kind"] == "sudo_password" + + +def test_secret_resolution_continues_after_pending_secret_saved(tmp_path: Path) -> None: + _write_config_tree(tmp_path) + controller = RuntimeController(base_dir=tmp_path) + initial = controller.handle_task(UserTask(input="запусти sudo apt update")) + resumed = controller.resolve_permission(task_id=initial["task_id"], decision="allow_once") + assert resumed["status"] == "awaiting_input" + final = controller.resolve_secret(task_id=initial["task_id"], secret="wrongpass") + assert final["status"] in {"completed", "failed"} + assert "error" in final["result"] or "output" in final["result"]