from __future__ import annotations import logging from typing import Any from app.core.contracts import TaskCheckpoint, UserTask logger = logging.getLogger(__name__) DEFAULT_BUDGETS = { "system": 512, "task": 512, "memory": 2048, "execution": 2048, "tools": 1024, "safety": 512, } class ContextBuilder: def __init__( self, memory_interface=None, tool_registry=None, config: dict[str, Any] | None = None, ) -> None: self._memory = memory_interface self._tool_registry = tool_registry self._config = config or {} self._max_tokens = self._config.get("max_context_tokens", 8192) self._budgets = self._config.get("context_budgets", DEFAULT_BUDGETS) self._reserve_pct = self._config.get("reserve_for_generation_pct", 25) def build( self, task: UserTask, checkpoint: TaskCheckpoint | None = None, query: str | None = None, ) -> dict[str, Any]: task_summary = task.input search_query = query or task_summary session_id = task.session_id memory_context = [] if self._memory: memory_context = self._retrieve_memory(search_query, session_id=session_id) budgets = self._calculate_budgets() reserved = self._reserve_for_generation() system_budget = budgets.get("system", 512) task_budget = budgets.get("task", 512) safety_budget = budgets.get("safety", 512) memory_budget = budgets.get("memory", 2048) truncated_memory = self._truncate_memory( memory_context, memory_budget ) # Get session history for follow-up context session_history = self._get_session_history(session_id) context = { "system_prompt": "", "task_summary": task_summary[:task_budget], "task_context": task.context, "memory_context": truncated_memory, "session_history": session_history, "execution_context": checkpoint.model_dump() if checkpoint else {}, "tool_context": self._get_tool_context(), "safety_context": {}, "constraints": { "budgets": budgets, "reserved_for_generation": reserved, "original_memory_count": len(memory_context), "truncated_memory_count": len(truncated_memory), }, } return context def _get_tool_context(self) -> list[dict[str, Any]]: """Expose available tools to orchestrator.""" if not self._tool_registry: return [] tools = [] for name in self._tool_registry.list_names(): tool = self._tool_registry.get(name) tools.append({ "name": name, "description": getattr(tool, "description", ""), }) return tools def _calculate_budgets(self) -> dict[str, int]: return dict(self._budgets) def _reserve_for_generation(self) -> int: return int(self._max_tokens * self._reserve_pct / 100) def _retrieve_memory( self, query: str, session_id: str | None = None, top_k: int = 5, ) -> list[dict[str, Any]]: if not self._memory: return [] try: results = self._memory.search(query, top_k=top_k, session_id=session_id) return [ { "id": entry.id, "text": entry.text, "kind": entry.kind, "source": entry.source, "weight": entry.weight, "score": score, } for entry, score in results ] except Exception as e: logger.warning(f"Memory retrieval failed: {e}") return [] def _get_session_history(self, session_id: str | None = None) -> list[dict[str, Any]]: """Get previous task summaries from the same session for context.""" if not self._memory or not session_id: return [] try: # Get recent entries from same session entries = self._memory.get_by_session(session_id, limit=5) # Filter to only task summaries summaries = [ { "id": entry.id, "text": entry.text, "kind": entry.kind, "source": entry.source, "weight": entry.weight, } for entry in entries if entry.kind in ("summary", "tool_result") ] return summaries except Exception as e: logger.warning(f"Session history retrieval failed: {e}") return [] def _truncate_memory( self, memory_context: list[dict[str, Any]], budget: int, ) -> list[dict[str, Any]]: if not memory_context: return [] estimated_per_entry = 50 max_entries = max(budget // estimated_per_entry, 1) if len(memory_context) > max_entries: return memory_context[:max_entries] return memory_context def estimate_tokens(self, text: str) -> int: if not text: return 0 return len(text.split()) * 4 // 3