Add structured response feedback

This commit is contained in:
mirivlad 2026-05-10 23:57:50 +08:00
parent 2ba18e0b35
commit 77c2e37b95
4 changed files with 367 additions and 35 deletions

View File

@ -13,6 +13,12 @@ class CriticFeedbackRequest(BaseModel):
feedback: str feedback: str
task_id: str | None = None task_id: str | None = None
session_id: str | None = None session_id: str | None = None
feedback_type: str | None = None
severity: str | None = None
correction: str | None = None
remember: bool = True
retry: bool = False
assistant_answer: str | None = None
correctness_override: float | None = None correctness_override: float | None = None
usefulness_override: float | None = None usefulness_override: float | None = None
safety_override: float | None = None safety_override: float | None = None
@ -87,6 +93,12 @@ def critic_feedback(request: CriticFeedbackRequest) -> dict[str, object]:
feedback=request.feedback, feedback=request.feedback,
task_id=request.task_id, task_id=request.task_id,
session_id=request.session_id, session_id=request.session_id,
feedback_type=request.feedback_type,
severity=request.severity,
correction=request.correction,
remember=request.remember,
retry=request.retry,
assistant_answer=request.assistant_answer,
correctness_override=request.correctness_override, correctness_override=request.correctness_override,
usefulness_override=request.usefulness_override, usefulness_override=request.usefulness_override,
safety_override=request.safety_override, safety_override=request.safety_override,

View File

@ -68,6 +68,14 @@
font: inherit; font: inherit;
cursor: pointer; cursor: pointer;
} }
button.secondary {
background: #ffffff;
color: var(--accent);
border: 1px solid var(--border);
}
button.danger {
background: #b42318;
}
.messages, .events { .messages, .events {
display: grid; display: grid;
gap: 12px; gap: 12px;
@ -95,6 +103,48 @@
gap: 10px; gap: 10px;
margin-top: 12px; margin-top: 12px;
} }
.feedback-actions {
display: flex;
gap: 8px;
flex-wrap: wrap;
margin-top: 10px;
}
dialog {
border: 1px solid var(--border);
border-radius: 8px;
padding: 0;
width: min(560px, calc(100vw - 32px));
color: var(--text);
}
dialog::backdrop {
background: rgba(20, 18, 15, 0.4);
}
.modal-body {
display: grid;
gap: 12px;
padding: 18px;
background: var(--panel);
}
select {
width: 100%;
border: 1px solid var(--border);
background: #fff;
border-radius: 12px;
padding: 10px;
font: inherit;
}
label {
display: grid;
gap: 6px;
}
label.inline {
display: flex;
align-items: center;
gap: 8px;
}
label.inline input {
width: auto;
}
@media (max-width: 860px) { @media (max-width: 860px) {
.layout { grid-template-columns: 1fr; } .layout { grid-template-columns: 1fr; }
} }
@ -117,6 +167,53 @@
<div class="events" id="events"></div> <div class="events" id="events"></div>
</aside> </aside>
</div> </div>
<dialog id="feedbackDialog">
<form method="dialog" class="modal-body" id="feedbackForm">
<strong>Что было неверно?</strong>
<label>
Тип ошибки
<select id="feedbackType">
<option value="misunderstood_task">Неправильно понял задачу</option>
<option value="wrong_tool">Выбрал не тот инструмент</option>
<option value="wrong_command">Выполнил не ту команду</option>
<option value="should_have_checked">Ответил без проверки</option>
<option value="hallucination">Выдумал факт</option>
<option value="incomplete">Неполный ответ</option>
<option value="unsafe">Опасное действие</option>
<option value="bad_format">Плохой формат ответа</option>
<option value="other">Другое</option>
</select>
</label>
<label>
Критичность
<select id="feedbackSeverity">
<option value="minor">Мелкая ошибка</option>
<option value="major" selected>Существенная ошибка</option>
<option value="critical">Критическая ошибка</option>
</select>
</label>
<label>
Комментарий
<textarea id="feedbackText" placeholder="Что именно было неверно?"></textarea>
</label>
<label>
Как должно было быть
<textarea id="feedbackCorrection" placeholder="Корректировка или желаемое поведение"></textarea>
</label>
<label class="inline">
<input type="checkbox" id="feedbackRemember" checked />
Запомнить для похожих задач
</label>
<label class="inline">
<input type="checkbox" id="feedbackRetry" />
Исправить ответ сейчас
</label>
<div>
<button id="submitFeedbackBtn" value="submit">Отправить</button>
<button class="secondary" value="cancel">Отмена</button>
</div>
</form>
</dialog>
</main> </main>
<script> <script>
const messages = document.getElementById("messages"); const messages = document.getElementById("messages");
@ -130,6 +227,17 @@
let activePermissionBubble = null; let activePermissionBubble = null;
let activeSecretBubble = null; let activeSecretBubble = null;
let activePasswordBubble = null; let activePasswordBubble = null;
let currentTaskId = null;
let currentSessionId = "web-session";
let pendingFeedback = null;
const feedbackDialog = document.getElementById("feedbackDialog");
const feedbackForm = document.getElementById("feedbackForm");
const feedbackType = document.getElementById("feedbackType");
const feedbackSeverity = document.getElementById("feedbackSeverity");
const feedbackText = document.getElementById("feedbackText");
const feedbackCorrection = document.getElementById("feedbackCorrection");
const feedbackRemember = document.getElementById("feedbackRemember");
const feedbackRetry = document.getElementById("feedbackRetry");
function addBubble(title, body) { function addBubble(title, body) {
const el = document.createElement("div"); const el = document.createElement("div");
@ -137,6 +245,7 @@
el.innerHTML = `<strong>${title}</strong><div>${body}</div>`; el.innerHTML = `<strong>${title}</strong><div>${body}</div>`;
messages.appendChild(el); messages.appendChild(el);
messages.scrollTop = messages.scrollHeight; messages.scrollTop = messages.scrollHeight;
return el;
} }
function escapeHtml(value) { function escapeHtml(value) {
@ -147,11 +256,47 @@
} }
function addSystemMessage(title, text) { function addSystemMessage(title, text) {
addBubble(title, `<div>${escapeHtml(text)}</div>`); return addBubble(title, `<div>${escapeHtml(text)}</div>`);
} }
function addJsonBubble(title, data) { function addJsonBubble(title, data) {
addBubble(title, `<pre>${escapeHtml(JSON.stringify(data, null, 2))}</pre>`); return addBubble(title, `<pre>${escapeHtml(JSON.stringify(data, null, 2))}</pre>`);
}
function addFeedbackControls(bubble, answerText) {
if (!currentTaskId || !bubble) return;
const actions = document.createElement("div");
actions.className = "feedback-actions";
actions.innerHTML = `
<button class="secondary" data-kind="correct">Верно</button>
<button class="danger" data-kind="wrong">Неверно</button>
`;
actions.querySelector('[data-kind="correct"]').addEventListener("click", async () => {
const data = await submitFeedback({
feedback_type: "correct",
severity: "minor",
feedback: "User marked response as correct.",
correction: "",
remember: true,
retry: false,
assistant_answer: answerText
});
if (data?.status === "ok") {
actions.remove();
addSystemMessage("Feedback", "Оценка сохранена.");
}
});
actions.querySelector('[data-kind="wrong"]').addEventListener("click", () => {
pendingFeedback = { answerText, actions };
feedbackText.value = "";
feedbackCorrection.value = "";
feedbackType.value = "misunderstood_task";
feedbackSeverity.value = "major";
feedbackRemember.checked = true;
feedbackRetry.checked = false;
feedbackDialog.showModal();
});
bubble.appendChild(actions);
} }
function renderRuntimeResult(result, status) { function renderRuntimeResult(result, status) {
@ -180,13 +325,15 @@
return; return;
} }
if (result.message && !result.step_results) { if (result.message && !result.step_results) {
addSystemMessage("Runtime", result.message); const bubble = addSystemMessage("Runtime", result.message);
addFeedbackControls(bubble, result.message);
} }
if (result.step_results && Array.isArray(result.step_results)) { if (result.step_results && Array.isArray(result.step_results)) {
for (const step of result.step_results) { for (const step of result.step_results) {
const toolResult = step.result?.result || step.result; const toolResult = step.result?.result || step.result;
if (toolResult && toolResult.output) { if (toolResult && toolResult.output) {
addBubble("💻", escapeHtml(toolResult.output)); const bubble = addBubble("💻", escapeHtml(toolResult.output));
addFeedbackControls(bubble, String(toolResult.output));
} else if (toolResult && toolResult.error) { } else if (toolResult && toolResult.error) {
addSystemMessage("❌", toolResult.error); addSystemMessage("❌", toolResult.error);
} }
@ -194,10 +341,12 @@
return; return;
} }
if (typeof result.output === "string") { if (typeof result.output === "string") {
addBubble("Runtime", `<pre>${escapeHtml(result.output)}</pre>`); const bubble = addBubble("Runtime", `<pre>${escapeHtml(result.output)}</pre>`);
addFeedbackControls(bubble, result.output);
return; return;
} }
addJsonBubble("Runtime", result); const bubble = addJsonBubble("Runtime", result);
addFeedbackControls(bubble, JSON.stringify(result));
} }
function addEvent(event) { function addEvent(event) {
@ -224,7 +373,8 @@
if (directive.type === "respond") { if (directive.type === "respond") {
const text = directive.payload?.text || directive.payload?.message || ""; const text = directive.payload?.text || directive.payload?.message || "";
if (text) { if (text) {
addBubble("🤖", escapeHtml(text)); const bubble = addBubble("🤖", escapeHtml(text));
addFeedbackControls(bubble, text);
} }
} }
} }
@ -235,7 +385,8 @@
for (const step of execResult.step_results) { for (const step of execResult.step_results) {
const toolResult = step.result?.result || step.result; const toolResult = step.result?.result || step.result;
if (toolResult && toolResult.output) { if (toolResult && toolResult.output) {
addBubble("💻", escapeHtml(toolResult.output)); const bubble = addBubble("💻", escapeHtml(toolResult.output));
addFeedbackControls(bubble, String(toolResult.output));
} }
} }
} }
@ -458,10 +609,11 @@
} }
async function sendTask() { async function sendTask() {
const taskId = "web-" + Date.now();
const body = { const body = {
input: promptEl.value || "browser task", input: promptEl.value || "browser task",
task_id: "web-" + Date.now(), task_id: taskId,
session_id: "web-session", session_id: currentSessionId,
context: {} context: {}
}; };
if (!promptEl.value.trim()) return; if (!promptEl.value.trim()) return;
@ -483,6 +635,8 @@
lastPermissionRequest = null; lastPermissionRequest = null;
lastSecretRequest = null; lastSecretRequest = null;
lastPasswordRequest = null; lastPasswordRequest = null;
currentTaskId = data.task_id || taskId;
currentSessionId = body.session_id;
clearPermissionControls(); clearPermissionControls();
clearSecretControls(); clearSecretControls();
clearPasswordControls(); clearPasswordControls();
@ -504,6 +658,63 @@
}; };
} }
/**
 * Send a feedback payload for the current task to the critic endpoint.
 *
 * The payload is merged over the current task/session ids and POSTed to
 * `/critic/feedback`. When the reply carries a `retry_result`, the event
 * panel is reset, the retried run's events are replayed, the current task id
 * is advanced to the retried task, and its result is rendered. Any reply
 * whose `status` is not "ok" is reported as a system message.
 *
 * @param {object} payload Feedback fields; keys present here override the
 *   default `task_id` / `session_id`.
 * @returns {Promise<object|undefined>} The parsed reply, or `undefined` when
 *   there is no current task or the request could not be completed.
 */
async function submitFeedback(payload) {
  if (!currentTaskId) {
    addSystemMessage("Feedback", "Нет активной задачи для оценки.");
    return;
  }

  // NOTE(review): the ids are read at send time, so rating an older bubble
  // after a newer task has started attributes the feedback to the newest
  // task. Confirm whether that is intended.
  const requestBody = JSON.stringify({
    task_id: currentTaskId,
    session_id: currentSessionId,
    ...payload
  });

  let data;
  try {
    const response = await fetch("/critic/feedback", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: requestBody
    });
    data = await response.json();
  } catch (error) {
    // fetch() rejects on network failure and json() rejects on a non-JSON
    // body; report both instead of leaking an unhandled rejection.
    console.error("Feedback request failed", error);
    addSystemMessage("Feedback", "Не удалось сохранить feedback.");
    return;
  }

  // A valid JSON reply may still be `null` or a primitive.
  if (!data || typeof data !== "object") {
    addSystemMessage("Feedback", "Не удалось сохранить feedback.");
    return;
  }

  const retry = data.retry_result;
  if (retry) {
    events.innerHTML = "";
    seenEvents.clear();
    if (Array.isArray(retry.events)) {
      retry.events.forEach(addEvent);
    }
    currentTaskId = retry.task_id || currentTaskId;
    renderRuntimeResult(retry.result, retry.status);
  }

  if (data.status !== "ok") {
    addSystemMessage("Feedback", data.message || "Не удалось сохранить feedback.");
  }
  return data;
}
// Submit handler for the "what was wrong" dialog.
// "Отмена" just closes the dialog. Any other submission sends the structured
// feedback for the bubble recorded in `pendingFeedback`, then closes the
// dialog. The bubble's feedback buttons are removed only when the server
// confirmed the save, so the user can try again after a failure.
feedbackForm.addEventListener("submit", async (event) => {
  event.preventDefault();

  const submitter = event.submitter;
  if (submitter && submitter.value === "cancel") {
    pendingFeedback = null;
    feedbackDialog.close();
    return;
  }

  // Snapshot the target before awaiting so later state changes cannot
  // redirect which bubble gets updated.
  const target = pendingFeedback;
  const submitBtn = document.getElementById("submitFeedbackBtn");
  // Block repeated clicks while the request is in flight.
  if (submitBtn) submitBtn.disabled = true;

  let data;
  try {
    data = await submitFeedback({
      feedback_type: feedbackType.value,
      severity: feedbackSeverity.value,
      feedback: feedbackText.value.trim() || "User marked response as incorrect.",
      correction: feedbackCorrection.value.trim(),
      remember: feedbackRemember.checked,
      retry: feedbackRetry.checked,
      assistant_answer: target?.answerText || ""
    });
  } catch (error) {
    console.error("Feedback submission failed", error);
    addSystemMessage("Feedback", "Не удалось сохранить feedback.");
  } finally {
    // Always leave the dialog in a clean, closed state, even on failure.
    if (submitBtn) submitBtn.disabled = false;
    pendingFeedback = null;
    feedbackDialog.close();
  }

  if (data?.status === "ok") {
    target?.actions?.remove();
    addSystemMessage("Feedback", "Feedback отправлен.");
  }
});
sendBtn.addEventListener("click", sendTask); sendBtn.addEventListener("click", sendTask);
</script> </script>
</body> </body>

View File

@ -394,13 +394,16 @@ class RuntimeController:
feedback: str, feedback: str,
task_id: str | None = None, task_id: str | None = None,
session_id: str | None = None, session_id: str | None = None,
feedback_type: str | None = None,
severity: str | None = None,
correction: str | None = None,
remember: bool = True,
retry: bool = False,
assistant_answer: str | None = None,
correctness_override: float | None = None, correctness_override: float | None = None,
usefulness_override: float | None = None, usefulness_override: float | None = None,
safety_override: float | None = None, safety_override: float | None = None,
) -> dict[str, object]: ) -> dict[str, object]:
if not self._memory_interface:
return {"status": "error", "message": "Memory not available"}
target_task_id = task_id target_task_id = task_id
target_session_id = session_id target_session_id = session_id
@ -410,10 +413,9 @@ class RuntimeController:
"message": "Either task_id or session_id must be provided", "message": "Either task_id or session_id must be provided",
} }
if not target_session_id and target_task_id: state = self.task_state_store.get_task(target_task_id) if target_task_id else None
state = self.task_state_store.get_task(target_task_id) if not target_session_id and state:
if state: target_session_id = state.get("session_id")
target_session_id = state.get("session_id")
if not target_task_id and target_session_id: if not target_task_id and target_session_id:
recent_tasks = self.task_state_store.get_session_tasks(target_session_id, limit=1) recent_tasks = self.task_state_store.get_session_tasks(target_session_id, limit=1)
@ -426,8 +428,27 @@ class RuntimeController:
final_weight = max(min_weight, min(max_weight, user_weight)) final_weight = max(min_weight, min(max_weight, user_weight))
task_input = state.get("task_input") if state else None
last_directive = state.get("last_directive") if state else None
feedback_type = feedback_type or "other"
severity = severity or "major"
lesson = self._build_feedback_lesson(
feedback_type=feedback_type,
severity=severity,
feedback=feedback,
correction=correction,
task_input=task_input,
)
metadata = { metadata = {
"feedback_text": feedback, "feedback_text": feedback,
"feedback_type": feedback_type,
"severity": severity,
"correction": correction,
"assistant_answer": assistant_answer,
"task_input": task_input,
"last_directive": last_directive,
"overrides": { "overrides": {
"correctness": correctness_override, "correctness": correctness_override,
"usefulness": usefulness_override, "usefulness": usefulness_override,
@ -436,7 +457,7 @@ class RuntimeController:
"source": "user", "source": "user",
} }
feedback_text = f"User feedback: {feedback}" feedback_text = lesson
if correctness_override is not None: if correctness_override is not None:
feedback_text += f" | Correctness corrected to: {correctness_override}" feedback_text += f" | Correctness corrected to: {correctness_override}"
if usefulness_override is not None: if usefulness_override is not None:
@ -444,21 +465,91 @@ class RuntimeController:
if safety_override is not None: if safety_override is not None:
feedback_text += f" | Safety corrected to: {safety_override}" feedback_text += f" | Safety corrected to: {safety_override}"
retry_result = None
stored = False
store_error = None
try: try:
self._memory_interface.insert( if remember and self._memory_interface:
text=feedback_text, self._memory_interface.insert(
kind="critique", text=feedback_text,
source="user", kind="critique",
task_id=target_task_id, source="user",
session_id=target_session_id, task_id=target_task_id,
weight=final_weight, session_id=target_session_id,
metadata=metadata, weight=final_weight,
) metadata=metadata,
return { )
"status": "ok", stored = True
"message": "Feedback saved", elif remember and not self._memory_interface:
"task_id": target_task_id, store_error = "Memory not available"
"session_id": target_session_id,
}
except Exception as e: except Exception as e:
return {"status": "error", "message": str(e)} store_error = str(e)
if retry and task_input:
retry_input = self._build_retry_input(
task_input=task_input,
feedback=feedback,
feedback_type=feedback_type,
correction=correction,
)
retry_task = UserTask(
session_id=target_session_id or "feedback-retry",
input=retry_input,
context={
"feedback_retry": True,
"original_task_id": target_task_id,
"feedback_type": feedback_type,
"severity": severity,
"correction": correction,
},
)
retry_result = self.handle_task(retry_task)
status = "ok" if stored or not remember else "error"
return {
"status": status,
"message": "Feedback saved" if stored else (store_error or "Feedback accepted"),
"stored": stored,
"task_id": target_task_id,
"session_id": target_session_id,
"lesson": lesson,
"retry_result": retry_result,
}
def _build_feedback_lesson(
self,
feedback_type: str,
severity: str,
feedback: str,
correction: str | None,
task_input: str | None,
) -> str:
parts = [
"User critique lesson.",
f"Error type: {feedback_type}.",
f"Severity: {severity}.",
]
if task_input:
parts.append(f"Original task: {task_input}")
if feedback:
parts.append(f"What was wrong: {feedback}")
if correction:
parts.append(f"Preferred correction: {correction}")
return " | ".join(parts)
def _build_retry_input(
self,
task_input: str,
feedback: str,
feedback_type: str,
correction: str | None,
) -> str:
retry_input = (
f"Повтори задачу с учетом обратной связи.\n"
f"Исходная задача: {task_input}\n"
f"Тип ошибки: {feedback_type}\n"
f"Что было неверно: {feedback}\n"
)
if correction:
retry_input += f"Как должно быть: {correction}\n"
return retry_input

View File

@ -1,5 +1,6 @@
from app.api.server import chat, health, resolve_permission, resolve_secret from app.api.server import chat, critic_feedback, health, resolve_permission, resolve_secret
from app.core.permission_resolution import PermissionResolutionRequest, SecretResolutionRequest from app.core.permission_resolution import PermissionResolutionRequest, SecretResolutionRequest
from app.api.server import CriticFeedbackRequest
from app.core.contracts import UserTask from app.core.contracts import UserTask
@ -25,3 +26,20 @@ def test_resolve_permission_handler_allows_completion() -> None:
def test_resolve_secret_handler_requires_pending_request() -> None: def test_resolve_secret_handler_requires_pending_request() -> None:
body = resolve_secret(SecretResolutionRequest(task_id="missing", secret="x")) body = resolve_secret(SecretResolutionRequest(task_id="missing", secret="x"))
assert body["status"] == "failed" assert body["status"] == "failed"
def test_structured_feedback_can_be_accepted_without_memory_write() -> None:
    """Structured feedback sent with ``remember=False`` is accepted but not stored."""
    # Create a task first so the feedback has a task id to refer to.
    seeded = chat(UserTask(input="feedback target"))
    request = CriticFeedbackRequest(
        task_id=seeded["task_id"],
        feedback="wrong answer",
        feedback_type="hallucination",
        severity="major",
        correction="check first",
        remember=False,
    )

    response = critic_feedback(request)

    assert response["status"] == "ok"
    assert response["stored"] is False
    assert "hallucination" in response["lesson"]