Add structured response feedback
This commit is contained in:
parent
2ba18e0b35
commit
77c2e37b95
|
|
@ -13,6 +13,12 @@ class CriticFeedbackRequest(BaseModel):
|
|||
feedback: str
|
||||
task_id: str | None = None
|
||||
session_id: str | None = None
|
||||
feedback_type: str | None = None
|
||||
severity: str | None = None
|
||||
correction: str | None = None
|
||||
remember: bool = True
|
||||
retry: bool = False
|
||||
assistant_answer: str | None = None
|
||||
correctness_override: float | None = None
|
||||
usefulness_override: float | None = None
|
||||
safety_override: float | None = None
|
||||
|
|
@ -87,6 +93,12 @@ def critic_feedback(request: CriticFeedbackRequest) -> dict[str, object]:
|
|||
feedback=request.feedback,
|
||||
task_id=request.task_id,
|
||||
session_id=request.session_id,
|
||||
feedback_type=request.feedback_type,
|
||||
severity=request.severity,
|
||||
correction=request.correction,
|
||||
remember=request.remember,
|
||||
retry=request.retry,
|
||||
assistant_answer=request.assistant_answer,
|
||||
correctness_override=request.correctness_override,
|
||||
usefulness_override=request.usefulness_override,
|
||||
safety_override=request.safety_override,
|
||||
|
|
|
|||
|
|
@ -68,6 +68,14 @@
|
|||
font: inherit;
|
||||
cursor: pointer;
|
||||
}
|
||||
button.secondary {
|
||||
background: #ffffff;
|
||||
color: var(--accent);
|
||||
border: 1px solid var(--border);
|
||||
}
|
||||
button.danger {
|
||||
background: #b42318;
|
||||
}
|
||||
.messages, .events {
|
||||
display: grid;
|
||||
gap: 12px;
|
||||
|
|
@ -95,6 +103,48 @@
|
|||
gap: 10px;
|
||||
margin-top: 12px;
|
||||
}
|
||||
.feedback-actions {
|
||||
display: flex;
|
||||
gap: 8px;
|
||||
flex-wrap: wrap;
|
||||
margin-top: 10px;
|
||||
}
|
||||
dialog {
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 8px;
|
||||
padding: 0;
|
||||
width: min(560px, calc(100vw - 32px));
|
||||
color: var(--text);
|
||||
}
|
||||
dialog::backdrop {
|
||||
background: rgba(20, 18, 15, 0.4);
|
||||
}
|
||||
.modal-body {
|
||||
display: grid;
|
||||
gap: 12px;
|
||||
padding: 18px;
|
||||
background: var(--panel);
|
||||
}
|
||||
select {
|
||||
width: 100%;
|
||||
border: 1px solid var(--border);
|
||||
background: #fff;
|
||||
border-radius: 12px;
|
||||
padding: 10px;
|
||||
font: inherit;
|
||||
}
|
||||
label {
|
||||
display: grid;
|
||||
gap: 6px;
|
||||
}
|
||||
label.inline {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
}
|
||||
label.inline input {
|
||||
width: auto;
|
||||
}
|
||||
@media (max-width: 860px) {
|
||||
.layout { grid-template-columns: 1fr; }
|
||||
}
|
||||
|
|
@ -117,6 +167,53 @@
|
|||
<div class="events" id="events"></div>
|
||||
</aside>
|
||||
</div>
|
||||
<dialog id="feedbackDialog">
|
||||
<form method="dialog" class="modal-body" id="feedbackForm">
|
||||
<strong>Что было неверно?</strong>
|
||||
<label>
|
||||
Тип ошибки
|
||||
<select id="feedbackType">
|
||||
<option value="misunderstood_task">Неправильно понял задачу</option>
|
||||
<option value="wrong_tool">Выбрал не тот инструмент</option>
|
||||
<option value="wrong_command">Выполнил не ту команду</option>
|
||||
<option value="should_have_checked">Ответил без проверки</option>
|
||||
<option value="hallucination">Выдумал факт</option>
|
||||
<option value="incomplete">Неполный ответ</option>
|
||||
<option value="unsafe">Опасное действие</option>
|
||||
<option value="bad_format">Плохой формат ответа</option>
|
||||
<option value="other">Другое</option>
|
||||
</select>
|
||||
</label>
|
||||
<label>
|
||||
Критичность
|
||||
<select id="feedbackSeverity">
|
||||
<option value="minor">Мелкая ошибка</option>
|
||||
<option value="major" selected>Существенная ошибка</option>
|
||||
<option value="critical">Критическая ошибка</option>
|
||||
</select>
|
||||
</label>
|
||||
<label>
|
||||
Комментарий
|
||||
<textarea id="feedbackText" placeholder="Что именно было неверно?"></textarea>
|
||||
</label>
|
||||
<label>
|
||||
Как должно было быть
|
||||
<textarea id="feedbackCorrection" placeholder="Корректировка или желаемое поведение"></textarea>
|
||||
</label>
|
||||
<label class="inline">
|
||||
<input type="checkbox" id="feedbackRemember" checked />
|
||||
Запомнить для похожих задач
|
||||
</label>
|
||||
<label class="inline">
|
||||
<input type="checkbox" id="feedbackRetry" />
|
||||
Исправить ответ сейчас
|
||||
</label>
|
||||
<div>
|
||||
<button id="submitFeedbackBtn" value="submit">Отправить</button>
|
||||
<button class="secondary" value="cancel">Отмена</button>
|
||||
</div>
|
||||
</form>
|
||||
</dialog>
|
||||
</main>
|
||||
<script>
|
||||
const messages = document.getElementById("messages");
|
||||
|
|
@ -130,6 +227,17 @@
|
|||
let activePermissionBubble = null;
|
||||
let activeSecretBubble = null;
|
||||
let activePasswordBubble = null;
|
||||
let currentTaskId = null;
|
||||
let currentSessionId = "web-session";
|
||||
let pendingFeedback = null;
|
||||
const feedbackDialog = document.getElementById("feedbackDialog");
|
||||
const feedbackForm = document.getElementById("feedbackForm");
|
||||
const feedbackType = document.getElementById("feedbackType");
|
||||
const feedbackSeverity = document.getElementById("feedbackSeverity");
|
||||
const feedbackText = document.getElementById("feedbackText");
|
||||
const feedbackCorrection = document.getElementById("feedbackCorrection");
|
||||
const feedbackRemember = document.getElementById("feedbackRemember");
|
||||
const feedbackRetry = document.getElementById("feedbackRetry");
|
||||
|
||||
function addBubble(title, body) {
|
||||
const el = document.createElement("div");
|
||||
|
|
@ -137,6 +245,7 @@
|
|||
el.innerHTML = `<strong>${title}</strong><div>${body}</div>`;
|
||||
messages.appendChild(el);
|
||||
messages.scrollTop = messages.scrollHeight;
|
||||
return el;
|
||||
}
|
||||
|
||||
function escapeHtml(value) {
|
||||
|
|
@ -147,11 +256,47 @@
|
|||
}
|
||||
|
||||
/**
 * Render a plain-text system message as a chat bubble.
 *
 * The text is HTML-escaped before it is placed in the bubble body.
 *
 * @param {string} title - Bubble heading, passed through to addBubble.
 * @param {string} text - Message body, treated as plain text.
 * @returns {HTMLElement} The bubble element created by addBubble, so callers
 *   can attach extra controls to it.
 */
function addSystemMessage(title, text) {
  // Exactly one addBubble call: each call appends a new bubble to the chat.
  return addBubble(title, `<div>${escapeHtml(text)}</div>`);
}
|
||||
|
||||
/**
 * Render an arbitrary value as pretty-printed JSON inside a chat bubble.
 *
 * The serialized JSON is HTML-escaped and wrapped in <pre>.
 *
 * @param {string} title - Bubble heading, passed through to addBubble.
 * @param {*} data - Value serialized with JSON.stringify (2-space indent).
 * @returns {HTMLElement} The bubble element created by addBubble, so callers
 *   can attach extra controls to it.
 */
function addJsonBubble(title, data) {
  // Exactly one addBubble call: each call appends a new bubble to the chat.
  const pretty = JSON.stringify(data, null, 2);
  return addBubble(title, `<pre>${escapeHtml(pretty)}</pre>`);
}
|
||||
|
||||
/**
 * Attach "Верно" / "Неверно" rating buttons to an answer bubble.
 *
 * Does nothing when there is no active task or no bubble. "Верно" submits a
 * positive rating immediately; "Неверно" resets the feedback dialog fields
 * and opens the dialog, recording the target in `pendingFeedback`.
 *
 * @param {HTMLElement} bubble - Bubble element the controls are appended to.
 * @param {string} answerText - Answer text sent along as `assistant_answer`.
 * @returns {void}
 */
function addFeedbackControls(bubble, answerText) {
  if (!currentTaskId || !bubble) return;

  // Pin the task/session this answer belongs to. The globals move on when a
  // newer task (or a retry) starts, and feedback on an older bubble must
  // still be recorded against the task that produced it.
  const taskId = currentTaskId;
  const sessionId = currentSessionId;

  const actions = document.createElement("div");
  actions.className = "feedback-actions";
  actions.innerHTML = `
    <button class="secondary" data-kind="correct">Верно</button>
    <button class="danger" data-kind="wrong">Неверно</button>
  `;
  const correctBtn = actions.querySelector('[data-kind="correct"]');
  const wrongBtn = actions.querySelector('[data-kind="wrong"]');

  correctBtn.addEventListener("click", async () => {
    // Block repeated clicks while the request is in flight.
    correctBtn.disabled = true;
    try {
      const data = await submitFeedback({
        // submitFeedback spreads the payload after its own task/session ids,
        // so these pinned values take precedence over the globals.
        task_id: taskId,
        session_id: sessionId,
        feedback_type: "correct",
        severity: "minor",
        feedback: "User marked response as correct.",
        correction: "",
        remember: true,
        retry: false,
        assistant_answer: answerText
      });
      if (data?.status === "ok") {
        actions.remove();
        addSystemMessage("Feedback", "Оценка сохранена.");
      }
    } catch (error) {
      // Network or parsing failure: report it instead of leaving an
      // unhandled promise rejection.
      addSystemMessage("Feedback", "Не удалось сохранить feedback.");
    } finally {
      correctBtn.disabled = false;
    }
  });

  wrongBtn.addEventListener("click", () => {
    pendingFeedback = { answerText, actions, taskId, sessionId };
    // Reset the dialog to its defaults before showing it.
    feedbackText.value = "";
    feedbackCorrection.value = "";
    feedbackType.value = "misunderstood_task";
    feedbackSeverity.value = "major";
    feedbackRemember.checked = true;
    feedbackRetry.checked = false;
    feedbackDialog.showModal();
  });

  bubble.appendChild(actions);
}
|
||||
|
||||
function renderRuntimeResult(result, status) {
|
||||
|
|
@ -180,13 +325,15 @@
|
|||
return;
|
||||
}
|
||||
if (result.message && !result.step_results) {
|
||||
addSystemMessage("Runtime", result.message);
|
||||
const bubble = addSystemMessage("Runtime", result.message);
|
||||
addFeedbackControls(bubble, result.message);
|
||||
}
|
||||
if (result.step_results && Array.isArray(result.step_results)) {
|
||||
for (const step of result.step_results) {
|
||||
const toolResult = step.result?.result || step.result;
|
||||
if (toolResult && toolResult.output) {
|
||||
addBubble("💻", escapeHtml(toolResult.output));
|
||||
const bubble = addBubble("💻", escapeHtml(toolResult.output));
|
||||
addFeedbackControls(bubble, String(toolResult.output));
|
||||
} else if (toolResult && toolResult.error) {
|
||||
addSystemMessage("❌", toolResult.error);
|
||||
}
|
||||
|
|
@ -194,10 +341,12 @@
|
|||
return;
|
||||
}
|
||||
if (typeof result.output === "string") {
|
||||
addBubble("Runtime", `<pre>${escapeHtml(result.output)}</pre>`);
|
||||
const bubble = addBubble("Runtime", `<pre>${escapeHtml(result.output)}</pre>`);
|
||||
addFeedbackControls(bubble, result.output);
|
||||
return;
|
||||
}
|
||||
addJsonBubble("Runtime", result);
|
||||
const bubble = addJsonBubble("Runtime", result);
|
||||
addFeedbackControls(bubble, JSON.stringify(result));
|
||||
}
|
||||
|
||||
function addEvent(event) {
|
||||
|
|
@ -224,7 +373,8 @@
|
|||
if (directive.type === "respond") {
|
||||
const text = directive.payload?.text || directive.payload?.message || "";
|
||||
if (text) {
|
||||
addBubble("🤖", escapeHtml(text));
|
||||
const bubble = addBubble("🤖", escapeHtml(text));
|
||||
addFeedbackControls(bubble, text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -235,7 +385,8 @@
|
|||
for (const step of execResult.step_results) {
|
||||
const toolResult = step.result?.result || step.result;
|
||||
if (toolResult && toolResult.output) {
|
||||
addBubble("💻", escapeHtml(toolResult.output));
|
||||
const bubble = addBubble("💻", escapeHtml(toolResult.output));
|
||||
addFeedbackControls(bubble, String(toolResult.output));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -458,10 +609,11 @@
|
|||
}
|
||||
|
||||
async function sendTask() {
|
||||
const taskId = "web-" + Date.now();
|
||||
const body = {
|
||||
input: promptEl.value || "browser task",
|
||||
task_id: "web-" + Date.now(),
|
||||
session_id: "web-session",
|
||||
task_id: taskId,
|
||||
session_id: currentSessionId,
|
||||
context: {}
|
||||
};
|
||||
if (!promptEl.value.trim()) return;
|
||||
|
|
@ -483,6 +635,8 @@
|
|||
lastPermissionRequest = null;
|
||||
lastSecretRequest = null;
|
||||
lastPasswordRequest = null;
|
||||
currentTaskId = data.task_id || taskId;
|
||||
currentSessionId = body.session_id;
|
||||
clearPermissionControls();
|
||||
clearSecretControls();
|
||||
clearPasswordControls();
|
||||
|
|
@ -504,6 +658,63 @@
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * POST structured feedback to /critic/feedback and render any retry result.
 *
 * `task_id` and `session_id` default to the current globals; fields in
 * `payload` override them because the payload is spread last.
 *
 * @param {Object} payload - Feedback fields (feedback_type, severity,
 *   feedback, correction, remember, retry, assistant_answer, ...).
 * @returns {Promise<Object|undefined>} The parsed response body; an
 *   `{status: "error", message}` object when the request or JSON parsing
 *   fails; `undefined` when there is no active task.
 */
async function submitFeedback(payload) {
  if (!currentTaskId) {
    addSystemMessage("Feedback", "Нет активной задачи для оценки.");
    return;
  }

  const fallbackMessage = "Не удалось сохранить feedback.";
  let data;
  try {
    const response = await fetch("/critic/feedback", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        task_id: currentTaskId,
        session_id: currentSessionId,
        ...payload
      })
    });
    data = await response.json();
  } catch (error) {
    // fetch rejects on network failure and json() rejects on a non-JSON body
    // (e.g. an HTML error page); surface both as an ordinary error result so
    // callers never see a rejected promise.
    addSystemMessage("Feedback", fallbackMessage);
    return { status: "error", message: fallbackMessage };
  }

  const retryResult = data?.retry_result;
  if (retryResult) {
    // A retry produces a fresh run: reset the event panel and replay its events.
    events.innerHTML = "";
    seenEvents.clear();
    if (Array.isArray(retryResult.events)) {
      retryResult.events.forEach(addEvent);
    }
    // Update the active task before rendering so feedback controls on the
    // retried answer refer to the retry task.
    currentTaskId = retryResult.task_id || currentTaskId;
    renderRuntimeResult(retryResult.result, retryResult.status);
  }

  if (data?.status !== "ok") {
    addSystemMessage("Feedback", data?.message || fallbackMessage);
  }
  return data;
}
|
||||
|
||||
// Submit handler for the "what was wrong" dialog: sends the structured
// feedback for the bubble recorded in `pendingFeedback`, then closes the dialog.
feedbackForm.addEventListener("submit", async (event) => {
  // Closing is handled manually so the dialog stays open until the request ends.
  event.preventDefault();
  const submitter = event.submitter;
  if (submitter && submitter.value === "cancel") {
    feedbackDialog.close();
    return;
  }

  // Snapshot the target before awaiting; the global is cleared below.
  const target = pendingFeedback;
  const payload = {
    feedback_type: feedbackType.value,
    severity: feedbackSeverity.value,
    feedback: feedbackText.value.trim() || "User marked response as incorrect.",
    correction: feedbackCorrection.value.trim(),
    remember: feedbackRemember.checked,
    retry: feedbackRetry.checked,
    assistant_answer: target?.answerText || ""
  };
  // When the target carries the ids of the task it was rendered for, send
  // those instead of the current globals. Only set them when present so an
  // undefined value never overrides submitFeedback's defaults.
  if (target?.taskId) payload.task_id = target.taskId;
  if (target?.sessionId) payload.session_id = target.sessionId;

  let data;
  try {
    data = await submitFeedback(payload);
  } catch (error) {
    addSystemMessage("Feedback", "Не удалось сохранить feedback.");
  } finally {
    // Always release the modal, even when the request failed.
    pendingFeedback = null;
    feedbackDialog.close();
  }

  if (data?.status === "ok") {
    // Remove the rating buttons only once the feedback was accepted, so a
    // failed submission can be retried from the same bubble.
    target?.actions?.remove();
    addSystemMessage("Feedback", "Feedback отправлен.");
  }
});
|
||||
|
||||
sendBtn.addEventListener("click", sendTask);
|
||||
</script>
|
||||
</body>
|
||||
|
|
|
|||
|
|
@ -394,13 +394,16 @@ class RuntimeController:
|
|||
feedback: str,
|
||||
task_id: str | None = None,
|
||||
session_id: str | None = None,
|
||||
feedback_type: str | None = None,
|
||||
severity: str | None = None,
|
||||
correction: str | None = None,
|
||||
remember: bool = True,
|
||||
retry: bool = False,
|
||||
assistant_answer: str | None = None,
|
||||
correctness_override: float | None = None,
|
||||
usefulness_override: float | None = None,
|
||||
safety_override: float | None = None,
|
||||
) -> dict[str, object]:
|
||||
if not self._memory_interface:
|
||||
return {"status": "error", "message": "Memory not available"}
|
||||
|
||||
target_task_id = task_id
|
||||
target_session_id = session_id
|
||||
|
||||
|
|
@ -410,9 +413,8 @@ class RuntimeController:
|
|||
"message": "Either task_id or session_id must be provided",
|
||||
}
|
||||
|
||||
if not target_session_id and target_task_id:
|
||||
state = self.task_state_store.get_task(target_task_id)
|
||||
if state:
|
||||
state = self.task_state_store.get_task(target_task_id) if target_task_id else None
|
||||
if not target_session_id and state:
|
||||
target_session_id = state.get("session_id")
|
||||
|
||||
if not target_task_id and target_session_id:
|
||||
|
|
@ -426,8 +428,27 @@ class RuntimeController:
|
|||
|
||||
final_weight = max(min_weight, min(max_weight, user_weight))
|
||||
|
||||
task_input = state.get("task_input") if state else None
|
||||
last_directive = state.get("last_directive") if state else None
|
||||
feedback_type = feedback_type or "other"
|
||||
severity = severity or "major"
|
||||
|
||||
lesson = self._build_feedback_lesson(
|
||||
feedback_type=feedback_type,
|
||||
severity=severity,
|
||||
feedback=feedback,
|
||||
correction=correction,
|
||||
task_input=task_input,
|
||||
)
|
||||
|
||||
metadata = {
|
||||
"feedback_text": feedback,
|
||||
"feedback_type": feedback_type,
|
||||
"severity": severity,
|
||||
"correction": correction,
|
||||
"assistant_answer": assistant_answer,
|
||||
"task_input": task_input,
|
||||
"last_directive": last_directive,
|
||||
"overrides": {
|
||||
"correctness": correctness_override,
|
||||
"usefulness": usefulness_override,
|
||||
|
|
@ -436,7 +457,7 @@ class RuntimeController:
|
|||
"source": "user",
|
||||
}
|
||||
|
||||
feedback_text = f"User feedback: {feedback}"
|
||||
feedback_text = lesson
|
||||
if correctness_override is not None:
|
||||
feedback_text += f" | Correctness corrected to: {correctness_override}"
|
||||
if usefulness_override is not None:
|
||||
|
|
@ -444,7 +465,11 @@ class RuntimeController:
|
|||
if safety_override is not None:
|
||||
feedback_text += f" | Safety corrected to: {safety_override}"
|
||||
|
||||
retry_result = None
|
||||
stored = False
|
||||
store_error = None
|
||||
try:
|
||||
if remember and self._memory_interface:
|
||||
self._memory_interface.insert(
|
||||
text=feedback_text,
|
||||
kind="critique",
|
||||
|
|
@ -454,11 +479,77 @@ class RuntimeController:
|
|||
weight=final_weight,
|
||||
metadata=metadata,
|
||||
)
|
||||
stored = True
|
||||
elif remember and not self._memory_interface:
|
||||
store_error = "Memory not available"
|
||||
except Exception as e:
|
||||
store_error = str(e)
|
||||
|
||||
if retry and task_input:
|
||||
retry_input = self._build_retry_input(
|
||||
task_input=task_input,
|
||||
feedback=feedback,
|
||||
feedback_type=feedback_type,
|
||||
correction=correction,
|
||||
)
|
||||
retry_task = UserTask(
|
||||
session_id=target_session_id or "feedback-retry",
|
||||
input=retry_input,
|
||||
context={
|
||||
"feedback_retry": True,
|
||||
"original_task_id": target_task_id,
|
||||
"feedback_type": feedback_type,
|
||||
"severity": severity,
|
||||
"correction": correction,
|
||||
},
|
||||
)
|
||||
retry_result = self.handle_task(retry_task)
|
||||
|
||||
status = "ok" if stored or not remember else "error"
|
||||
return {
|
||||
"status": "ok",
|
||||
"message": "Feedback saved",
|
||||
"status": status,
|
||||
"message": "Feedback saved" if stored else (store_error or "Feedback accepted"),
|
||||
"stored": stored,
|
||||
"task_id": target_task_id,
|
||||
"session_id": target_session_id,
|
||||
"lesson": lesson,
|
||||
"retry_result": retry_result,
|
||||
}
|
||||
except Exception as e:
|
||||
return {"status": "error", "message": str(e)}
|
||||
|
||||
def _build_feedback_lesson(
|
||||
self,
|
||||
feedback_type: str,
|
||||
severity: str,
|
||||
feedback: str,
|
||||
correction: str | None,
|
||||
task_input: str | None,
|
||||
) -> str:
|
||||
parts = [
|
||||
"User critique lesson.",
|
||||
f"Error type: {feedback_type}.",
|
||||
f"Severity: {severity}.",
|
||||
]
|
||||
if task_input:
|
||||
parts.append(f"Original task: {task_input}")
|
||||
if feedback:
|
||||
parts.append(f"What was wrong: {feedback}")
|
||||
if correction:
|
||||
parts.append(f"Preferred correction: {correction}")
|
||||
return " | ".join(parts)
|
||||
|
||||
def _build_retry_input(
|
||||
self,
|
||||
task_input: str,
|
||||
feedback: str,
|
||||
feedback_type: str,
|
||||
correction: str | None,
|
||||
) -> str:
|
||||
retry_input = (
|
||||
f"Повтори задачу с учетом обратной связи.\n"
|
||||
f"Исходная задача: {task_input}\n"
|
||||
f"Тип ошибки: {feedback_type}\n"
|
||||
f"Что было неверно: {feedback}\n"
|
||||
)
|
||||
if correction:
|
||||
retry_input += f"Как должно быть: {correction}\n"
|
||||
return retry_input
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
from app.api.server import chat, health, resolve_permission, resolve_secret
|
||||
from app.api.server import chat, critic_feedback, health, resolve_permission, resolve_secret
|
||||
from app.core.permission_resolution import PermissionResolutionRequest, SecretResolutionRequest
|
||||
from app.api.server import CriticFeedbackRequest
|
||||
from app.core.contracts import UserTask
|
||||
|
||||
|
||||
|
|
@ -25,3 +26,20 @@ def test_resolve_permission_handler_allows_completion() -> None:
|
|||
def test_resolve_secret_handler_requires_pending_request() -> None:
|
||||
body = resolve_secret(SecretResolutionRequest(task_id="missing", secret="x"))
|
||||
assert body["status"] == "failed"
|
||||
|
||||
|
||||
def test_structured_feedback_can_be_accepted_without_memory_write() -> None:
    """Feedback sent with remember=False is accepted but reported as not stored."""
    created = chat(UserTask(input="feedback target"))
    request = CriticFeedbackRequest(
        task_id=created["task_id"],
        feedback="wrong answer",
        feedback_type="hallucination",
        severity="major",
        correction="check first",
        remember=False,
    )

    body = critic_feedback(request)

    assert body["status"] == "ok"
    assert body["stored"] is False
    # The generated lesson text must mention the reported feedback type.
    assert "hallucination" in body["lesson"]
|
||||
|
|
|
|||
Loading…
Reference in New Issue