Add: распознавание голосовых сообщений

- Добавлен обработчик голосовых сообщений (filters.VOICE) - Команда /stt on|off для включения/выключения распознавания - Голосовые конвертируются в текст через Vosk/Whisper - Распознанный текст обрабатывается как обычное сообщение - Модель загружается при старте бота Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
2026-03-18 20:01:34 +08:00 · 2026-03-18 20:01:34 +08:00 · 9d91a9eed4
parent 5779dd7b14
commit 9d91a9eed4
1 changed files with 116 additions and 3 deletions
--- a/src/bot/main.py
+++ b/src/bot/main.py
@ -13,6 +13,7 @@ from telegram import InlineKeyboardButton, InlineKeyboardMarkup
 from config.config import get_settings
 from src.tools.orchestrator import Orchestrator
 from src.tools.xray import get_xray_client
 from src.speech.speech import SpeechRecognizer
 from src.bot.states import chat_state, ChatMode, XRayState
 from src.bot.config_manager import get_selected_tool, get_selected_model, set_tool
@ -27,6 +28,7 @@ logging.getLogger("asyncssh").setLevel(logging.ERROR)
 settings = get_settings()
 orchestrator = Orchestrator()
 speech_recognizer = SpeechRecognizer()
 DANGEROUS_PATTERNS = [
    r'\bwrite\b', r'\bedit\b', r'\bcopy\b', r'\bmove\b', r'\bdelete\b',
@ -124,7 +126,8 @@ async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
        "/open - Выбрать модель OpenCode\n"
        "/mode confirm/auto - Режим подтверждения\n"
        "/forget - Очистить историю\n"
-        "/xray [email] - Добавить пользователя XRay\n\n"
+        "/xray [email] - Добавить пользователя XRay\n"
        "/stt on|off - Распознавание речи\n\n"
        f"🔧 Текущая модель: {current_tool}"
    )
    if model:
@ -297,6 +300,24 @@ async def forget_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    await update.message.reply_text("🗑️ История чата очищена.")
 async def stt_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Включение/выключение распознавания речи"""
    if not context.args:
        status = "включено" if speech_recognizer.is_enabled() else "отключено"
        await update.message.reply_text(f"🎤 Распознавание речи: {status}\n\nИспользование: /stt on | off")
        return
    arg = context.args[0].lower()
    if arg == "on":
        speech_recognizer.toggle(True)
        await update.message.reply_text("🎤 Распознавание речи включено")
    elif arg == "off":
        speech_recognizer.toggle(False)
        await update.message.reply_text("🎤 Распознавание речи отключено")
    else:
        await update.message.reply_text("Использование: /stt on | off")
 async def xray_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """
    Команда /xray - добавление пользователя XRay
@ -406,6 +427,93 @@ async def handle_xray_email(update: Update, context: ContextTypes.DEFAULT_TYPE):
    await process_xray_email(update, context, email)
 async def handle_voice(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Обработчик голосовых сообщений"""
    if not speech_recognizer.is_enabled():
        await update.message.reply_text(
            "🎤 Распознавание речи отключено.\n"
            "Включите командой /stt on"
        )
        return
    chat_id = update.effective_chat.id
    # Получаем файл голосового сообщения
    file = await update.message.voice.get_file()
    # Создаем временный файл для сохранения
    temp_dir = "/tmp/valera_voice"
    os.makedirs(temp_dir, exist_ok=True)
    temp_ogg = os.path.join(temp_dir, f"{chat_id}_{file.file_id}.ogg")
    try:
        # Скачиваем файл
        await file.download_to_drive(temp_ogg)
        progress_msg = await update.message.reply_text("🎤 Распознаю голосовое сообщение...")
        # Конвертируем и распознаем
        wav_path = temp_ogg.replace(".ogg", ".wav")
        # Конвертация через ffmpeg
        process = await asyncio.create_subprocess_exec(
            "ffmpeg", "-i", temp_ogg, "-ar", "16000", "-ac", "1", wav_path,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )
        await process.communicate()
        # Распознавание
        text = await speech_recognizer.recognize(wav_path)
        # Удаляем временные файлы
        if os.path.exists(temp_ogg):
            os.remove(temp_ogg)
        if not text:
            await progress_msg.edit_text("❌ Не удалось распознать голосовое сообщение")
            return
        await progress_msg.delete()
        # Показываем распознанный текст
        await update.message.reply_text(f"🎤 Вы сказали:\n{text}")
        # Обрабатываем текст как обычное сообщение
        await process_text_as_message(update, context, text)
    except Exception as e:
        logger.error(f"Ошибка распознавания голоса: {e}")
        if os.path.exists(temp_ogg):
            os.remove(temp_ogg)
        await update.message.reply_text(f"❌ Ошибка распознавания: {e}")
 async def process_text_as_message(update: Update, context: ContextTypes.DEFAULT_TYPE, text: str):
    """Обработка распознанного текста как обычного сообщения"""
    chat_id = update.effective_chat.id
    mode = chat_state.get_mode(chat_id)
    tool = get_selected_tool()
    model = get_selected_model() if tool == "opencode" else None
    # Проверяем не ждем ли мы email для XRay
    if chat_state.get_xray_state(chat_id) == XRayState.WAITING_EMAIL:
        email = text.strip()
        await process_xray_email(update, context, email)
        return
    thinking_msg = await update.message.reply_text("🤔 Думаю...")
    result, success = await orchestrator.ask(text, chat_id, tool, model, yolo=True)
    text_result = result.strip() if result else ""
    if not text_result:
        text_result = "⚠️ Пустой ответ от модели."
    text_result = text_result[:4096]
    await thinking_msg.edit_text(text_result, parse_mode="Markdown")
 async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
    prompt = update.message.text
    chat_id = update.effective_chat.id
@ -439,6 +547,9 @@ def main():
    application = builder.build()
    # Загружаем модель распознавания речи
    speech_recognizer.load_model()
    application.add_handler(CommandHandler("start", start))
    application.add_handler(CommandHandler("help", help_command))
    application.add_handler(CommandHandler("mode", mode_command))
@ -446,7 +557,9 @@ def main():
    application.add_handler(CommandHandler("open", open_command))
    application.add_handler(CommandHandler("forget", forget_command))
    application.add_handler(CommandHandler("xray", xray_command))
    application.add_handler(CommandHandler("stt", stt_command))
    application.add_handler(CallbackQueryHandler(confirm_callback))
    application.add_handler(MessageHandler(filters.VOICE, handle_voice))
    application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))
    application.run_polling(allowed_updates=Update.ALL_TYPES)