From 96c62b4c3ec6f36a89fc84387a5b6e730ccfbe6f Mon Sep 17 00:00:00 2001 From: mirivlad Date: Wed, 18 Mar 2026 20:24:15 +0800 Subject: [PATCH] =?UTF-8?q?Fix:=20=D0=BF=D0=B5=D1=80=D0=B5=D0=BA=D0=BB?= =?UTF-8?q?=D1=8E=D1=87=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BD=D0=B0=20Whisper?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20=D1=80=D0=B0=D1=81=D0=BF=D0=BE=D0=B7?= =?UTF-8?q?=D0=BD=D0=B0=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D1=80=D0=B5=D1=87?= =?UTF-8?q?=D0=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Whisper tiny модель вместо Vosk (весит ~100MB вместо 2GB) - Модель скачивается автоматически при первом запуске - Исправлена ошибка async/синхронного вызова распознавания - Обновлены настройки в .env Co-authored-by: Qwen-Coder --- src/speech/speech.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/speech/speech.py b/src/speech/speech.py index e4f8749..30cdf65 100644 --- a/src/speech/speech.py +++ b/src/speech/speech.py @@ -28,15 +28,14 @@ class SpeechRecognizer: model_path = os.path.expanduser("~/.vosk/models/vosk-model-ru") if not os.path.exists(model_path): - logger.warning(f"Модель Vosk не найдена по пути {model_path}") - return + logger.warning(f"Модель Vosk не найдена по пути {model_path}. Переключаюсь на Whisper") + self.model_name = "whisper" - self.model = Model(model_path) - - elif self.model_name == "whisper": + if self.model_name == "whisper": from faster_whisper import WhisperModel - self.model = WhisperModel("small", device="cpu", compute_type="int8") + logger.warning("Первый запуск Whisper может занять время - скачивается модель (~100MB)") + self.model = WhisperModel("tiny", device="cpu", compute_type="int8") except Exception as e: logger.error(f"Ошибка загрузки модели распознавания: {e}") @@ -69,12 +68,12 @@ class SpeechRecognizer: async def recognize(self, audio_path: str) -> Optional[str]: if not self.enabled or not self.model: return None - + try: if self.model_name == "vosk": return await self._recognize_vosk(audio_path) elif self.model_name == "whisper": - return await self._recognize_whisper(audio_path) + return self._recognize_whisper(audio_path) except Exception as e: logger.error(f"Ошибка распознавания: {e}") return None @@ -98,14 +97,14 @@ class SpeechRecognizer: result = json.loads(rec.FinalResult()) return result.get("text", "") - async def _recognize_whisper(self, audio_path: str) -> Optional[str]: + def _recognize_whisper(self, audio_path: str) -> Optional[str]: segments, info = self.model.transcribe(audio_path, language="ru") - + text_parts = [] - async for segment in segments: + for segment in segments: text_parts.append(segment.text) - - return " ".join(text_parts) + + return " ".join(text_parts).strip() def toggle(self, enabled: bool): self.enabled = enabled