# Patch for src/speech/speech.py (class SpeechRecognizer), three hunks.
# Text before the first "diff --git" header is commentary only.
#
# NOTE(review): in this copy the whole patch sits on one physical line, so the
# original line breaks, blank context lines and indentation are not visible.
# It presumably has to be restored to one-entry-per-line form before a patch
# tool can apply it - TODO confirm against the original commit (30cdf65).
#
# Hunk 1 (@@ -28,15 +28,14 @@), model loading:
#   * If the Vosk model directory is missing, the code no longer returns early;
#     it logs a warning and sets self.model_name = "whisper".
#   * "elif self.model_name == 'whisper'" becomes a plain "if", so the Whisper
#     branch can be reached after that fallback.
#   * The Whisper model size changes from "small" to "tiny", and a warning about
#     the first-run model download is logged before construction.
#   * NOTE(review): "self.model = Model(model_path)" is removed and no
#     replacement is added in this hunk. As far as this patch shows, self.model
#     is no longer assigned for Vosk when the model directory exists, and
#     recognize() returns None whenever self.model is falsy - confirm that the
#     removal is intended.
#
# Hunk 2 (@@ -69,12 +68,12 @@), recognize():
#   * _recognize_whisper is now called without "await", matching its change to
#     a regular method in hunk 3; _recognize_vosk is still awaited.
#   * One whitespace-only line is rewritten.
#   * NOTE(review): recognize() remains "async def" and now iterates the
#     transcription segments inline. That presumably blocks the event loop for
#     the duration of transcription - consider running it in an executor;
#     verify against the caller.
#
# Hunk 3 (@@ -98,14 +97,14 @@), _recognize_whisper():
#   * "async def" becomes "def" and "async for" becomes "for".
#   * The joined text is additionally passed through .strip().
#   * Two whitespace-only lines are rewritten.
diff --git a/src/speech/speech.py b/src/speech/speech.py index e4f8749..30cdf65 100644 --- a/src/speech/speech.py +++ b/src/speech/speech.py @@ -28,15 +28,14 @@ class SpeechRecognizer: model_path = os.path.expanduser("~/.vosk/models/vosk-model-ru") if not os.path.exists(model_path): - logger.warning(f"Модель Vosk не найдена по пути {model_path}") - return + logger.warning(f"Модель Vosk не найдена по пути {model_path}. Переключаюсь на Whisper") + self.model_name = "whisper" - self.model = Model(model_path) - - elif self.model_name == "whisper": + if self.model_name == "whisper": from faster_whisper import WhisperModel - self.model = WhisperModel("small", device="cpu", compute_type="int8") + logger.warning("Первый запуск Whisper может занять время - скачивается модель (~100MB)") + self.model = WhisperModel("tiny", device="cpu", compute_type="int8") except Exception as e: logger.error(f"Ошибка загрузки модели распознавания: {e}") @@ -69,12 +68,12 @@ class SpeechRecognizer: async def recognize(self, audio_path: str) -> Optional[str]: if not self.enabled or not self.model: return None - + try: if self.model_name == "vosk": return await self._recognize_vosk(audio_path) elif self.model_name == "whisper": - return await self._recognize_whisper(audio_path) + return self._recognize_whisper(audio_path) except Exception as e: logger.error(f"Ошибка распознавания: {e}") return None @@ -98,14 +97,14 @@ class SpeechRecognizer: result = json.loads(rec.FinalResult()) return result.get("text", "") - async def _recognize_whisper(self, audio_path: str) -> Optional[str]: + def _recognize_whisper(self, audio_path: str) -> Optional[str]: segments, info = self.model.transcribe(audio_path, language="ru") - + text_parts = [] - async for segment in segments: + for segment in segments: text_parts.append(segment.text) - - return " ".join(text_parts).strip() + + return " ".join(text_parts).strip() def toggle(self, enabled: bool): self.enabled = enabled