Fix: переключение на Whisper для распознавания речи

- Модель Whisper tiny вместо Vosk (весит ~100 MB вместо 2 GB)
- Модель скачивается автоматически при первом запуске
- Исправлена ошибка вызова распознавания: синхронный код Whisper вызывался как асинхронный
- Обновлены настройки в .env

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
parent
9d91a9eed4
commit
96c62b4c3e
|
|
@@ -28,15 +28,14 @@ class SpeechRecognizer:
|
||||||
|
|
||||||
model_path = os.path.expanduser("~/.vosk/models/vosk-model-ru")
|
model_path = os.path.expanduser("~/.vosk/models/vosk-model-ru")
|
||||||
if not os.path.exists(model_path):
|
if not os.path.exists(model_path):
|
||||||
logger.warning(f"Модель Vosk не найдена по пути {model_path}")
|
logger.warning(f"Модель Vosk не найдена по пути {model_path}. Переключаюсь на Whisper")
|
||||||
return
|
self.model_name = "whisper"
|
||||||
|
|
||||||
self.model = Model(model_path)
|
if self.model_name == "whisper":
|
||||||
|
|
||||||
elif self.model_name == "whisper":
|
|
||||||
from faster_whisper import WhisperModel
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
self.model = WhisperModel("small", device="cpu", compute_type="int8")
|
logger.warning("Первый запуск Whisper может занять время - скачивается модель (~100MB)")
|
||||||
|
self.model = WhisperModel("tiny", device="cpu", compute_type="int8")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Ошибка загрузки модели распознавания: {e}")
|
logger.error(f"Ошибка загрузки модели распознавания: {e}")
|
||||||
|
|
@@ -69,12 +68,12 @@ class SpeechRecognizer:
|
||||||
async def recognize(self, audio_path: str) -> Optional[str]:
|
async def recognize(self, audio_path: str) -> Optional[str]:
|
||||||
if not self.enabled or not self.model:
|
if not self.enabled or not self.model:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if self.model_name == "vosk":
|
if self.model_name == "vosk":
|
||||||
return await self._recognize_vosk(audio_path)
|
return await self._recognize_vosk(audio_path)
|
||||||
elif self.model_name == "whisper":
|
elif self.model_name == "whisper":
|
||||||
return await self._recognize_whisper(audio_path)
|
return self._recognize_whisper(audio_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Ошибка распознавания: {e}")
|
logger.error(f"Ошибка распознавания: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
@@ -98,14 +97,14 @@ class SpeechRecognizer:
|
||||||
result = json.loads(rec.FinalResult())
|
result = json.loads(rec.FinalResult())
|
||||||
return result.get("text", "")
|
return result.get("text", "")
|
||||||
|
|
||||||
async def _recognize_whisper(self, audio_path: str) -> Optional[str]:
|
def _recognize_whisper(self, audio_path: str) -> Optional[str]:
|
||||||
segments, info = self.model.transcribe(audio_path, language="ru")
|
segments, info = self.model.transcribe(audio_path, language="ru")
|
||||||
|
|
||||||
text_parts = []
|
text_parts = []
|
||||||
async for segment in segments:
|
for segment in segments:
|
||||||
text_parts.append(segment.text)
|
text_parts.append(segment.text)
|
||||||
|
|
||||||
return " ".join(text_parts)
|
return " ".join(text_parts).strip()
|
||||||
|
|
||||||
def toggle(self, enabled: bool):
|
def toggle(self, enabled: bool):
|
||||||
self.enabled = enabled
|
self.enabled = enabled
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue