From 96c62b4c3ec6f36a89fc84387a5b6e730ccfbe6f Mon Sep 17 00:00:00 2001
From: mirivlad <mirivlad@mirv.top>
Date: Wed, 18 Mar 2026 20:24:15 +0800
Subject: [PATCH] =?UTF-8?q?Fix:=20=D0=BF=D0=B5=D1=80=D0=B5=D0=BA=D0=BB?=
 =?UTF-8?q?=D1=8E=D1=87=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BD=D0=B0=20Whisper?=
 =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20=D1=80=D0=B0=D1=81=D0=BF=D0=BE=D0=B7?=
 =?UTF-8?q?=D0=BD=D0=B0=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D1=80=D0=B5=D1=87?=
 =?UTF-8?q?=D0=B8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Используется модель Whisper tiny вместо Vosk (весит ~100 МБ вместо 2 ГБ); если модель Vosk не найдена, выполняется автоматическое переключение на Whisper
- Модель скачивается автоматически при первом запуске
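- Исправлена ошибка смешения асинхронного и синхронного вызовов: _recognize_whisper стал обычным синхронным методом, а перебор сегментов выполняется через for вместо async for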
- Обновлены настройки в .env

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
---
 src/speech/speech.py | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/src/speech/speech.py b/src/speech/speech.py
index e4f8749..30cdf65 100644
--- a/src/speech/speech.py
+++ b/src/speech/speech.py
@@ -28,15 +28,14 @@ class SpeechRecognizer:
 
                 model_path = os.path.expanduser("~/.vosk/models/vosk-model-ru")
                 if not os.path.exists(model_path):
-                    logger.warning(f"Модель Vosk не найдена по пути {model_path}")
-                    return
+                    logger.warning(f"Модель Vosk не найдена по пути {model_path}. Переключаюсь на Whisper")
+                    self.model_name = "whisper"
 
-                self.model = Model(model_path)
-
-            elif self.model_name == "whisper":
+            if self.model_name == "whisper":
                 from faster_whisper import WhisperModel
 
-                self.model = WhisperModel("small", device="cpu", compute_type="int8")
+                logger.warning("Первый запуск Whisper может занять время - скачивается модель (~100MB)")
+                self.model = WhisperModel("tiny", device="cpu", compute_type="int8")
 
         except Exception as e:
             logger.error(f"Ошибка загрузки модели распознавания: {e}")
@@ -69,12 +68,12 @@ class SpeechRecognizer:
     async def recognize(self, audio_path: str) -> Optional[str]:
         if not self.enabled or not self.model:
             return None
-        
+
         try:
             if self.model_name == "vosk":
                 return await self._recognize_vosk(audio_path)
             elif self.model_name == "whisper":
-                return await self._recognize_whisper(audio_path)
+                return self._recognize_whisper(audio_path)
         except Exception as e:
             logger.error(f"Ошибка распознавания: {e}")
             return None
@@ -98,14 +97,14 @@ class SpeechRecognizer:
         result = json.loads(rec.FinalResult())
         return result.get("text", "")
     
-    async def _recognize_whisper(self, audio_path: str) -> Optional[str]:
+    def _recognize_whisper(self, audio_path: str) -> Optional[str]:
         segments, info = self.model.transcribe(audio_path, language="ru")
-        
+
         text_parts = []
-        async for segment in segments:
+        for segment in segments:
             text_parts.append(segment.text)
-        
-        return " ".join(text_parts)
+
+        return " ".join(text_parts).strip()
     
     def toggle(self, enabled: bool):
         self.enabled = enabled