#!/usr/bin/env python3 """ RSS Reader Tool - инструмент для чтения RSS/Atom лент. Бот может использовать этот инструмент автономно для получения новостей из подписанных лент пользователя. """ import sys import json import logging import sqlite3 import subprocess import os import re from pathlib import Path from datetime import datetime, timedelta from typing import List, Dict, Any, Optional from email.utils import parsedate_to_datetime from bot.tools import BaseTool, ToolResult, register_tool logger = logging.getLogger(__name__) class RSSTool(BaseTool): """Инструмент для работы с RSS лентами.""" name = "rss_tool" description = "Чтение RSS/Atom новостных лент. Управление подписками, получение новостей, дайджесты." category = "news" def __init__(self, db_path: str = None): self.db_path = Path(db_path) if db_path else Path(__file__).parent.parent.parent / "rss.db" self.lock_file = Path("/tmp/rss_fetch.lock") self.fetch_interval_minutes = 5 def _init_db(self): """Инициализировать БД.""" conn = sqlite3.connect(self.db_path) c = conn.cursor() c.execute(''' CREATE TABLE IF NOT EXISTS feeds ( id INTEGER PRIMARY KEY AUTOINCREMENT, url TEXT NOT NULL UNIQUE, title TEXT, last_fetched DATETIME, created_at DATETIME DEFAULT CURRENT_TIMESTAMP ) ''') c.execute(''' CREATE TABLE IF NOT EXISTS news ( id INTEGER PRIMARY KEY AUTOINCREMENT, feed_id INTEGER NOT NULL, guid TEXT NOT NULL, pub_date DATETIME, title TEXT, description TEXT, content TEXT, link TEXT, digest_flag INTEGER DEFAULT 0, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (feed_id) REFERENCES feeds(id), UNIQUE(feed_id, guid) ) ''') c.execute('CREATE INDEX IF NOT EXISTS idx_news_feed ON news(feed_id)') c.execute('CREATE INDEX IF NOT EXISTS idx_news_date ON news(pub_date)') conn.commit() conn.close() def _parse_feed(self, xml_content: str) -> List[Dict]: """Парсить RSS/Atom XML.""" items = [] # Remove CDATA markers xml = re.sub(r'', '', xml) # Find all items xml_items = re.findall(r']*>(.*?)', xml, re.DOTALL) for item in xml_items: # Title title_match = re.search(r'(.*?)', item, re.DOTALL) title = title_match.group(1).strip()[:500] if title_match else "" # GUID guid_match = re.search(r']*>(.*?)', item, re.DOTALL) guid = guid_match.group(1).strip() if guid_match else "" # Link link_match = re.search(r'(.*?)', item, re.DOTALL) link = link_match.group(1).strip() if link_match else "" # PubDate pub_match = re.search(r'(.*?)', item, re.DOTALL) pub = pub_match.group(1).strip() if pub_match else "" if not guid and link: guid = link if title and guid: items.append({'title': title, 'link': link, 'guid': guid, 'pub': pub}) return items def _insert_news(self, feed_id: int, title: str, link: str, guid: str, pub: str): """Вставить новость в БД.""" pdate = None if pub: try: dt = parsedate_to_datetime(pub) pdate = dt.strftime('%Y-%m-%d %H:%M:%S') except: pass if not pdate: pdate = datetime.now().strftime('%Y-%m-%d %H:%M:%S') if not link: link = guid conn = sqlite3.connect(self.db_path) c = conn.cursor() c.execute(''' INSERT OR IGNORE INTO news (feed_id, guid, pub_date, title, link) VALUES (?, ?, ?, ?, ?) ''', (feed_id, guid, pdate, title, link)) conn.commit() conn.close() async def fetch(self) -> ToolResult: """Получить свежие новости из всех лент.""" self._init_db() # Lock if self.lock_file.exists(): logger.warning("Другой fetch уже выполняется") return ToolResult( success=False, error="Другой процесс fetch уже выполняется", metadata={'status': 'locked'} ) with open(self.lock_file, 'w') as f: f.write(str(os.getpid())) try: total = 0 conn = sqlite3.connect(self.db_path) c = conn.cursor() c.execute("SELECT id, url FROM feeds") feeds = c.fetchall() conn.close() for feed_id, url in feeds: # Check last fetch conn = sqlite3.connect(self.db_path) c = conn.cursor() c.execute("SELECT last_fetched FROM feeds WHERE id = ?", (feed_id,)) row = c.fetchone() conn.close() if row and row[0]: last = datetime.strptime(row[0], '%Y-%m-%d %H:%M:%S') mins = (datetime.now() - last).total_seconds() / 60 if mins < self.fetch_interval_minutes: logger.info(f"Пропуск {url} ({int(mins)} мин назад)") continue logger.info(f"Получение: {url}") result = subprocess.run( ['curl', '-sL', '-m', '30', '-A', 'Mozilla/5.0', url], capture_output=True ) if result.returncode == 0 and result.stdout: # Декодируем с обработкой ошибок кодировки try: content = result.stdout.decode('utf-8', errors='ignore') except Exception: content = result.stdout.decode('latin-1', errors='ignore') count = 0 for item in self._parse_feed(content): self._insert_news(feed_id, item['title'], item['link'], item['guid'], item['pub']) count += 1 if count > 0: logger.info(f"Добавлено {count} элементов") total += count # Update last_fetched conn = sqlite3.connect(self.db_path) c = conn.cursor() c.execute("UPDATE feeds SET last_fetched = datetime('now') WHERE id = ?", (feed_id,)) conn.commit() conn.close() return ToolResult( success=True, data={'total_new_items': total}, metadata={'status': 'completed', 'action': 'fetch'} ) finally: self.lock_file.unlink(missing_ok=True) async def list_news(self, limit: int = 20, feed_id: Optional[int] = None, search: Optional[str] = None, undigested_only: bool = False) -> ToolResult: """Получить список новостей.""" self._init_db() conditions = ["1=1"] params = [] if feed_id: conditions.append(f"feed_id = ?") params.append(feed_id) if search: conditions.append(f"title LIKE ?") params.append(f"%{search}%") if undigested_only: conditions.append("digest_flag = 0") query = f""" SELECT id, feed_id, title, pub_date, link, digest_flag FROM news WHERE {' AND '.join(conditions)} ORDER BY created_at DESC, id DESC LIMIT ? """ params.append(limit) conn = sqlite3.connect(self.db_path) c = conn.cursor() c.execute(query, params) rows = c.fetchall() conn.close() news_list = [] for row in rows: news_list.append({ 'id': row[0], 'feed_id': row[1], 'title': row[2], 'pub_date': row[3], 'link': row[4], 'digest_flag': bool(row[5]) }) return ToolResult( success=True, data=news_list, metadata={'count': len(news_list), 'limit': limit, 'action': 'list'} ) async def add_feed(self, url: str, title: Optional[str] = None) -> ToolResult: """Добавить RSS ленту.""" self._init_db() conn = sqlite3.connect(self.db_path) c = conn.cursor() try: c.execute("INSERT INTO feeds (url, title) VALUES (?, ?)", (url, title or url)) conn.commit() return ToolResult( success=True, data={'url': url, 'title': title}, metadata={'status': 'added', 'action': 'add_feed'} ) except sqlite3.IntegrityError: return ToolResult( success=False, error=f"Лента уже существует: {url}" ) finally: conn.close() async def list_feeds(self) -> ToolResult: """Получить список всех лент.""" self._init_db() conn = sqlite3.connect(self.db_path) c = conn.cursor() c.execute("SELECT id, url, title, last_fetched, created_at FROM feeds ORDER BY id") rows = c.fetchall() conn.close() feeds = [] for row in rows: feeds.append({ 'id': row[0], 'url': row[1], 'title': row[2], 'last_fetched': row[3], 'created_at': row[4] }) return ToolResult( success=True, data=feeds, metadata={'count': len(feeds), 'action': 'list_feeds'} ) async def mark_digest(self, news_id: int) -> ToolResult: """Отметить новость как прочитанную (в дайджесте).""" self._init_db() conn = sqlite3.connect(self.db_path) c = conn.cursor() c.execute("UPDATE news SET digest_flag=1 WHERE id=?", (news_id,)) if c.rowcount == 0: conn.close() return ToolResult( success=False, error=f"Новость не найдена: {news_id}" ) conn.commit() conn.close() return ToolResult( success=True, data={'id': news_id}, metadata={'status': 'marked'} ) async def execute(self, action: str = "list", **kwargs) -> ToolResult: """ Выполнить действие с RSS. Args: action: Действие - fetch, list, add_feed, list_feeds, mark_digest kwargs: Дополнительные аргументы для действия """ actions = { 'fetch': self.fetch, 'list': lambda: self.list_news( limit=kwargs.get('limit', 20), feed_id=kwargs.get('feed_id'), search=kwargs.get('search'), undigested_only=kwargs.get('undigested_only', False) ), 'add_feed': lambda: self.add_feed( url=kwargs.get('url'), title=kwargs.get('title') ), 'list_feeds': self.list_feeds, 'mark_digest': lambda: self.mark_digest(news_id=kwargs.get('news_id')) } if action not in actions: return ToolResult( success=False, error=f"Неизвестное действие: {action}. Доступные: {list(actions.keys())}" ) logger.info(f"RSS действие: {action} с аргументами: {kwargs}") return await actions[action]() # Автоматическая регистрация при импорте @register_tool class RSSToolAuto(RSSTool): """Авто-регистрируемая версия RSSTool.""" pass