362 lines
12 KiB
Python
362 lines
12 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
RSS Reader Tool - инструмент для чтения RSS/Atom лент.
|
||
|
||
Бот может использовать этот инструмент автономно для получения новостей
|
||
из подписанных лент пользователя.
|
||
"""
|
||
|
||
import sys
|
||
import json
|
||
import logging
|
||
import sqlite3
|
||
import subprocess
|
||
import os
|
||
import re
|
||
from pathlib import Path
|
||
from datetime import datetime, timedelta
|
||
from typing import List, Dict, Any, Optional
|
||
from email.utils import parsedate_to_datetime
|
||
|
||
from bot.tools import BaseTool, ToolResult, register_tool
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class RSSTool(BaseTool):
    """Tool for working with RSS/Atom feeds.

    Stores subscriptions and fetched items in a local SQLite database and
    exposes fetch/list/subscribe/digest actions through the BaseTool
    interface.
    """

    name = "rss_reader"
    description = "Чтение RSS/Atom новостных лент. Управление подписками, получение новостей, дайджесты."
    category = "news"

    def __init__(self, db_path: Optional[str] = None):
        """Initialize tool state.

        Args:
            db_path: Path to the SQLite database file. Defaults to
                ``rss.db`` three directory levels above this module.
        """
        # Database file lives at the project root unless overridden.
        self.db_path = Path(db_path) if db_path else Path(__file__).parent.parent.parent / "rss.db"
        # Cross-process lock file guarding concurrent fetch() runs.
        self.lock_file = Path("/tmp/rss_fetch.lock")
        # Minimum minutes between fetches of the same feed.
        self.fetch_interval_minutes = 5
||
def _init_db(self):
|
||
"""Инициализировать БД."""
|
||
conn = sqlite3.connect(self.db_path)
|
||
c = conn.cursor()
|
||
c.execute('''
|
||
CREATE TABLE IF NOT EXISTS feeds (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
url TEXT NOT NULL UNIQUE,
|
||
title TEXT,
|
||
last_fetched DATETIME,
|
||
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||
)
|
||
''')
|
||
c.execute('''
|
||
CREATE TABLE IF NOT EXISTS news (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
feed_id INTEGER NOT NULL,
|
||
guid TEXT NOT NULL,
|
||
pub_date DATETIME,
|
||
title TEXT,
|
||
description TEXT,
|
||
content TEXT,
|
||
link TEXT,
|
||
digest_flag INTEGER DEFAULT 0,
|
||
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||
FOREIGN KEY (feed_id) REFERENCES feeds(id),
|
||
UNIQUE(feed_id, guid)
|
||
)
|
||
''')
|
||
c.execute('CREATE INDEX IF NOT EXISTS idx_news_feed ON news(feed_id)')
|
||
c.execute('CREATE INDEX IF NOT EXISTS idx_news_date ON news(pub_date)')
|
||
conn.commit()
|
||
conn.close()
|
||
|
||
def _parse_feed(self, xml_content: str) -> List[Dict]:
|
||
"""Парсить RSS/Atom XML."""
|
||
items = []
|
||
|
||
# Remove CDATA markers
|
||
xml = re.sub(r'<!\[CDATA\[', '', xml_content)
|
||
xml = re.sub(r'\]\]>', '', xml)
|
||
|
||
# Find all items
|
||
xml_items = re.findall(r'<item[^>]*>(.*?)</item>', xml, re.DOTALL)
|
||
|
||
for item in xml_items:
|
||
# Title
|
||
title_match = re.search(r'<title>(.*?)</title>', item, re.DOTALL)
|
||
title = title_match.group(1).strip()[:500] if title_match else ""
|
||
|
||
# GUID
|
||
guid_match = re.search(r'<guid[^>]*>(.*?)</guid>', item, re.DOTALL)
|
||
guid = guid_match.group(1).strip() if guid_match else ""
|
||
|
||
# Link
|
||
link_match = re.search(r'<link>(.*?)</link>', item, re.DOTALL)
|
||
link = link_match.group(1).strip() if link_match else ""
|
||
|
||
# PubDate
|
||
pub_match = re.search(r'<pubDate>(.*?)</pubDate>', item, re.DOTALL)
|
||
pub = pub_match.group(1).strip() if pub_match else ""
|
||
|
||
if not guid and link:
|
||
guid = link
|
||
|
||
if title and guid:
|
||
items.append({'title': title, 'link': link, 'guid': guid, 'pub': pub})
|
||
|
||
return items
|
||
|
||
def _insert_news(self, feed_id: int, title: str, link: str, guid: str, pub: str):
|
||
"""Вставить новость в БД."""
|
||
pdate = None
|
||
if pub:
|
||
try:
|
||
dt = parsedate_to_datetime(pub)
|
||
pdate = dt.strftime('%Y-%m-%d %H:%M:%S')
|
||
except:
|
||
pass
|
||
|
||
if not pdate:
|
||
pdate = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
||
|
||
if not link:
|
||
link = guid
|
||
|
||
conn = sqlite3.connect(self.db_path)
|
||
c = conn.cursor()
|
||
c.execute('''
|
||
INSERT OR IGNORE INTO news (feed_id, guid, pub_date, title, link)
|
||
VALUES (?, ?, ?, ?, ?)
|
||
''', (feed_id, guid, pdate, title, link))
|
||
conn.commit()
|
||
conn.close()
|
||
|
||
async def fetch(self) -> ToolResult:
|
||
"""Получить свежие новости из всех лент."""
|
||
self._init_db()
|
||
|
||
# Lock
|
||
if self.lock_file.exists():
|
||
logger.warning("Другой fetch уже выполняется")
|
||
return ToolResult(
|
||
success=False,
|
||
error="Другой процесс fetch уже выполняется",
|
||
metadata={'status': 'locked'}
|
||
)
|
||
|
||
with open(self.lock_file, 'w') as f:
|
||
f.write(str(os.getpid()))
|
||
|
||
try:
|
||
total = 0
|
||
conn = sqlite3.connect(self.db_path)
|
||
c = conn.cursor()
|
||
c.execute("SELECT id, url FROM feeds")
|
||
feeds = c.fetchall()
|
||
conn.close()
|
||
|
||
for feed_id, url in feeds:
|
||
# Check last fetch
|
||
conn = sqlite3.connect(self.db_path)
|
||
c = conn.cursor()
|
||
c.execute("SELECT last_fetched FROM feeds WHERE id = ?", (feed_id,))
|
||
row = c.fetchone()
|
||
conn.close()
|
||
|
||
if row and row[0]:
|
||
last = datetime.strptime(row[0], '%Y-%m-%d %H:%M:%S')
|
||
mins = (datetime.now() - last).total_seconds() / 60
|
||
if mins < self.fetch_interval_minutes:
|
||
logger.info(f"Пропуск {url} ({int(mins)} мин назад)")
|
||
continue
|
||
|
||
logger.info(f"Получение: {url}")
|
||
|
||
result = subprocess.run(
|
||
['curl', '-sL', '-m', '30', '-A', 'Mozilla/5.0', url],
|
||
capture_output=True, text=True
|
||
)
|
||
|
||
if result.returncode == 0 and result.stdout:
|
||
count = 0
|
||
for item in self._parse_feed(result.stdout):
|
||
self._insert_news(feed_id, item['title'], item['link'], item['guid'], item['pub'])
|
||
count += 1
|
||
|
||
if count > 0:
|
||
logger.info(f"Добавлено {count} элементов")
|
||
total += count
|
||
|
||
# Update last_fetched
|
||
conn = sqlite3.connect(self.db_path)
|
||
c = conn.cursor()
|
||
c.execute("UPDATE feeds SET last_fetched = datetime('now') WHERE id = ?", (feed_id,))
|
||
conn.commit()
|
||
conn.close()
|
||
|
||
return ToolResult(
|
||
success=True,
|
||
data={'total_new_items': total},
|
||
metadata={'status': 'completed'}
|
||
)
|
||
finally:
|
||
self.lock_file.unlink(missing_ok=True)
|
||
|
||
async def list_news(self, limit: int = 20, feed_id: Optional[int] = None,
|
||
search: Optional[str] = None, undigested_only: bool = False) -> ToolResult:
|
||
"""Получить список новостей."""
|
||
self._init_db()
|
||
|
||
conditions = ["1=1"]
|
||
params = []
|
||
|
||
if feed_id:
|
||
conditions.append(f"feed_id = ?")
|
||
params.append(feed_id)
|
||
|
||
if search:
|
||
conditions.append(f"title LIKE ?")
|
||
params.append(f"%{search}%")
|
||
|
||
if undigested_only:
|
||
conditions.append("digest_flag = 0")
|
||
|
||
query = f"""
|
||
SELECT id, feed_id, title, pub_date, link, digest_flag
|
||
FROM news WHERE {' AND '.join(conditions)}
|
||
ORDER BY pub_date DESC LIMIT ?
|
||
"""
|
||
params.append(limit)
|
||
|
||
conn = sqlite3.connect(self.db_path)
|
||
c = conn.cursor()
|
||
c.execute(query, params)
|
||
rows = c.fetchall()
|
||
conn.close()
|
||
|
||
news_list = []
|
||
for row in rows:
|
||
news_list.append({
|
||
'id': row[0],
|
||
'feed_id': row[1],
|
||
'title': row[2],
|
||
'pub_date': row[3],
|
||
'link': row[4],
|
||
'digest_flag': bool(row[5])
|
||
})
|
||
|
||
return ToolResult(
|
||
success=True,
|
||
data=news_list,
|
||
metadata={'count': len(news_list), 'limit': limit}
|
||
)
|
||
|
||
async def add_feed(self, url: str, title: Optional[str] = None) -> ToolResult:
|
||
"""Добавить RSS ленту."""
|
||
self._init_db()
|
||
|
||
conn = sqlite3.connect(self.db_path)
|
||
c = conn.cursor()
|
||
try:
|
||
c.execute("INSERT INTO feeds (url, title) VALUES (?, ?)", (url, title or url))
|
||
conn.commit()
|
||
return ToolResult(
|
||
success=True,
|
||
data={'url': url, 'title': title},
|
||
metadata={'status': 'added'}
|
||
)
|
||
except sqlite3.IntegrityError:
|
||
return ToolResult(
|
||
success=False,
|
||
error=f"Лента уже существует: {url}"
|
||
)
|
||
finally:
|
||
conn.close()
|
||
|
||
async def list_feeds(self) -> ToolResult:
|
||
"""Получить список всех лент."""
|
||
self._init_db()
|
||
|
||
conn = sqlite3.connect(self.db_path)
|
||
c = conn.cursor()
|
||
c.execute("SELECT id, url, title, last_fetched, created_at FROM feeds ORDER BY id")
|
||
rows = c.fetchall()
|
||
conn.close()
|
||
|
||
feeds = []
|
||
for row in rows:
|
||
feeds.append({
|
||
'id': row[0],
|
||
'url': row[1],
|
||
'title': row[2],
|
||
'last_fetched': row[3],
|
||
'created_at': row[4]
|
||
})
|
||
|
||
return ToolResult(
|
||
success=True,
|
||
data=feeds,
|
||
metadata={'count': len(feeds)}
|
||
)
|
||
|
||
async def mark_digest(self, news_id: int) -> ToolResult:
|
||
"""Отметить новость как прочитанную (в дайджесте)."""
|
||
self._init_db()
|
||
|
||
conn = sqlite3.connect(self.db_path)
|
||
c = conn.cursor()
|
||
c.execute("UPDATE news SET digest_flag=1 WHERE id=?", (news_id,))
|
||
if c.rowcount == 0:
|
||
conn.close()
|
||
return ToolResult(
|
||
success=False,
|
||
error=f"Новость не найдена: {news_id}"
|
||
)
|
||
conn.commit()
|
||
conn.close()
|
||
|
||
return ToolResult(
|
||
success=True,
|
||
data={'id': news_id},
|
||
metadata={'status': 'marked'}
|
||
)
|
||
|
||
async def execute(self, action: str = "list", **kwargs) -> ToolResult:
|
||
"""
|
||
Выполнить действие с RSS.
|
||
|
||
Args:
|
||
action: Действие - fetch, list, add_feed, list_feeds, mark_digest
|
||
kwargs: Дополнительные аргументы для действия
|
||
"""
|
||
actions = {
|
||
'fetch': self.fetch,
|
||
'list': lambda: self.list_news(
|
||
limit=kwargs.get('limit', 20),
|
||
feed_id=kwargs.get('feed_id'),
|
||
search=kwargs.get('search'),
|
||
undigested_only=kwargs.get('undigested_only', False)
|
||
),
|
||
'add_feed': lambda: self.add_feed(
|
||
url=kwargs.get('url'),
|
||
title=kwargs.get('title')
|
||
),
|
||
'list_feeds': self.list_feeds,
|
||
'mark_digest': lambda: self.mark_digest(news_id=kwargs.get('news_id'))
|
||
}
|
||
|
||
if action not in actions:
|
||
return ToolResult(
|
||
success=False,
|
||
error=f"Неизвестное действие: {action}. Доступные: {list(actions.keys())}"
|
||
)
|
||
|
||
logger.info(f"RSS действие: {action} с аргументами: {kwargs}")
|
||
return await actions[action]()
|
||
|
||
|
||
# Automatic registration on import: defining this subclass runs
# @register_tool, making the tool available to the bot's tool registry.
@register_tool
class RSSToolAuto(RSSTool):
    """Auto-registered variant of RSSTool; adds no behavior of its own."""
    pass
|