#!/usr/bin/env python3
"""
RSS Reader - Based on sebsu/RSS-reader-in-bash parsing logic

Feeds and news items live in a local SQLite database (``DB_FILE``); feed
documents are downloaded with ``curl`` and parsed with regular expressions.
All timestamps written by this script are UTC so that they compare correctly
with SQLite's ``datetime('now')``.
"""

import csv
import json
import os
import re
import shutil
import sqlite3
import subprocess
import sys
from contextlib import closing, suppress
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime

DB_FILE = "rss.db"
LOCK_FILE = "fetch.lock"
FETCH_INTERVAL_MINUTES = 5

# External programs this script shells out to. SQLite is accessed through the
# Python ``sqlite3`` module, so the ``sqlite3`` command-line tool is not needed.
REQUIRED_COMMANDS = ("curl",)

# Timestamp layout shared with SQLite's datetime() function.
_TS_FORMAT = "%Y-%m-%d %H:%M:%S"

# NOTE(review): these tag patterns were reconstructed for RSS 2.0 <item>
# elements; confirm them against the upstream parsing logic.
_CDATA_RE = re.compile(r"<!\[CDATA\[|\]\]>")
_ITEM_RE = re.compile(r"<item\b[^>]*>(.*?)</item>", re.DOTALL)
_TITLE_RE = re.compile(r"<title\b[^>]*>(.*?)</title>", re.DOTALL)
_GUID_RE = re.compile(r"<guid\b[^>]*>(.*?)</guid>", re.DOTALL)
_LINK_RE = re.compile(r"<link\b[^>]*>(.*?)</link>", re.DOTALL)
_PUBDATE_RE = re.compile(r"<pubDate\b[^>]*>(.*?)</pubDate>", re.DOTALL)

# --sort values accepted by cmd_list, mapped to the column they sort on.
_SORT_COLUMNS = {
    "date": "n.pub_date",
    "pub_date": "n.pub_date",
    "title": "n.title",
}

# cmd_list options that consume the following argument as their value.
_LIST_VALUE_OPTIONS = frozenset({
    "-n", "--limit", "--sort", "--order", "--feed",
    "--from", "--to", "--search", "--format",
})


def log_info(msg):
    """Print an informational message (green tag) to stdout."""
    print(f"\033[0;32m[INFO]\033[0m {msg}")


def log_warn(msg):
    """Print a warning (yellow tag) to stderr."""
    print(f"\033[1;33m[WARN]\033[0m {msg}", file=sys.stderr)


def log_error(msg):
    """Print an error (red tag) to stderr."""
    print(f"\033[0;31m[ERROR]\033[0m {msg}", file=sys.stderr)


def check_deps():
    """Exit with status 1 if a required external command is not on PATH."""
    for cmd in REQUIRED_COMMANDS:
        if shutil.which(cmd) is None:
            log_error(f"Missing: {cmd}")
            sys.exit(1)


def init_db():
    """Create the ``feeds`` and ``news`` tables and their indexes if absent."""
    with closing(sqlite3.connect(DB_FILE)) as conn:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS feeds (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                url TEXT NOT NULL UNIQUE,
                title TEXT,
                last_fetched DATETIME,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        conn.execute('''
            CREATE TABLE IF NOT EXISTS news (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                feed_id INTEGER NOT NULL,
                guid TEXT NOT NULL,
                pub_date DATETIME,
                title TEXT,
                description TEXT,
                content TEXT,
                link TEXT,
                digest_flag INTEGER DEFAULT 0,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (feed_id) REFERENCES feeds(id),
                UNIQUE(feed_id, guid)
            )
        ''')
        conn.execute('CREATE INDEX IF NOT EXISTS idx_news_feed ON news(feed_id)')
        conn.execute('CREATE INDEX IF NOT EXISTS idx_news_date ON news(pub_date)')
        conn.commit()


def _first_text(pattern, text):
    """Return the stripped first capture of *pattern* in *text*, or ''."""
    match = pattern.search(text)
    return match.group(1).strip() if match else ""


def parse_feed(xml_content):
    """Parse an RSS document and yield one dict per usable item.

    Args:
        xml_content: The feed document as text.

    Yields:
        Dicts with the keys ``title`` (truncated to 500 characters),
        ``link``, ``guid`` and ``pub`` (the raw pubDate text, possibly '').
        Items without a title, or with neither a guid nor a link, are skipped.
    """
    if not xml_content:
        return

    # Drop CDATA markers so wrapped text is captured like plain text.
    xml = _CDATA_RE.sub("", xml_content)

    for item in _ITEM_RE.findall(xml):
        title = _first_text(_TITLE_RE, item)[:500]
        guid = _first_text(_GUID_RE, item)
        link = _first_text(_LINK_RE, item)
        pub = _first_text(_PUBDATE_RE, item)

        # Feeds that omit <guid> are de-duplicated on the link instead.
        if not guid and link:
            guid = link

        if title and guid:
            yield {'title': title, 'link': link, 'guid': guid, 'pub': pub}


def _normalize_pub_date(pub):
    """Convert an RFC 2822 date string to a UTC ``YYYY-MM-DD HH:MM:SS`` string.

    Falls back to the current UTC time when *pub* is empty or unparseable.
    """
    if pub:
        try:
            parsed = parsedate_to_datetime(pub)
            if parsed.tzinfo is None:
                # A "-0000" offset yields a naive datetime that is already UTC.
                parsed = parsed.replace(tzinfo=timezone.utc)
            return parsed.astimezone(timezone.utc).strftime(_TS_FORMAT)
        except (TypeError, ValueError, OverflowError):
            pass  # best effort: fall through to "now" for unparseable dates
    return datetime.now(timezone.utc).strftime(_TS_FORMAT)


def insert_news(feed_id, title, link, guid, pub):
    """Insert a news item unless ``(feed_id, guid)`` is already stored.

    Args:
        feed_id: ID of the owning row in ``feeds``.
        title: Item title.
        link: Item URL; *guid* is used when this is empty.
        guid: Unique identifier of the item within the feed.
        pub: Publication date as an RFC 2822 string (may be empty).

    Returns:
        True if a new row was inserted, False if the item already existed.
    """
    pdate = _normalize_pub_date(pub)
    if not link:
        link = guid

    with closing(sqlite3.connect(DB_FILE)) as conn:
        cur = conn.execute('''
            INSERT OR IGNORE INTO news (feed_id, guid, pub_date, title, link)
            VALUES (?, ?, ?, ?, ?)
        ''', (feed_id, guid, pdate, title, link))
        conn.commit()
        return cur.rowcount > 0


def _acquire_lock():
    """Atomically create ``LOCK_FILE`` holding our PID; False if it exists."""
    try:
        # O_EXCL makes "check and create" one atomic step.
        fd = os.open(LOCK_FILE, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o666)
    except FileExistsError:
        return False
    with os.fdopen(fd, 'w') as f:
        f.write(str(os.getpid()))
    return True


def _minutes_since(timestamp):
    """Return minutes elapsed since a UTC *timestamp* string, or None.

    None is returned when *timestamp* is empty or not in ``_TS_FORMAT``.
    """
    if not timestamp:
        return None
    try:
        then = datetime.strptime(timestamp, _TS_FORMAT)
    except (TypeError, ValueError):
        return None
    # last_fetched is written by SQLite's datetime('now'), which is UTC.
    then = then.replace(tzinfo=timezone.utc)
    return (datetime.now(timezone.utc) - then).total_seconds() / 60


def _download(url):
    """Fetch *url* with curl and return the body as text ('' on failure)."""
    result = subprocess.run(
        ['curl', '-sL', '-m', '30', '-A', 'Mozilla/5.0', url],
        capture_output=True,
    )
    if result.returncode != 0 or not result.stdout:
        return ""
    # Decode leniently so one bad byte does not abort the whole run.
    return result.stdout.decode('utf-8', errors='replace')


def cmd_fetch():
    """Download every feed that is due and store its new items.

    A feed is skipped when it was fetched less than
    ``FETCH_INTERVAL_MINUTES`` ago. ``LOCK_FILE`` prevents concurrent runs.
    ``last_fetched`` is only advanced after a successful download.
    """
    check_deps()
    init_db()

    if not _acquire_lock():
        log_warn("Another fetch running, skipping...")
        return

    try:
        log_info("Fetching feeds...")
        with closing(sqlite3.connect(DB_FILE)) as conn:
            feeds = conn.execute(
                "SELECT id, url, last_fetched FROM feeds"
            ).fetchall()

        total = 0
        for feed_id, url, last_fetched in feeds:
            mins = _minutes_since(last_fetched)
            if mins is not None and mins < FETCH_INTERVAL_MINUTES:
                log_info(f"Skipping {url} ({int(mins)} min ago)")
                continue

            log_info(f"Fetching: {url}")
            body = _download(url)
            if not body:
                log_warn(f"Fetch failed or returned no data: {url}")
                continue

            # Count only rows that were really inserted, not duplicates.
            added = 0
            for item in parse_feed(body):
                if insert_news(feed_id, item['title'], item['link'],
                               item['guid'], item['pub']):
                    added += 1
            if added > 0:
                log_info(f"Added {added} items")
            total += added

            with closing(sqlite3.connect(DB_FILE)) as conn:
                conn.execute(
                    "UPDATE feeds SET last_fetched = datetime('now') WHERE id = ?",
                    (feed_id,),
                )
                conn.commit()

        log_info(f"Total new items: {total}")
    finally:
        with suppress(FileNotFoundError):
            os.remove(LOCK_FILE)


def cmd_list(args):
    """Print stored news items, filtered and sorted per command-line *args*.

    Args:
        args: Option strings following ``--list`` (see ``show_help``).
            Unknown arguments are ignored. A repeated option overrides its
            earlier occurrence.

    All user-supplied values are bound as SQL parameters; the sort column and
    direction are chosen from fixed whitelists.
    """
    check_deps()
    init_db()

    limit = 20
    sort = "pub_date"
    order = "DESC"
    format_type = "plain"
    # filter name -> (SQL condition, bound values); keyed so the last wins.
    filters = {}

    i = 0
    while i < len(args):
        opt = args[i]
        if opt == '--digested':
            filters['digest'] = ("n.digest_flag = 1", ())
            i += 1
            continue
        if opt == '--undigested':
            filters['digest'] = ("n.digest_flag = 0", ())
            i += 1
            continue
        if opt not in _LIST_VALUE_OPTIONS:
            i += 1
            continue

        if i + 1 >= len(args):
            log_error(f"Missing value for {opt}")
            return
        value = args[i + 1]
        i += 2

        if opt in ('-n', '--limit'):
            try:
                limit = int(value)
            except ValueError:
                log_error(f"Invalid limit: {value}")
                return
        elif opt == '--sort':
            sort = value
        elif opt == '--order':
            order = value
        elif opt == '--feed':
            try:
                filters['feed'] = ("n.feed_id = ?", (int(value),))
            except ValueError:
                log_error(f"Invalid feed ID: {value}")
                return
        elif opt == '--from':
            filters['from'] = ("n.pub_date >= ?", (value,))
        elif opt == '--to':
            filters['to'] = ("n.pub_date <= ?", (value,))
        elif opt == '--search':
            filters['search'] = ("n.title LIKE ?", (f"%{value}%",))
        elif opt == '--format':
            format_type = value

    direction = order.upper()
    if direction not in ("ASC", "DESC"):
        log_error(f"Invalid order: {order}")
        return
    # Unrecognised sort fields fall back to the publication date.
    column = _SORT_COLUMNS.get(sort.lower(), "n.pub_date")

    where = "".join(f" AND {cond}" for cond, _ in filters.values())
    params = [v for _, values in filters.values() for v in values]
    params.append(limit)

    query = f"""
        SELECT n.id, n.feed_id, n.title, n.pub_date, n.link, n.digest_flag
        FROM news n
        WHERE 1=1{where}
        ORDER BY {column} {direction}
        LIMIT ?
    """

    with closing(sqlite3.connect(DB_FILE)) as conn:
        rows = conn.execute(query, params).fetchall()

    if format_type == "json":
        keys = ('id', 'feed_id', 'title', 'pub_date', 'link', 'digest_flag')
        result = [dict(zip(keys, row)) for row in rows]
        print(json.dumps(result, ensure_ascii=False, indent=2))
    elif format_type == "csv":
        # csv.writer quotes fields that contain commas, quotes or newlines.
        writer = csv.writer(sys.stdout, lineterminator="\n")
        writer.writerow(
            ["id", "feed_id", "title", "pub_date", "link", "digest_flag"]
        )
        writer.writerows(rows)
    else:
        for news_id, feed_id, title, pub_date, link, digest_flag in rows:
            # title and link are nullable in the schema.
            print(f"{news_id}\t{feed_id}\t{(title or '')[:50]}\t{pub_date}"
                  f"\t{(link or '')[:50]}\t{digest_flag}")


def cmd_digest(news_id):
    """Set ``digest_flag`` to 1 on the news row with ID *news_id*.

    Logs an error when *news_id* is missing or matches no row.
    """
    check_deps()
    init_db()

    if not news_id:
        log_error("Missing ID")
        return

    with closing(sqlite3.connect(DB_FILE)) as conn:
        cur = conn.execute(
            "UPDATE news SET digest_flag=1 WHERE id=?", (news_id,)
        )
        if cur.rowcount == 0:
            log_error(f"Not found: {news_id}")
        else:
            log_info(f"Marked {news_id} as digested")
        conn.commit()


def cmd_clean(days):
    """Delete news items whose ``pub_date`` is more than *days* days old.

    Args:
        days: Age threshold in days, as a non-negative integer or a string
            holding one. Anything else is rejected with an error message.
    """
    check_deps()
    init_db()

    if not days:
        log_error("Missing days")
        return

    try:
        n_days = int(days)
    except (TypeError, ValueError):
        log_error(f"Invalid days: {days}")
        return
    if n_days < 0:
        log_error(f"Invalid days: {days}")
        return

    with closing(sqlite3.connect(DB_FILE)) as conn:
        # The modifier is bound as a parameter instead of spliced into SQL.
        cur = conn.execute(
            "DELETE FROM news WHERE pub_date < datetime('now', ?)",
            (f"-{n_days} days",),
        )
        deleted = cur.rowcount
        conn.commit()

    log_info(f"Deleted {deleted} items")


def show_help():
    """Print usage information to stdout."""
    print("""
RSS Reader - Based on sebsu/RSS-reader-in-bash

Usage: rss_reader.py [CMD] [OPTIONS]

Commands:
  (default)      Run --fetch
  --fetch        Fetch all feeds
  --list         List news
  --digest ID    Mark as digested
  --clean N      Delete older than N days

Options:
  -n, --limit N   Limit (default: 20)
  --sort FIELD    date|title
  --order DIR     asc|desc
  --feed ID       Filter by feed ID
  --from DATE     From date
  --to DATE       To date
  --search WORD   Search in title
  --digested      Only digested
  --undigested    Only undigested
  --format FMT    plain|json|csv

Examples:
  rss_reader.py               # Fetch all
  rss_reader.py --list -n 10  # List 10 items
  rss_reader.py --digest 123  # Mark 123
  rss_reader.py --clean 30    # Delete old

DB: rss.db
""")


def main(argv=None):
    """Dispatch the command-line arguments to the matching command.

    Args:
        argv: Arguments without the program name; defaults to
            ``sys.argv[1:]``. With no arguments the feeds are fetched, which
            is the default documented by ``show_help``.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    if not args:
        cmd_fetch()
        return

    command = args[0]
    value = args[1] if len(args) > 1 else None

    if command in ('-h', '--help'):
        show_help()
    elif command == '--fetch':
        cmd_fetch()
    elif command == '--list':
        cmd_list(args[1:])
    elif command == '--digest':
        cmd_digest(value)
    elif command == '--clean':
        cmd_clean(value)
    else:
        log_error(f"Unknown: {command}")
        show_help()


if __name__ == "__main__":
    main()