#!/usr/bin/env python3
"""
DDGS Search Script - Python script to perform web searches using ddgs
"""

import argparse
import json
import sys


def search_ddgs(query, max_results=10):
    """
    Perform a search using ddgs (DuckDuckGo Search)

    Args:
        query (str): Search query
        max_results (int): Maximum number of results to return

    Returns:
        list: List of search results with title, url, and description
    """
    # Import lazily so the script can report a friendly JSON error
    # instead of a traceback when the library is absent.
    try:
        from ddgs import DDGS
    except ImportError:
        print(json.dumps({"error": "ddgs library not found. Please install it using: pip install ddgs"}))
        sys.exit(1)

    try:
        raw_results = DDGS().text(query, max_results=max_results)
        # Keep only the fields callers care about.
        return [
            {
                "title": entry.get("title", ""),
                "href": entry.get("href", ""),
                "body": entry.get("body", ""),
            }
            for entry in raw_results
        ]
    except Exception as exc:
        # Any search failure is reported as JSON on stdout, exit code 1.
        print(json.dumps({"error": str(exc)}))
        sys.exit(1)


def main():
    """Parse CLI arguments, run the search, and print JSON results."""
    parser = argparse.ArgumentParser(description="DDGS Search Script")
    parser.add_argument("query", help="Search query")
    parser.add_argument("--num-results", "-n", type=int, default=10,
                        help="Number of search results to return (default: 10)")
    args = parser.parse_args()

    print(json.dumps(search_ddgs(args.query, args.num_results),
                     ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""
RSS Reader - Based on sebsu/RSS-reader-in-bash parsing logic

Fetches RSS feeds with curl, stores items in a local SQLite database
(rss.db), and provides list / digest / clean maintenance commands.
"""

import os
import re
import shutil
import sqlite3
import subprocess
import sys
from datetime import datetime
from email.utils import parsedate_to_datetime

DB_FILE = "rss.db"
LOCK_FILE = "fetch.lock"
FETCH_INTERVAL_MINUTES = 5  # minimum minutes between fetches of the same feed


def log_info(msg):
    """Print a green [INFO] message to stdout."""
    print(f"\033[0;32m[INFO]\033[0m {msg}")


def log_warn(msg):
    """Print a yellow [WARN] message to stderr."""
    print(f"\033[1;33m[WARN]\033[0m {msg}", file=sys.stderr)


def log_error(msg):
    """Print a red [ERROR] message to stderr."""
    print(f"\033[0;31m[ERROR]\033[0m {msg}", file=sys.stderr)


def check_deps():
    """Exit(1) with an error message if a required external command is missing."""
    # shutil.which is portable and avoids spawning a `which` subprocess per check.
    for cmd in ('sqlite3', 'curl'):
        if shutil.which(cmd) is None:
            log_error(f"Missing: {cmd}")
            sys.exit(1)


def init_db():
    """Create the feeds/news tables and indexes if they do not exist yet."""
    conn = sqlite3.connect(DB_FILE)
    try:
        c = conn.cursor()
        c.execute('''
            CREATE TABLE IF NOT EXISTS feeds (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                url TEXT NOT NULL UNIQUE,
                title TEXT,
                last_fetched DATETIME,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        c.execute('''
            CREATE TABLE IF NOT EXISTS news (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                feed_id INTEGER NOT NULL,
                guid TEXT NOT NULL,
                pub_date DATETIME,
                title TEXT,
                description TEXT,
                content TEXT,
                link TEXT,
                digest_flag INTEGER DEFAULT 0,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (feed_id) REFERENCES feeds(id),
                UNIQUE(feed_id, guid)
            )
        ''')
        c.execute('CREATE INDEX IF NOT EXISTS idx_news_feed ON news(feed_id)')
        c.execute('CREATE INDEX IF NOT EXISTS idx_news_date ON news(pub_date)')
        conn.commit()
    finally:
        conn.close()


def parse_feed(xml_content):
    """Parse RSS 2.0 XML and yield item dicts.

    Yields:
        dict: {'title', 'link', 'guid', 'pub'} for each item that has both
        a title and a guid (the link doubles as guid when guid is absent).

    NOTE(review): the regex patterns in the original source were garbled by
    the diff/paste; they are reconstructed here for standard <item> markup.
    """
    # Strip CDATA wrappers so the inner text is matched directly.
    xml = re.sub(r'<!\[CDATA\[|\]\]>', '', xml_content)

    for item in re.findall(r'<item[^>]*>(.*?)</item>', xml, re.DOTALL):
        title_match = re.search(r'<title[^>]*>(.*?)</title>', item, re.DOTALL)
        title = title_match.group(1).strip()[:500] if title_match else ""

        guid_match = re.search(r'<guid[^>]*>(.*?)</guid>', item, re.DOTALL)
        guid = guid_match.group(1).strip() if guid_match else ""

        link_match = re.search(r'<link[^>]*>(.*?)</link>', item, re.DOTALL)
        link = link_match.group(1).strip() if link_match else ""

        pub_match = re.search(r'<pubDate[^>]*>(.*?)</pubDate>', item, re.DOTALL)
        pub = pub_match.group(1).strip() if pub_match else ""

        # Fall back to the link as a stable identifier.
        if not guid and link:
            guid = link

        if title and guid:
            yield {'title': title, 'link': link, 'guid': guid, 'pub': pub}


def insert_news(feed_id, title, link, guid, pub):
    """Insert one news item; duplicate (feed_id, guid) pairs are ignored."""
    # Parse the RFC-2822 pubDate; fall back to "now" when absent/unparseable.
    pdate = None
    if pub:
        try:
            pdate = parsedate_to_datetime(pub).strftime('%Y-%m-%d %H:%M:%S')
        except (TypeError, ValueError):
            pass  # malformed date string - use the fallback below
    if not pdate:
        pdate = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    if not link:
        link = guid

    conn = sqlite3.connect(DB_FILE)
    try:
        conn.execute('''
            INSERT OR IGNORE INTO news (feed_id, guid, pub_date, title, link)
            VALUES (?, ?, ?, ?, ?)
        ''', (feed_id, guid, pdate, title, link))
        conn.commit()
    finally:
        conn.close()


def cmd_fetch():
    """Fetch every registered feed (rate-limited) and store new items."""
    check_deps()
    init_db()

    # Crude cross-process lock: skip if another fetch appears to be running.
    if os.path.exists(LOCK_FILE):
        log_warn("Another fetch running, skipping...")
        return

    with open(LOCK_FILE, 'w') as f:
        f.write(str(os.getpid()))

    try:
        log_info("Fetching feeds...")
        total = 0

        # Fetch last_fetched together with the URL - one query instead of
        # an extra round-trip per feed.
        conn = sqlite3.connect(DB_FILE)
        feeds = conn.execute("SELECT id, url, last_fetched FROM feeds").fetchall()
        conn.close()

        for feed_id, url, last_fetched in feeds:
            if last_fetched:
                last = datetime.strptime(last_fetched, '%Y-%m-%d %H:%M:%S')
                mins = (datetime.now() - last).total_seconds() / 60
                if mins < FETCH_INTERVAL_MINUTES:
                    log_info(f"Skipping {url} ({int(mins)} min ago)")
                    continue

            log_info(f"Fetching: {url}")
            result = subprocess.run(
                ['curl', '-sL', '-m', '30', '-A', 'Mozilla/5.0', url],
                capture_output=True, text=True
            )

            if result.returncode == 0 and result.stdout:
                count = 0
                for item in parse_feed(result.stdout):
                    insert_news(feed_id, item['title'], item['link'],
                                item['guid'], item['pub'])
                    count += 1
                if count > 0:
                    log_info(f"Added {count} items")
                    total += count

            conn = sqlite3.connect(DB_FILE)
            conn.execute("UPDATE feeds SET last_fetched = datetime('now') WHERE id = ?",
                         (feed_id,))
            conn.commit()
            conn.close()

        log_info(f"Total new items: {total}")
    finally:
        os.remove(LOCK_FILE)


def cmd_list(args):
    """List stored news items. See show_help() for the supported options."""
    check_deps()
    init_db()

    limit = 20
    sort = "pub_date"
    order = "DESC"
    conditions = []   # SQL fragments with ? placeholders
    params = []       # bound values, in order
    format_type = "plain"

    i = 0
    while i < len(args):
        if args[i] in ('-n', '--limit'):
            limit = args[i + 1]
            i += 2
        elif args[i] == '--sort':
            sort = args[i + 1]
            i += 2
        elif args[i] == '--order':
            order = args[i + 1]
            i += 2
        elif args[i] == '--feed':
            conditions.append("n.feed_id = ?")
            params.append(args[i + 1])
            i += 2
        elif args[i] == '--from':
            conditions.append("n.pub_date >= ?")
            params.append(args[i + 1])
            i += 2
        elif args[i] == '--to':
            conditions.append("n.pub_date <= ?")
            params.append(args[i + 1])
            i += 2
        elif args[i] == '--search':
            conditions.append("n.title LIKE ?")
            params.append(f"%{args[i + 1]}%")
            i += 2
        elif args[i] == '--digested':
            conditions.append("n.digest_flag = 1")
            i += 1
        elif args[i] == '--undigested':
            conditions.append("n.digest_flag = 0")
            i += 1
        elif args[i] == '--format':
            format_type = args[i + 1]
            i += 2
        else:
            i += 1

    try:
        limit = int(limit)
    except (TypeError, ValueError):
        log_error(f"Invalid limit: {limit}")
        return

    # Whitelist sort/order instead of interpolating user text into SQL.
    direction = "ASC" if str(order).upper() == "ASC" else "DESC"
    order_by = (f"ORDER BY n.title {direction}" if sort == "title"
                else "ORDER BY n.pub_date DESC")

    query = (
        "SELECT n.id, n.feed_id, n.title, n.pub_date, n.link, n.digest_flag "
        "FROM news n WHERE 1=1"
        + "".join(f" AND {cond}" for cond in conditions)
        + f" {order_by} LIMIT ?"
    )
    params.append(limit)

    conn = sqlite3.connect(DB_FILE)
    rows = conn.execute(query, params).fetchall()
    conn.close()

    if format_type == "json":
        import json
        keys = ('id', 'feed_id', 'title', 'pub_date', 'link', 'digest_flag')
        print(json.dumps([dict(zip(keys, row)) for row in rows],
                         ensure_ascii=False, indent=2))
    elif format_type == "csv":
        print("id,feed_id,title,pub_date,link,digest_flag")
        for row in rows:
            print(",".join(str(x) for x in row))
    else:
        for row in rows:
            print(f"{row[0]}\t{row[1]}\t{row[2][:50]}\t{row[3]}\t{row[4][:50]}\t{row[5]}")


def cmd_digest(news_id):
    """Mark one news item (by id) as digested."""
    check_deps()
    init_db()

    if not news_id:
        log_error("Missing ID")
        return

    conn = sqlite3.connect(DB_FILE)
    try:
        c = conn.cursor()
        c.execute("UPDATE news SET digest_flag=1 WHERE id=?", (news_id,))
        if c.rowcount == 0:
            log_error(f"Not found: {news_id}")
        else:
            log_info(f"Marked {news_id} as digested")
        conn.commit()
    finally:
        conn.close()


def cmd_clean(days):
    """Delete news items whose pub_date is older than `days` days."""
    check_deps()
    init_db()

    if not days:
        log_error("Missing days")
        return

    try:
        days = int(days)
    except (TypeError, ValueError):
        log_error(f"Invalid days: {days}")
        return

    conn = sqlite3.connect(DB_FILE)
    try:
        c = conn.cursor()
        # Bind the interval as a parameter rather than formatting it into
        # the SQL text (the original was injectable via `days`).
        c.execute("DELETE FROM news WHERE pub_date < datetime('now', ?)",
                  (f"-{days} days",))
        deleted = c.rowcount
        conn.commit()
    finally:
        conn.close()
    log_info(f"Deleted {deleted} items")


def show_help():
    """Print CLI usage information."""
    print("""
RSS Reader - Based on sebsu/RSS-reader-in-bash

Usage: rss_reader.py [CMD] [OPTIONS]

Commands:
  (default)         Run --fetch
  --fetch           Fetch all feeds
  --list            List news
  --digest ID       Mark as digested
  --clean N         Delete older than N days

Options:
  -n, --limit N     Limit (default: 20)
  --sort FIELD      date|title
  --order DIR       asc|desc
  --feed ID         Filter by feed ID
  --from DATE       From date
  --to DATE         To date
  --search WORD     Search in title
  --digested        Only digested
  --undigested      Only undigested
  --format FMT      plain|json|csv

Examples:
  rss_reader.py                 # Fetch all
  rss_reader.py --list -n 10    # List 10 items
  rss_reader.py --digest 123    # Mark 123
  rss_reader.py --clean 30      # Delete old

DB: rss.db
    """)


if __name__ == "__main__":
    if len(sys.argv) == 1 or sys.argv[1] in ['-h', '--help']:
        show_help()
    elif sys.argv[1] == '--fetch':
        cmd_fetch()
    elif sys.argv[1] == '--list':
        cmd_list(sys.argv[2:])
    elif sys.argv[1] == '--digest':
        cmd_digest(sys.argv[2] if len(sys.argv) > 2 else None)
    elif sys.argv[1] == '--clean':
        cmd_clean(sys.argv[2] if len(sys.argv) > 2 else None)
    else:
        log_error(f"Unknown: {sys.argv[1]}")
        show_help()