"""Simple RSS feed search using feedly.com.

Usage:
    app.py <url>

Opens the feedly.com source-search page for ``<url>`` in Firefox, clicks the
first result and prints the feed link found on the page that follows.
"""
# Based on this work : https://dvenkatsagar.github.io/tutorials/python/2015/10/26/ddlv/
# Derived from app.py as added by the "Initial commit" patch
# (a1c6f4b, neodarz, 2019-03-11).

# The standard library modules
import re
import sys
from urllib.parse import unquote

# Feedly page that lists the sources matching a URL.
SEARCH_URL = "https://feedly.com/i/discover/sources/search/feed/"
# How long to wait for the JavaScript-rendered result list, in seconds.
WAIT_SECONDS = 5


def usage(prog):
    """Return the command-line help text for the script named *prog*."""
    return "Usage:\n    {} <url>".format(prog)


def feed_url_from_data_uri(data_uri):
    """Return the feed link encoded in a ``data-uri`` attribute value.

    The link is the second "/"-separated segment of *data_uri*, with a
    leading ``feed%2F`` removed and percent-escapes decoded.

    Returns ``None`` when *data_uri* has no second segment, instead of
    raising ``IndexError``.
    """
    segments = data_uri.split("/")
    if len(segments) < 2:
        return None
    # Transform %2F (and other escapes) to human-readable characters.
    return unquote(re.sub(r"^feed%2F", "", segments[1]))


def find_feed_urls(url):
    """Search feedly.com for *url* and return the feed links that are found.

    Drives a Firefox instance: loads the search page, waits up to
    ``WAIT_SECONDS`` for a result to become visible, clicks the first result
    and collects the feed link from every ``<h1>`` that carries a
    ``data-uri`` attribute.

    Returns a list of feed links; the list is empty when the wait times out
    (a message is printed in that case). Any other WebDriver error
    propagates to the caller. The browser is always shut down before this
    function returns or raises.
    """
    # Third-party modules are imported here so that the usage message and
    # the pure helpers above keep working without a browser stack installed.
    from bs4 import BeautifulSoup
    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    driver = webdriver.Firefox()
    try:
        # Load the feedly search page.
        # NOTE(review): url is appended verbatim, as before; confirm whether
        # feedly expects it percent-encoded.
        driver.get(SEARCH_URL + url)

        print("Searching...")

        # Wait until the JavaScript-rendered result list is visible.
        try:
            WebDriverWait(driver, WAIT_SECONDS).until(
                EC.visibility_of_element_located((By.CLASS_NAME, "item-header"))
            )
        except TimeoutException:
            print("Timeout: {} seconds, nothing found".format(WAIT_SECONDS))
            return []

        # Follow the first result to the feed information page, which is
        # where the correct feed link is exposed.
        driver.find_element(By.CSS_SELECTOR, "a.item-header").click()

        # NOTE(review): the page source is read right after the click with
        # no explicit wait, as before; confirm the feed page is rendered.
        page = BeautifulSoup(driver.page_source, "lxml")
    finally:
        # quit() ends the whole session, not just the current window, and
        # runs on every exit path so no browser is left behind.
        driver.quit()

    # Only the title that has a data-uri attribute carries the feed link.
    found = []
    for title in page.find_all("h1"):
        if "data-uri" in title.attrs:
            link = feed_url_from_data_uri(title.attrs["data-uri"])
            if link is not None:
                found.append(link)
    return found


def main(argv=None):
    """Run the command-line tool; *argv* defaults to ``sys.argv``.

    Prints the usage text and returns when exactly one argument (the URL to
    search for) was not supplied; otherwise prints one ``Found: ...`` line
    per feed link.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        print(usage(argv[0] if argv else "app.py"))
        return

    print("Starting...")

    for link in find_feed_urls(argv[1]):
        print("Found: " + link)


if __name__ == "__main__":
    main()