diff options
author | neodarz <neodarz@neodarz.net> | 2019-03-11 12:31:06 +0100 |
---|---|---|
committer | neodarz <neodarz@neodarz.net> | 2019-03-11 12:31:06 +0100 |
commit | a1c6f4b8f33e2b2028ca5cf3e6b1b1f632db5041 (patch) | |
tree | d0a7ea07bd3a465ad22c45bf6ef15466fe898fc3 | |
download | feed_finder-master.tar.xz feed_finder-master.zip |
-rw-r--r-- | README.md | 1 | ||||
-rw-r--r-- | app.py | 56 |
2 files changed, 57 insertions, 0 deletions
"""Simple RSS feed search using feedly.com.

Usage: app.py <url>

Opens feedly's source-search page for <url> in Firefox (driven through
Selenium), clicks the first search result and prints every feed URL found
in an ``<h1 data-uri=...>`` element of the resulting page.
"""
# Based on this work : https://dvenkatsagar.github.io/tutorials/python/2015/10/26/ddlv/

# The standard library modules
import os
import re
import sys
from urllib.parse import unquote

# The BeautifulSoup module
from bs4 import BeautifulSoup

# The selenium module
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Feedly search page; the URL to look up is appended as-is.
SEARCH_URL = "https://feedly.com/i/discover/sources/search/feed/"

# Maximum time, in seconds, to wait for the JavaScript-rendered page parts.
WAIT_SECONDS = 5


def _feed_url_from_data_uri(data_uri):
    """Return the decoded feed URL stored in a ``data-uri`` value.

    The second "/"-separated segment of *data_uri* is taken, a leading
    ``feed%2F`` marker is stripped from it and the remainder is
    percent-decoded (``%2F`` -> ``/`` and so on).

    Returns ``None`` when *data_uri* contains no "/", instead of raising
    ``IndexError``.
    """
    segments = data_uri.split("/")
    if len(segments) < 2:
        return None
    return unquote(re.sub(r"^feed%2F", "", segments[1]))


def main(argv=None):
    """Search feedly for the URL given on the command line and print feeds.

    *argv* defaults to ``sys.argv``. The process exits early (via
    ``sys.exit()``) after printing a usage message when the argument count
    is wrong, or after printing a timeout message when no search result
    shows up within ``WAIT_SECONDS``.
    """
    argv = sys.argv if argv is None else argv

    if len(argv) != 2:
        print("Usage:")
        print(" " + str(argv[0] + " <url>"))
        sys.exit()

    url = argv[1]

    print("Starting...")

    driver = webdriver.Firefox()
    try:
        # Load the feedly search page
        driver.get(SEARCH_URL + url)

        print("Searching...")

        # Wait until the JavaScript-rendered search result is visible.
        # Only a genuine timeout is treated as "nothing found"; any other
        # WebDriver failure propagates instead of being mislabelled.
        try:
            WebDriverWait(driver, WAIT_SECONDS).until(
                EC.visibility_of_element_located((By.CLASS_NAME, "item-header"))
            )
        except TimeoutException:
            print("Timeout: {} seconds, nothing found".format(WAIT_SECONDS))
            sys.exit()

        # Follow the first result to the feed information page, which
        # carries the real feed link.  find_element(By...) is used because
        # the find_element_by_* helpers were removed in Selenium 4.3.
        driver.find_element(By.CSS_SELECTOR, "a.item-header").click()

        # Best effort: give the page time to render the title carrying the
        # feed link before taking the snapshot.  On timeout we still parse
        # whatever is there, exactly like an immediate snapshot would.
        try:
            WebDriverWait(driver, WAIT_SECONDS).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "h1[data-uri]"))
            )
        except TimeoutException:
            pass

        parser = BeautifulSoup(driver.page_source, "lxml")
    finally:
        # quit() (unlike close()) also ends the browser session and driver
        # process, and the finally clause runs on sys.exit() and on errors.
        driver.quit()

    # Only the titles that have a data-uri attribute hold a feed link
    for title in parser.find_all("h1"):
        data_uri = title.attrs.get("data-uri")
        if data_uri is None:
            continue
        feed_url = _feed_url_from_data_uri(data_uri)
        if feed_url is not None:
            # Already percent-decoded into a human readable URL
            print("Found: " + feed_url)


if __name__ == "__main__":
    main()