author    neodarz <neodarz@neodarz.net>  2019-03-11 12:31:06 +0100
committer neodarz <neodarz@neodarz.net>  2019-03-11 12:31:06 +0100
commit    a1c6f4b8f33e2b2028ca5cf3e6b1b1f632db5041 (patch)
tree      d0a7ea07bd3a465ad22c45bf6ef15466fe898fc3
Initial commit (HEAD, master)
-rw-r--r--  README.md    1
-rw-r--r--  app.py      56
2 files changed, 57 insertions, 0 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3770777
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+Simple RSS feed search using feedly.com
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..b4a311e
--- /dev/null
+++ b/app.py
@@ -0,0 +1,56 @@
+# Based on this work: https://dvenkatsagar.github.io/tutorials/python/2015/10/26/ddlv/
+
+# The standard library modules
+import os
+import sys
+import re
+from urllib.parse import unquote
+
+# The BeautifulSoup module
+from bs4 import BeautifulSoup
+
+# The selenium module
+from selenium import webdriver
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.by import By
+from selenium.common.exceptions import TimeoutException
+
+if len(sys.argv) != 2:
+ print("Usage:")
+ print(" "+str(sys.argv[0]+" <url>"))
+ sys.exit()
+
+url = sys.argv[1]
+
+print("Starting...")
+
+driver = webdriver.Firefox()
+
+# Load the feedly search page
+driver.get("https://feedly.com/i/discover/sources/search/feed/"+url)
+
+print("Searching...")
+
+# Wait until the JavaScript-rendered search results are loaded
+try:
+    WebDriverWait(driver, 5).until(EC.visibility_of_element_located((By.CLASS_NAME, "item-header")))
+except TimeoutException:
+    print("Timeout after 5 seconds: nothing found")
+    driver.quit()
+    sys.exit()
+
+# Click the first result to open the feed's information page, which holds the real feed link
+driver.find_element_by_css_selector("a.item-header").click()
+
+parser = BeautifulSoup(driver.page_source,"lxml")
+
+# The only <h1> with a "data-uri" attribute carries the feed link
+for title in parser.find_all('h1'):
+ if "data-uri" in title.attrs:
+ data_uri = re.sub('^feed%2F', '', title.attrs["data-uri"].split("/")[1])
+ # Transform %2F to human readble char and print it
+ print("Found: "+str(unquote(data_uri)))
+
+driver.quit()
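
Usage sketch, assuming Firefox with a matching geckodriver on PATH and the Python dependencies (selenium, beautifulsoup4, lxml) installed:

    pip install selenium beautifulsoup4 lxml
    python app.py https://example.com

The script opens Firefox, runs the feedly.com source search for the given URL, and prints any feed links it finds ("Found: <feed url>").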