pyblog: Fix bug in exclude list feature

Previously exclude_list was a list of relative paths, and it was hard to deal with the "./" resulting from joining an empty root with a name, so exclude list items in the root directory (e.g., ./template.html) weren't really excluded. Not to mention the awkward trailing slash handling and other resolution issues (mainly ..). In this commit we switch to an absolute exclude_list, eliminating the aforementioned bug and simplifying exclude_list queries.
author: Zhiming Wang <zmwangx@gmail.com> 2016-02-14 14:53:21 -0800
committer: Zhiming Wang <zmwangx@gmail.com> 2016-02-14 14:58:55 -0800
commit: 3b538a7206670e73018a3acb00ee52f517dae6cd (patch)
tree: 3660ced96e886d6655db39b51963987a6afa5175
parent: d0c911e6163033aaa8129a18125948d76fa9b8d4 (diff)
download: my_new_personal_website-3b538a7206670e73018a3acb00ee52f517dae6cd.tar.xz
my_new_personal_website-3b538a7206670e73018a3acb00ee52f517dae6cd.zip
1 files changed, 9 insertions, 8 deletions
diff --git a/pyblog b/pyblog
index 10cde721..d23b43c5 100755
--- a/pyblog
+++ b/pyblog
@@ -390,7 +390,8 @@ def generate_sitemap(feed):
     sitemap.append(make_sitemap_url_element(BLOG_HOME, feed.updated, "daily", 1.0))
     # other top level pages
     for name in os.listdir(BUILDDIR):
-        if not name.endswith(".html") or name == "index.html":
+        if (not name.endswith(".html") or name == "index.html" or
+            re.match("google[a-z0-9]+\.html", name)):  # exclude Google's site ownership verification file
             continue
         link = urllib.parse.urljoin(BLOG_HOME, name)
         fullpath = os.path.join(BUILDDIR, name)
@@ -760,21 +761,20 @@ def generate_blog(fresh=False, report_total_errors=True):
     if exclude_list is None:
         try:
             with open(EXCLUDELIST) as fp:
-                exclude_list = [line.rstrip() for line in list(fp)
+                exclude_list = [os.path.abspath(os.path.join(SOURCEDIR, line.rstrip()))
+                                for line in list(fp)
                                 if line.strip() != "" and not line.startswith('#')]
         except OSError:
             exclude_list = []
         generate_blog.exclude_list = exclude_list  # assign to static variable for the future
 
     for root, dirs, files in os.walk(SOURCEDIR):
-        relroot = os.path.relpath(root, start=SOURCEDIR)
-
-        # If relroot is in exclude list, skip all files and remove all subdirs from traversal list.
-        # os.path.join(relroot, "") is basically the path with a slash.
-        if relroot in exclude_list or os.path.join(relroot, "") in exclude_list:
+        # If root is in exclude list, skip all files and remove all subdirs from traversal list.
+        if root in exclude_list:
             dirs[:] = []
             continue
 
+        relroot = os.path.relpath(root, start=SOURCEDIR)
         dstroot = os.path.join(BUILDDIR, relroot)
         if not os.path.isdir(dstroot):
             if os.path.exists(dstroot):
@@ -782,8 +782,9 @@ def generate_blog(fresh=False, report_total_errors=True):
             os.mkdir(dstroot, mode=0o755)
 
         for name in files:
-            if name.startswith('.') or os.path.join(relroot, name) in exclude_list:
+            if name.startswith('.') or os.path.join(root, name) in exclude_list:
                 continue
+
             extension = name.split(".")[-1]
             if extension not in ["css", "jpg", "md", "png", "svg", "ico", "txt",
                                  "eot", "ttf", "woff", "woff2"]:
author	Zhiming Wang <zmwangx@gmail.com>	2016-02-14 14:53:21 -0800
committer	Zhiming Wang <zmwangx@gmail.com>	2016-02-14 14:58:55 -0800
commit	3b538a7206670e73018a3acb00ee52f517dae6cd (patch)
tree	3660ced96e886d6655db39b51963987a6afa5175
parent	d0c911e6163033aaa8129a18125948d76fa9b8d4 (diff)
download	my_new_personal_website-3b538a7206670e73018a3acb00ee52f517dae6cd.tar.xz my_new_personal_website-3b538a7206670e73018a3acb00ee52f517dae6cd.zip