aboutsummaryrefslogtreecommitdiff
path: root/plugins/sitemap_generator.rb
diff options
context:
space:
mode:
authorBrandon Mathis <brandon@imathis.com>2011-07-19 09:06:54 -0400
committerBrandon Mathis <brandon@imathis.com>2011-07-19 09:06:54 -0400
commit17c59fb1d1bf3e0c05137af4b4bd09ae271a2d31 (patch)
treea4b3b5d43173f9b02ec4b6401cb6e14f6e716a35 /plugins/sitemap_generator.rb
parent873a604e144c53cfc5465a790e43db5b7ebb429e (diff)
downloadmy_new_personal_website-17c59fb1d1bf3e0c05137af4b4bd09ae271a2d31.tar.xz
my_new_personal_website-17c59fb1d1bf3e0c05137af4b4bd09ae271a2d31.zip
Moved plugins to root directory. I'm ditching the idea of shipping plugins with themes until it's more obviously necessary. This way it's easier to merge and update plugins.
Diffstat (limited to 'plugins/sitemap_generator.rb')
-rw-r--r--plugins/sitemap_generator.rb308
1 files changed, 308 insertions, 0 deletions
diff --git a/plugins/sitemap_generator.rb b/plugins/sitemap_generator.rb
new file mode 100644
index 00000000..8b6cf78c
--- /dev/null
+++ b/plugins/sitemap_generator.rb
@@ -0,0 +1,308 @@
+# Sitemap.xml Generator is a Jekyll plugin that generates a sitemap.xml file by
+# traversing all of the available posts and pages.
+#
+# How To Use:
+# 1) Copy source file into your _plugins folder within your Jekyll project.
+# 2) Modify the url variable in _config.yml to reflect your domain name.
+# 3) Run Jekyll: jekyll --server to re-generate your site.
+#
+# Variables:
+# * Change SITEMAP_FILE_NAME if you want your sitemap to be called something
+# other than sitemap.xml.
+# * Change the PAGES_INCLUDE_POSTS list to include any pages that are looping
+# through your posts (e.g. "index.html", "archive.html", etc.). This will
+# ensure that right after you make a new post, the last modified date will
+# be updated to reflect the new post.
+# * A sitemap.xml should be included in your _site folder.
+# * If there are any files you don't want included in the sitemap, add them
+# to the EXCLUDED_FILES list. The name should match the name of the source
+# file.
+# * If you want to include the optional changefreq and priority attributes,
+# simply include custom variables in the YAML Front Matter of that file.
+# The names of these custom variables are defined below in the
+# CHANGE_FREQUENCY_CUSTOM_VARIABLE_NAME and PRIORITY_CUSTOM_VARIABLE_NAME
+# constants.
+#
+# Notes:
+# * The last modified date is determined by the latest from the following:
+# system modified date of the page or post, system modified date of
+# included layout, system modified date of included layout within that
+# layout, ...
+#
+# Author: Michael Levin
+# Site: http://www.kinnetica.com
+# Distributed Under A Creative Commons License
+# - http://creativecommons.org/licenses/by/3.0/
+#
+# Modified for Octopress by John W. Long
+#
+require 'fileutils'
+require 'rexml/document'
+
+module Jekyll
+
+  # Name of the generated sitemap file, created inside the site destination
+  # directory. Change it to publish the sitemap under a different name.
+  SITEMAP_FILE_NAME = "sitemap.xml"
+
+  # Source file names that must not appear in the sitemap. Entries are
+  # compared against the name of each page or post.
+  EXCLUDED_FILES = %w[atom.xml]
+
+  # Pages that list posts. Their last modified date also takes the most
+  # recently modified post into account.
+  PAGES_INCLUDE_POSTS = %w[index.html]
+
+  # YAML Front Matter keys read from a page or post to fill the optional
+  # changefreq and priority elements of its sitemap entry.
+  CHANGE_FREQUENCY_CUSTOM_VARIABLE_NAME = "change_frequency"
+  PRIORITY_CUSTOM_VARIABLE_NAME = "priority"
+
+ class Post
+ attr_accessor :name
+
+ def full_path_to_source
+ File.join(@base, @name)
+ end
+
+ def location_on_server
+ "#{site.config['url']}#{url}"
+ end
+ end
+
+ class Page
+ attr_accessor :name
+
+ def full_path_to_source
+ File.join(@base, @dir, @name)
+ end
+
+ def location_on_server
+ location = "#{site.config['url']}#{@dir}#{url}"
+ location.gsub(/index.html$/, "")
+ end
+ end
+
+ class Layout
+ def full_path_to_source
+ File.join(@base, @name)
+ end
+ end
+
+ # Recover from strange exception when starting server without --auto
+ class SitemapFile < StaticFile
+ def write(dest)
+ begin
+ super(dest)
+ rescue
+ end
+
+ true
+ end
+ end
+
+ class SitemapGenerator < Generator
+
+    # Change frequency values accepted by the sitemap.xml spec
+    VALID_CHANGE_FREQUENCY_VALUES = %w[always hourly daily weekly
+                                       monthly yearly never]
+
+ # Goes through pages and posts and generates sitemap.xml file
+ #
+ # Returns nothing
+ def generate(site)
+ sitemap = REXML::Document.new << REXML::XMLDecl.new("1.0", "UTF-8")
+
+ urlset = REXML::Element.new "urlset"
+ urlset.add_attribute("xmlns",
+ "http://www.sitemaps.org/schemas/sitemap/0.9")
+
+ @last_modified_post_date = fill_posts(site, urlset)
+ fill_pages(site, urlset)
+
+ sitemap.add_element(urlset)
+
+ # File I/O: create sitemap.xml file and write out pretty-printed XML
+ file = File.new(File.join(site.dest, SITEMAP_FILE_NAME), "w")
+ formatter = REXML::Formatters::Pretty.new(4)
+ formatter.compact = true
+ formatter.write(sitemap, file)
+ file.close
+
+ # Keep the sitemap.xml file from being cleaned by Jekyll
+ site.static_files << Jekyll::SitemapFile.new(site, site.dest, "/", SITEMAP_FILE_NAME)
+ end
+
+ # Create url elements for all the posts and find the date of the latest one
+ #
+ # Returns last_modified_date of latest post
+ def fill_posts(site, urlset)
+ last_modified_date = nil
+ site.posts.each do |post|
+ if !excluded?(post.name)
+ url = fill_url(site, post)
+ urlset.add_element(url)
+ end
+
+ path = post.full_path_to_source
+ date = File.mtime(path)
+ last_modified_date = date if last_modified_date == nil or date > last_modified_date
+ end
+
+ last_modified_date
+ end
+
+ # Create url elements for all the normal pages and find the date of the
+ # index to use with the pagination pages
+ #
+ # Returns last_modified_date of index page
+ def fill_pages(site, urlset)
+ site.pages.each do |page|
+ if !excluded?(page.name)
+ path = page.full_path_to_source
+ if File.exists?(path)
+ url = fill_url(site, page)
+ urlset.add_element(url)
+ end
+ end
+ end
+ end
+
+    # Fill data of each URL element: location, last modified,
+    # change frequency (optional), and priority (optional).
+    #
+    # The optional elements are read from the YAML Front Matter of the page
+    # or post. An invalid value is reported on standard output and the
+    # element is left out.
+    #
+    # Returns url REXML::Element
+    def fill_url(site, page_or_post)
+      url = REXML::Element.new "url"
+
+      url.add_element(fill_location(page_or_post))
+
+      lastmod = fill_last_modified(site, page_or_post)
+      url.add_element(lastmod) if lastmod
+
+      raw_frequency = page_or_post.data[CHANGE_FREQUENCY_CUSTOM_VARIABLE_NAME]
+      if raw_frequency
+        # to_s keeps a non-String front matter value (e.g. a number) from
+        # raising NoMethodError on downcase; it is reported as invalid below.
+        change_frequency = raw_frequency.to_s.downcase
+
+        if valid_change_frequency?(change_frequency)
+          changefreq = REXML::Element.new "changefreq"
+          changefreq.text = change_frequency
+          url.add_element(changefreq)
+        else
+          puts "ERROR: Invalid Change Frequency In #{page_or_post.name}"
+        end
+      end
+
+      priority_value = page_or_post.data[PRIORITY_CUSTOM_VARIABLE_NAME]
+      if priority_value
+        if valid_priority?(priority_value)
+          priority = REXML::Element.new "priority"
+          priority.text = priority_value
+          url.add_element(priority)
+        else
+          puts "ERROR: Invalid Priority In #{page_or_post.name}"
+        end
+      end
+
+      url
+    end
+
+ # Get URL location of page or post
+ #
+ # Returns the location of the page or post
+ def fill_location(page_or_post)
+ loc = REXML::Element.new "loc"
+ loc.text = page_or_post.location_on_server
+
+ loc
+ end
+
+ # Fill lastmod XML element with the last modified date for the page or post.
+ #
+ # Returns lastmod REXML::Element or nil
+ def fill_last_modified(site, page_or_post)
+ path = page_or_post.full_path_to_source
+
+ lastmod = REXML::Element.new "lastmod"
+ date = File.mtime(path)
+ latest_date = find_latest_date(date, site, page_or_post)
+
+ if @last_modified_post_date == nil
+ # This is a post
+ lastmod.text = latest_date.iso8601
+ else
+ # This is a page
+ if posts_included?(page_or_post.name)
+ # We want to take into account the last post date
+ final_date = greater_date(latest_date, @last_modified_post_date)
+ lastmod.text = final_date.iso8601
+ else
+ lastmod.text = latest_date.iso8601
+ end
+ end
+ lastmod
+ end
+
+ # Go through the page/post and any implemented layouts and get the latest
+ # modified date
+ #
+ # Returns formatted output of latest date of page/post and any used layouts
+ def find_latest_date(latest_date, site, page_or_post)
+ layouts = site.layouts
+ layout = layouts[page_or_post.data["layout"]]
+ while layout
+ path = layout.full_path_to_source
+ date = File.mtime(path)
+
+ latest_date = date if (date > latest_date)
+
+ layout = layouts[layout.data["layout"]]
+ end
+
+ latest_date
+ end
+
+ # Which of the two dates is later
+ #
+ # Returns latest of two dates
+ def greater_date(date1, date2)
+ if (date1 >= date2)
+ date1
+ else
+ date2
+ end
+ end
+
+ # Is the page or post listed as something we want to exclude?
+ #
+ # Returns boolean
+ def excluded?(name)
+ EXCLUDED_FILES.include? name
+ end
+
+ def posts_included?(name)
+ PAGES_INCLUDE_POSTS.include? name
+ end
+
+ # Is the change frequency value provided valid according to the spec
+ #
+ # Returns boolean
+ def valid_change_frequency?(change_frequency)
+ VALID_CHANGE_FREQUENCY_VALUES.include? change_frequency
+ end
+
+ # Is the priority value provided valid according to the spec
+ #
+ # Returns boolean
+ def valid_priority?(priority)
+ begin
+ priority_val = Float(priority)
+ return true if priority_val >= 0.0 and priority_val <= 1.0
+ rescue ArgumentError
+ end
+
+ false
+ end
+ end
+end
+