diff options
Diffstat (limited to 'plugins/sitemap_generator.rb')
-rw-r--r-- | plugins/sitemap_generator.rb | 312 |
1 files changed, 0 insertions, 312 deletions
# Sitemap.xml Generator is a Jekyll plugin that generates a sitemap.xml file by
# traversing all of the available posts and pages.
#
# How To Use:
#   1) Copy source file into your _plugins folder within your Jekyll project.
#   2) Modify the url variable in _config.yml to reflect your domain name.
#   3) Run Jekyll: jekyll --server to re-generate your site.
#
# Variables:
#   * Change SITEMAP_FILE_NAME if you want your sitemap to be called something
#     other than sitemap.xml.
#   * Change the PAGES_INCLUDE_POSTS list to include any pages that are looping
#     through your posts (e.g. "index.html", "archive.html", etc.). This will
#     ensure that right after you make a new post, the last modified date will
#     be updated to reflect the new post.
#   * A sitemap.xml should be included in your _site folder.
#   * If there are any files you don't want included in the sitemap, add them
#     to the EXCLUDED_FILES list. The name should match the name of the source
#     file.
#   * If you want to include the optional changefreq and priority attributes,
#     simply include custom variables in the YAML Front Matter of that file.
#     The names of these custom variables are defined below in the
#     CHANGE_FREQUENCY_CUSTOM_VARIABLE_NAME and PRIORITY_CUSTOM_VARIABLE_NAME
#     constants.
#
# Notes:
#   * The last modified date is determined by the latest from the following:
#     system modified date of the page or post, system modified date of
#     included layout, system modified date of included layout within that
#     layout, ...
#
# Author: Michael Levin
# Site: http://www.kinnetica.com
# Distributed Under A Creative Commons License
#   - http://creativecommons.org/licenses/by/3.0/
#
# Modified for Octopress by John W. Long
#
require 'rexml/document'
require 'fileutils'
require 'time' # Time#iso8601 is used to format <lastmod> values

module Jekyll

  # Change SITEMAP_FILE_NAME if you would like your sitemap file
  # to be called something else
  SITEMAP_FILE_NAME = "sitemap.xml"

  # Any files to exclude from being included in the sitemap.xml
  EXCLUDED_FILES = ["atom.xml"]

  # Any files that include posts, so that when a new post is added, the last
  # modified date of these pages should take that into account
  PAGES_INCLUDE_POSTS = ["index.html"]

  # Custom variable names for changefreq and priority elements
  # These names are used within the YAML Front Matter of pages or posts
  # for which you want to include these properties
  CHANGE_FREQUENCY_CUSTOM_VARIABLE_NAME = "change_frequency"
  PRIORITY_CUSTOM_VARIABLE_NAME = "priority"

  # Reopened to expose the source path and public URL of a post.
  class Post
    attr_accessor :name

    # Returns the path built from the post's @base and @name
    def full_path_to_source
      File.join(@base, @name)
    end

    # Returns the configured site url followed by the post's url
    def location_on_server
      "#{site.config['url']}#{url}"
    end
  end

  # Reopened to expose the source path and public URL of a page.
  class Page
    attr_accessor :name

    # Returns the path built from the page's @base, @dir and @name
    def full_path_to_source
      File.join(@base, @dir, @name)
    end

    # Returns the configured site url followed by the page's directory and
    # url, with a trailing "index.html" removed
    def location_on_server
      location = "#{site.config['url']}#{@dir}#{url}"
      # The dot is escaped and the match is anchored to the end of the string
      # so that only a literal trailing "index.html" is stripped.
      location.sub(/index\.html\z/, "")
    end
  end

  # Reopened to expose the source path of a layout.
  class Layout
    # Returns the path built from the layout's @base and @name
    def full_path_to_source
      File.join(@base, @name)
    end
  end

  # Recover from strange exception when starting server without --auto
  class SitemapFile < StaticFile
    # Delegates to StaticFile#write and deliberately ignores any
    # StandardError it raises (best-effort; see the class comment).
    #
    # Returns true
    def write(dest)
      begin
        super(dest)
      rescue StandardError
        # Intentionally swallowed: the sitemap has already been written to
        # disk by SitemapGenerator#generate.
      end

      true
    end
  end

  class SitemapGenerator < Generator

    # Valid values allowed by sitemap.xml spec for change frequencies
    VALID_CHANGE_FREQUENCY_VALUES = ["always", "hourly", "daily", "weekly",
      "monthly", "yearly", "never"].freeze

    # Goes through pages and posts and generates sitemap.xml file
    #
    # The return value is not meaningful to callers.
    def generate(site)
      sitemap = REXML::Document.new
      sitemap << REXML::XMLDecl.new("1.0", "UTF-8")

      urlset = REXML::Element.new "urlset"
      urlset.add_attribute("xmlns",
        "http://www.sitemaps.org/schemas/sitemap/0.9")

      # Posts are filled first; while they are processed
      # @last_modified_post_date is still unset, which is how
      # fill_last_modified tells the post pass from the page pass.
      @last_modified_post_date = fill_posts(site, urlset)
      fill_pages(site, urlset)

      sitemap.add_element(urlset)

      # File I/O: create sitemap.xml file and write out pretty-printed XML.
      # mkdir_p is a no-op when the directory already exists, and the block
      # form of File.open closes the handle even if the formatter raises.
      FileUtils.mkdir_p(site.dest)
      File.open(File.join(site.dest, SITEMAP_FILE_NAME), "w") do |file|
        formatter = REXML::Formatters::Pretty.new(4)
        formatter.compact = true
        formatter.write(sitemap, file)
      end

      # Keep the sitemap.xml file from being cleaned by Jekyll
      site.static_files << Jekyll::SitemapFile.new(site, site.dest, "/", SITEMAP_FILE_NAME)
    end

    # Create url elements for all the posts and find the date of the latest one
    #
    # Excluded posts get no url element but still count towards the date.
    #
    # Returns last_modified_date of latest post (nil when there are no posts)
    def fill_posts(site, urlset)
      last_modified_date = nil
      site.posts.each do |post|
        urlset.add_element(fill_url(site, post)) unless excluded?(post.name)

        date = File.mtime(post.full_path_to_source)
        if last_modified_date.nil? || date > last_modified_date
          last_modified_date = date
        end
      end

      last_modified_date
    end

    # Create url elements for all the normal pages whose source file exists
    # on disk and which are not listed in EXCLUDED_FILES
    #
    # The return value is not meaningful to callers.
    def fill_pages(site, urlset)
      site.pages.each do |page|
        next if excluded?(page.name)
        next unless File.exist?(page.full_path_to_source)

        urlset.add_element(fill_url(site, page))
      end
    end

    # Fill data of each URL element: location, last modified,
    # change frequency (optional), and priority.
    #
    # Returns url REXML::Element
    def fill_url(site, page_or_post)
      url = REXML::Element.new "url"

      loc = fill_location(page_or_post)
      url.add_element(loc)

      lastmod = fill_last_modified(site, page_or_post)
      url.add_element(lastmod) if lastmod

      raw_frequency = page_or_post.data[CHANGE_FREQUENCY_CUSTOM_VARIABLE_NAME]
      if raw_frequency
        # Front matter values are not guaranteed to be Strings, so coerce
        # before downcasing instead of raising NoMethodError.
        change_frequency = raw_frequency.to_s.downcase

        if valid_change_frequency?(change_frequency)
          changefreq = REXML::Element.new "changefreq"
          changefreq.text = change_frequency
          url.add_element(changefreq)
        else
          puts "ERROR: Invalid Change Frequency In #{page_or_post.name}"
        end
      end

      priority_value = page_or_post.data[PRIORITY_CUSTOM_VARIABLE_NAME]
      if priority_value
        if valid_priority?(priority_value)
          priority = REXML::Element.new "priority"
          priority.text = priority_value.to_s
          url.add_element(priority)
        else
          puts "ERROR: Invalid Priority In #{page_or_post.name}"
        end
      end

      url
    end

    # Get URL location of page or post
    #
    # Returns the loc REXML::Element for the page or post
    def fill_location(page_or_post)
      loc = REXML::Element.new "loc"
      loc.text = page_or_post.location_on_server

      loc
    end

    # Fill lastmod XML element with the last modified date for the page or post.
    #
    # For names listed in PAGES_INCLUDE_POSTS the date of the latest post is
    # taken into account as well, once it is known.
    #
    # Returns lastmod REXML::Element or nil (when the source file is missing)
    def fill_last_modified(site, page_or_post)
      path = page_or_post.full_path_to_source
      return nil unless File.exist?(path)

      latest_date = find_latest_date(File.mtime(path), site, page_or_post)

      # @last_modified_post_date is nil while posts are being processed (and
      # when the site has no posts), so it only applies to pages.
      if @last_modified_post_date && posts_included?(page_or_post.name)
        latest_date = greater_date(latest_date, @last_modified_post_date)
      end

      lastmod = REXML::Element.new "lastmod"
      lastmod.text = latest_date.iso8601
      lastmod
    end

    # Go through the page/post and any implemented layouts and get the latest
    # modified date
    #
    # Returns the latest of the given date and the mtimes of all layouts used
    def find_latest_date(latest_date, site, page_or_post)
      layouts = site.layouts
      layout = layouts[page_or_post.data["layout"]]
      # Walk up the chain: each layout may itself name a parent layout.
      while layout
        date = File.mtime(layout.full_path_to_source)
        latest_date = date if date > latest_date

        layout = layouts[layout.data["layout"]]
      end

      latest_date
    end

    # Which of the two dates is later
    #
    # Returns latest of two dates (date1 when they are equal)
    def greater_date(date1, date2)
      date1 >= date2 ? date1 : date2
    end

    # Is the page or post listed as something we want to exclude?
    #
    # Returns boolean
    def excluded?(name)
      EXCLUDED_FILES.include? name
    end

    # Is the page listed as one that loops through the posts?
    #
    # Returns boolean
    def posts_included?(name)
      PAGES_INCLUDE_POSTS.include? name
    end

    # Is the change frequency value provided valid according to the spec
    #
    # Returns boolean
    def valid_change_frequency?(change_frequency)
      VALID_CHANGE_FREQUENCY_VALUES.include? change_frequency
    end

    # Is the priority value provided valid according to the spec
    # (a number between 0.0 and 1.0 inclusive)
    #
    # Returns boolean
    def valid_priority?(priority)
      (0.0..1.0).cover?(Float(priority))
    rescue ArgumentError, TypeError
      # Float() raises ArgumentError for unparsable strings and TypeError for
      # values such as true or an Array; both mean "not a valid priority".
      false
    end
  end
end