diff options
Diffstat (limited to '')
-rw-r--r-- | plugins/rubypants.rb | 489 |
1 files changed, 489 insertions, 0 deletions
diff --git a/plugins/rubypants.rb b/plugins/rubypants.rb new file mode 100644 index 00000000..e4f4502f --- /dev/null +++ b/plugins/rubypants.rb @@ -0,0 +1,489 @@ +# +# = RubyPants -- SmartyPants ported to Ruby +# +# Ported by Christian Neukirchen <mailto:chneukirchen@gmail.com> +# Copyright (C) 2004 Christian Neukirchen +# +# Incooporates ideas, comments and documentation by Chad Miller +# Copyright (C) 2004 Chad Miller +# +# Original SmartyPants by John Gruber +# Copyright (C) 2003 John Gruber +# + +# +# = RubyPants -- SmartyPants ported to Ruby +# +# == Synopsis +# +# RubyPants is a Ruby port of the smart-quotes library SmartyPants. +# +# The original "SmartyPants" is a free web publishing plug-in for +# Movable Type, Blosxom, and BBEdit that easily translates plain ASCII +# punctuation characters into "smart" typographic punctuation HTML +# entities. +# +# +# == Description +# +# RubyPants can perform the following transformations: +# +# * Straight quotes (<tt>"</tt> and <tt>'</tt>) into "curly" quote +# HTML entities +# * Backticks-style quotes (<tt>``like this''</tt>) into "curly" quote +# HTML entities +# * Dashes (<tt>--</tt> and <tt>---</tt>) into en- and em-dash +# entities +# * Three consecutive dots (<tt>...</tt> or <tt>. . .</tt>) into an +# ellipsis entity +# +# This means you can write, edit, and save your posts using plain old +# ASCII straight quotes, plain dashes, and plain dots, but your +# published posts (and final HTML output) will appear with smart +# quotes, em-dashes, and proper ellipses. +# +# RubyPants does not modify characters within <tt><pre></tt>, +# <tt><code></tt>, <tt><kbd></tt>, <tt><math></tt> or +# <tt><script></tt> tag blocks. Typically, these tags are used to +# display text where smart quotes and other "smart punctuation" would +# not be appropriate, such as source code or example markup. +# +# +# == Backslash Escapes +# +# If you need to use literal straight quotes (or plain hyphens and +# periods), RubyPants accepts the following backslash escape sequences +# to force non-smart punctuation. It does so by transforming the +# escape sequence into a decimal-encoded HTML entity: +# +# \\ \" \' \. \- \` +# +# This is useful, for example, when you want to use straight quotes as +# foot and inch marks: 6'2" tall; a 17" iMac. (Use <tt>6\'2\"</tt> +# resp. <tt>17\"</tt>.) +# +# +# == Algorithmic Shortcomings +# +# One situation in which quotes will get curled the wrong way is when +# apostrophes are used at the start of leading contractions. For +# example: +# +# 'Twas the night before Christmas. +# +# In the case above, RubyPants will turn the apostrophe into an +# opening single-quote, when in fact it should be a closing one. I +# don't think this problem can be solved in the general case--every +# word processor I've tried gets this wrong as well. In such cases, +# it's best to use the proper HTML entity for closing single-quotes +# ("<tt>’</tt>") by hand. +# +# +# == Bugs +# +# To file bug reports or feature requests (except see above) please +# send email to: mailto:chneukirchen@gmail.com +# +# If the bug involves quotes being curled the wrong way, please send +# example text to illustrate. +# +# +# == Authors +# +# John Gruber did all of the hard work of writing this software in +# Perl for Movable Type and almost all of this useful documentation. +# Chad Miller ported it to Python to use with Pyblosxom. +# +# Christian Neukirchen provided the Ruby port, as a general-purpose +# library that follows the *Cloth API. +# +# +# == Copyright and License +# +# === SmartyPants license: +# +# Copyright (c) 2003 John Gruber +# (http://daringfireball.net) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name "SmartyPants" nor the names of its contributors +# may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# This software is provided by the copyright holders and contributors +# "as is" and any express or implied warranties, including, but not +# limited to, the implied warranties of merchantability and fitness +# for a particular purpose are disclaimed. In no event shall the +# copyright owner or contributors be liable for any direct, indirect, +# incidental, special, exemplary, or consequential damages (including, +# but not limited to, procurement of substitute goods or services; +# loss of use, data, or profits; or business interruption) however +# caused and on any theory of liability, whether in contract, strict +# liability, or tort (including negligence or otherwise) arising in +# any way out of the use of this software, even if advised of the +# possibility of such damage. +# +# === RubyPants license +# +# RubyPants is a derivative work of SmartyPants and smartypants.py. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# This software is provided by the copyright holders and contributors +# "as is" and any express or implied warranties, including, but not +# limited to, the implied warranties of merchantability and fitness +# for a particular purpose are disclaimed. In no event shall the +# copyright owner or contributors be liable for any direct, indirect, +# incidental, special, exemplary, or consequential damages (including, +# but not limited to, procurement of substitute goods or services; +# loss of use, data, or profits; or business interruption) however +# caused and on any theory of liability, whether in contract, strict +# liability, or tort (including negligence or otherwise) arising in +# any way out of the use of this software, even if advised of the +# possibility of such damage. +# +# +# == Links +# +# John Gruber:: http://daringfireball.net +# SmartyPants:: http://daringfireball.net/projects/smartypants +# +# Chad Miller:: http://web.chad.org +# +# Christian Neukirchen:: http://kronavita.de/chris +# + + +class RubyPants < String + + # Create a new RubyPants instance with the text in +string+. + # + # Allowed elements in the options array: + # + # 0 :: do nothing + # 1 :: enable all, using only em-dash shortcuts + # 2 :: enable all, using old school en- and em-dash shortcuts (*default*) + # 3 :: enable all, using inverted old school en and em-dash shortcuts + # -1 :: stupefy (translate HTML entities to their ASCII-counterparts) + # + # If you don't like any of these defaults, you can pass symbols to change + # RubyPants' behavior: + # + # <tt>:quotes</tt> :: quotes + # <tt>:backticks</tt> :: backtick quotes (``double'' only) + # <tt>:allbackticks</tt> :: backtick quotes (``double'' and `single') + # <tt>:dashes</tt> :: dashes + # <tt>:oldschool</tt> :: old school dashes + # <tt>:inverted</tt> :: inverted old school dashes + # <tt>:ellipses</tt> :: ellipses + # <tt>:convertquotes</tt> :: convert <tt>"</tt> entities to + # <tt>"</tt> for Dreamweaver users + # <tt>:stupefy</tt> :: translate RubyPants HTML entities + # to their ASCII counterparts. + # + def initialize(string, options=[2]) + super string + @options = [*options] + end + + # Apply SmartyPants transformations. + def to_html + do_quotes = do_backticks = do_dashes = do_ellipses = do_stupify = nil + convert_quotes = false + + if @options.include? 0 + # Do nothing. + return self + elsif @options.include? 1 + # Do everything, turn all options on. + do_quotes = do_backticks = do_ellipses = true + do_dashes = :normal + elsif @options.include? 2 + # Do everything, turn all options on, use old school dash shorthand. + do_quotes = do_backticks = do_ellipses = true + do_dashes = :oldschool + elsif @options.include? 3 + # Do everything, turn all options on, use inverted old school + # dash shorthand. + do_quotes = do_backticks = do_ellipses = true + do_dashes = :inverted + elsif @options.include?(-1) + do_stupefy = true + else + do_quotes = @options.include? :quotes + do_backticks = @options.include? :backticks + do_backticks = :both if @options.include? :allbackticks + do_dashes = :normal if @options.include? :dashes + do_dashes = :oldschool if @options.include? :oldschool + do_dashes = :inverted if @options.include? :inverted + do_ellipses = @options.include? :ellipses + convert_quotes = @options.include? :convertquotes + do_stupefy = @options.include? :stupefy + end + + # Parse the HTML + tokens = tokenize + + # Keep track of when we're inside <pre> or <code> tags. + in_pre = false + + # Here is the result stored in. + result = "" + + # This is a cheat, used to get some context for one-character + # tokens that consist of just a quote char. What we do is remember + # the last character of the previous text token, to use as context + # to curl single- character quote tokens correctly. + prev_token_last_char = nil + + tokens.each { |token| + if token.first == :tag + result << token[1] + if token[1] =~ %r!<(/?)(?:pre|code|kbd|script|math)[\s>]! + in_pre = ($1 != "/") # Opening or closing tag? + end + else + t = token[1] + + # Remember last char of this token before processing. + last_char = t[-1].chr + + unless in_pre + t = process_escapes t + + t.gsub!(/"/, '"') if convert_quotes + + if do_dashes + t = educate_dashes t if do_dashes == :normal + t = educate_dashes_oldschool t if do_dashes == :oldschool + t = educate_dashes_inverted t if do_dashes == :inverted + end + + t = educate_ellipses t if do_ellipses + + # Note: backticks need to be processed before quotes. + if do_backticks + t = educate_backticks t + t = educate_single_backticks t if do_backticks == :both + end + + if do_quotes + if t == "'" + # Special case: single-character ' token + if prev_token_last_char =~ /\S/ + t = "’" + else + t = "‘" + end + elsif t == '"' + # Special case: single-character " token + if prev_token_last_char =~ /\S/ + t = "”" + else + t = "“" + end + else + # Normal case: + t = educate_quotes t + end + end + + t = stupefy_entities t if do_stupefy + end + + prev_token_last_char = last_char + result << t + end + } + + # Done + result + end + + protected + + # Return the string, with after processing the following backslash + # escape sequences. This is useful if you want to force a "dumb" quote + # or other character to appear. + # + # Escaped are: + # \\ \" \' \. \- \` + # + def process_escapes(str) + str.gsub('\\\\', '\'). + gsub('\"', '"'). + gsub("\\\'", '''). + gsub('\.', '.'). + gsub('\-', '-'). + gsub('\`', '`') + end + + # The string, with each instance of "<tt>--</tt>" translated to an + # em-dash HTML entity. + # + def educate_dashes(str) + str.gsub(/--/, '—') + end + + # The string, with each instance of "<tt>--</tt>" translated to an + # en-dash HTML entity, and each "<tt>---</tt>" translated to an + # em-dash HTML entity. + # + def educate_dashes_oldschool(str) + str.gsub(/---/, '—').gsub(/--/, '–') + end + + # Return the string, with each instance of "<tt>--</tt>" translated + # to an em-dash HTML entity, and each "<tt>---</tt>" translated to + # an en-dash HTML entity. Two reasons why: First, unlike the en- and + # em-dash syntax supported by +educate_dashes_oldschool+, it's + # compatible with existing entries written before SmartyPants 1.1, + # back when "<tt>--</tt>" was only used for em-dashes. Second, + # em-dashes are more common than en-dashes, and so it sort of makes + # sense that the shortcut should be shorter to type. (Thanks to + # Aaron Swartz for the idea.) + # + def educate_dashes_inverted(str) + str.gsub(/---/, '–').gsub(/--/, '—') + end + + # Return the string, with each instance of "<tt>...</tt>" translated + # to an ellipsis HTML entity. Also converts the case where there are + # spaces between the dots. + # + def educate_ellipses(str) + str.gsub('...', '…').gsub('. . .', '…') + end + + # Return the string, with "<tt>``backticks''</tt>"-style single quotes + # translated into HTML curly quote entities. + # + def educate_backticks(str) + str.gsub("``", '“').gsub("''", '”') + end + + # Return the string, with "<tt>`backticks'</tt>"-style single quotes + # translated into HTML curly quote entities. + # + def educate_single_backticks(str) + str.gsub("`", '‘').gsub("'", '’') + end + + # Return the string, with "educated" curly quote HTML entities. + # + def educate_quotes(str) + punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]' + + str = str.dup + + # Special case if the very first character is a quote followed by + # punctuation at a non-word-break. Close the quotes by brute + # force: + str.gsub!(/^'(?=#{punct_class}\B)/, '’') + str.gsub!(/^"(?=#{punct_class}\B)/, '”') + + # Special case for double sets of quotes, e.g.: + # <p>He said, "'Quoted' words in a larger quote."</p> + str.gsub!(/"'(?=\w)/, '“‘') + str.gsub!(/'"(?=\w)/, '‘“') + + # Special case for decade abbreviations (the '80s): + str.gsub!(/'(?=\d\ds)/, '’') + + close_class = %![^\ \t\r\n\\[\{\(\-]! + dec_dashes = '–|—' + + # Get most opening single quotes: + str.gsub!(/(\s| |--|&[mn]dash;|#{dec_dashes}|ȁ[34];)'(?=\w)/, + '\1‘') + # Single closing quotes: + str.gsub!(/(#{close_class})'/, '\1’') + str.gsub!(/'(\s|s\b|$)/, '’\1') + # Any remaining single quotes should be opening ones: + str.gsub!(/'/, '‘') + + # Get most opening double quotes: + str.gsub!(/(\s| |--|&[mn]dash;|#{dec_dashes}|ȁ[34];)"(?=\w)/, + '\1“') + # Double closing quotes: + str.gsub!(/(#{close_class})"/, '\1”') + str.gsub!(/"(\s|s\b|$)/, '”\1') + # Any remaining quotes should be opening ones: + str.gsub!(/"/, '“') + + str + end + + # Return the string, with each RubyPants HTML entity translated to + # its ASCII counterpart. + # + # Note: This is not reversible (but exactly the same as in SmartyPants) + # + def stupefy_entities(str) + str. + gsub(/–/, '-'). # en-dash + gsub(/—/, '--'). # em-dash + + gsub(/‘/, "'"). # open single quote + gsub(/’/, "'"). # close single quote + + gsub(/“/, '"'). # open double quote + gsub(/”/, '"'). # close double quote + + gsub(/…/, '...') # ellipsis + end + + # Return an array of the tokens comprising the string. Each token is + # either a tag (possibly with nested, tags contained therein, such + # as <tt><a href="<MTFoo>"></tt>, or a run of text between + # tags. Each element of the array is a two-element array; the first + # is either :tag or :text; the second is the actual value. + # + # Based on the <tt>_tokenize()</tt> subroutine from Brad Choate's + # MTRegex plugin. <http://www.bradchoate.com/past/mtregex.php> + # + # This is actually the easier variant using tag_soup, as used by + # Chad Miller in the Python port of SmartyPants. + # + def tokenize + tag_soup = /([^<]*)(<[^>]*>)/ + + tokens = [] + + prev_end = 0 + scan(tag_soup) { + tokens << [:text, $1] if $1 != "" + tokens << [:tag, $2] + + prev_end = $~.end(0) + } + + if prev_end < size + tokens << [:text, self[prev_end..-1]] + end + + tokens + end +end |