diff options
Diffstat (limited to 'rubypants.rb')
-rw-r--r-- | rubypants.rb | 483 |
1 files changed, 0 insertions, 483 deletions
diff --git a/rubypants.rb b/rubypants.rb deleted file mode 100644 index 6897228b..00000000 --- a/rubypants.rb +++ /dev/null @@ -1,483 +0,0 @@ -# -# = RubyPants - SmartyPants ported to Ruby -# -# Ported by Christian Neukirchen <mailto:chneukirchen@gmail.com> -# Copyright (C) 2004 Christian Neukirchen -# -# Incorporates ideas, comments and documentation by Chad Miller -# Copyright (C) 2004 Chad Miller -# -# Original SmartyPants by John Gruber -# Copyright (C) 2003 John Gruber -# - -# -# = RubyPants - SmartyPants ported to Ruby -# -# == Synopsis -# -# RubyPants is a Ruby port of the smart-quotes library SmartyPants. -# -# The original "SmartyPants" is a free web publishing plug-in for -# Movable Type, Blosxom, and BBEdit that easily translates plain ASCII -# punctuation characters into "smart" typographic punctuation HTML -# entities. -# -# -# == Description -# -# RubyPants can perform the following transformations: -# -# * Straight quotes (<tt>"</tt> and <tt>'</tt>) into "curly" quote -# HTML entities -# * Backticks-style quotes (<tt>``like this''</tt>) into "curly" quote -# HTML entities -# * Dashes (<tt>--</tt> and <tt>---</tt>) into en- and em-dash -# entities -# * Three consecutive dots (<tt>...</tt> or <tt>. . .</tt>) into an -# ellipsis entity -# -# This means you can write, edit, and save your posts using plain old -# ASCII straight quotes, plain dashes, and plain dots, but your -# published posts (and final HTML output) will appear with smart -# quotes, em-dashes, and proper ellipses. -# -# RubyPants does not modify characters within <tt><pre></tt>, -# <tt><code></tt>, <tt><kbd></tt>, <tt><math></tt> or -# <tt><script></tt> tag blocks. Typically, these tags are used to -# display text where smart quotes and other "smart punctuation" would -# not be appropriate, such as source code or example markup. 
-# -# -# == Backslash Escapes -# -# If you need to use literal straight quotes (or plain hyphens and -# periods), RubyPants accepts the following backslash escape sequences -# to force non-smart punctuation. It does so by transforming the -# escape sequence into a decimal-encoded HTML entity: -# -# \\ \" \' \. \- \` -# -# This is useful, for example, when you want to use straight quotes as -# foot and inch marks: 6'2" tall; a 17" iMac. (Use <tt>6\'2\"</tt> -# resp. <tt>17\"</tt>.) -# -# -# == Algorithmic Shortcomings -# -# One situation in which quotes will get curled the wrong way is when -# apostrophes are used at the start of leading contractions. For -# example: -# -# 'Twas the night before Christmas. -# -# In the case above, RubyPants will turn the apostrophe into an -# opening single-quote, when in fact it should be a closing one. I -# don't think this problem can be solved in the general case--every -# word processor I've tried gets this wrong as well. In such cases, -# it's best to use the proper HTML entity for closing single-quotes -# (``&#8217;``) by hand. -# -# -# == Bugs -# -# To file bug reports or feature requests (except see above) please -# send email to: mailto:chneukirchen@gmail.com -# -# If the bug involves quotes being curled the wrong way, please send -# example text to illustrate. -# -# -# == Authors -# -# John Gruber did all of the hard work of writing this software in -# Perl for Movable Type and almost all of this useful documentation. -# Chad Miller ported it to Python to use with Pyblosxom. -# -# Christian Neukirchen provided the Ruby port, as a general-purpose -# library that follows the *Cloth api. -# -# -# == Copyright and License -# -# === SmartyPants license: -# -# Copyright (c) 2003 John Gruber -# (http://daringfireball.net) -# All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# -# * Neither the name "SmartyPants" nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# This software is provided by the copyright holders and contributors -# "as is" and any express or implied warranties, including, but not -# limited to, the implied warranties of merchantability and fitness -# for a particular purpose are disclaimed. In no event shall the -# copyright owner or contributors be liable for any direct, indirect, -# incidental, special, exemplary, or consequential damages (including, -# but not limited to, procurement of substitute goods or services; -# loss of use, data, or profits; or business interruption) however -# caused and on any theory of liability, whether in contract, strict -# liability, or tort (including negligence or otherwise) arising in -# any way out of the use of this software, even if advised of the -# possibility of such damage. -# -# === RubyPants license -# -# RubyPants is a derivative work of SmartyPants and smartypants.py. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. 
-# -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# -# This software is provided by the copyright holders and contributors -# "as is" and any express or implied warranties, including, but not -# limited to, the implied warranties of merchantability and fitness -# for a particular purpose are disclaimed. In no event shall the -# copyright owner or contributors be liable for any direct, indirect, -# incidental, special, exemplary, or consequential damages (including, -# but not limited to, procurement of substitute goods or services; -# loss of use, data, or profits; or business interruption) however -# caused and on any theory of liability, whether in contract, strict -# liability, or tort (including negligence or otherwise) arising in -# any way out of the use of this software, even if advised of the -# possibility of such damage. 
#
#
# == Links
#
# John Gruber:: http://daringfireball.net
# SmartyPants:: http://daringfireball.net/projects/smartypants
#
# Chad Miller:: http://web.chad.org
#
# Christian Neukirchen:: http://kronavita.de/chris
#


# A String subclass whose #to_html returns the text with plain ASCII
# punctuation replaced by numeric HTML entities for curly quotes,
# dashes and ellipses. Text inside <pre>, <code>, <kbd>, <script> and
# <math> elements is passed through untouched.
class RubyPants < String
  VERSION = "0.1"

  # Create a new RubyPants string from +string+. +options+ may be a
  # single option or an array of options.
  #
  # Allowed elements in the options array:
  #
  # 0  :: do nothing
  # 1  :: set all
  # 2  :: set all, using old school en- and em- dash shortcuts
  # 3  :: set all, using inverted old school en and em- dash shortcuts
  # -1 :: stupefy (translate HTML entities to their ASCII-counterparts)
  #
  # <tt>:quotes</tt>        :: quotes
  # <tt>:backticks</tt>     :: backtick quotes (``double'' only)
  # <tt>:allbackticks</tt>  :: backtick quotes (``double'' and `single')
  # <tt>:dashes</tt>        :: dashes
  # <tt>:oldschool</tt>     :: old school dashes
  # <tt>:inverted</tt>      :: inverted old school dashes
  # <tt>:ellipses</tt>      :: ellipses
  # <tt>:convertquotes</tt> :: convert <tt>&quot;</tt> entities to
  #                            <tt>"</tt> for Dreamweaver users
  # <tt>:stupefy</tt>       :: translate SmartyPants HTML entities
  #                            to their ASCII counterparts.
  #
  def initialize(string, options=[2])
    super string
    # [*options] accepts both a bare option and an array of options.
    @options = [*options]
  end

  # Apply SmartyPants transformations and return the resulting string.
  # With option 0 the receiver itself is returned unchanged.
  def to_html
    do_quotes = do_backticks = do_dashes = do_ellipses = do_stupefy = nil
    convert_quotes = false

    # The numeric presets take precedence over the symbolic options and
    # are checked in the order 0, 1, 2, 3, -1.
    if @options.include? 0
      # Do nothing.
      return self
    elsif @options.include? 1
      # Do everything, turn all options on.
      do_quotes = do_backticks = do_ellipses = true
      do_dashes = :normal
    elsif @options.include? 2
      # Do everything, turn all options on, use old school dash shorthand.
      do_quotes = do_backticks = do_ellipses = true
      do_dashes = :oldschool
    elsif @options.include? 3
      # Do everything, turn all options on, use inverted old school
      # dash shorthand.
      do_quotes = do_backticks = do_ellipses = true
      do_dashes = :inverted
    elsif @options.include?(-1)
      do_stupefy = true
    else
      do_quotes      = @options.include? :quotes
      do_backticks   = @options.include? :backticks
      do_backticks   = :both      if @options.include? :allbackticks
      do_dashes      = :normal    if @options.include? :dashes
      do_dashes      = :oldschool if @options.include? :oldschool
      do_dashes      = :inverted  if @options.include? :inverted
      do_ellipses    = @options.include? :ellipses
      convert_quotes = @options.include? :convertquotes
      do_stupefy     = @options.include? :stupefy
    end

    # Parse the HTML
    tokens = tokenize

    # Keep track of when we're inside a <pre>, <code>, <kbd>, <script>
    # or <math> element, where no transformation is applied.
    in_pre = false

    # The transformed output is accumulated here.
    result = ""

    # This is a cheat, used to get some context for one-character
    # tokens that consist of just a quote char. What we do is remember
    # the last character of the previous text token, to use as context
    # to curl single-character quote tokens correctly.
    prev_token_last_char = ""

    tokens.each { |token|
      if token.first == :tag
        result << token[1]
        if token[1] =~ %r!<(/?)(?:pre|code|kbd|script|math)[\s>]!
          in_pre = ($1 != "/")  # Opening or closing tag?
        end
      else
        t = token[1]

        # Remember last char of this token before processing.
        # t[-1, 1] always yields a one-character String, whereas t[-1]
        # yields an Integer on Ruby 1.8.
        last_char = t[-1, 1]

        unless in_pre
          t = process_escapes t

          t.gsub!(/&quot;/, '"')  if convert_quotes

          if do_dashes
            t = educate_dashes t            if do_dashes == :normal
            t = educate_dashes_oldschool t  if do_dashes == :oldschool
            t = educate_dashes_inverted t   if do_dashes == :inverted
          end

          t = educate_ellipses t  if do_ellipses

          # Note: backticks need to be processed before quotes.
          if do_backticks
            t = educate_backticks t
            t = educate_single_backticks t  if do_backticks == :both
          end

          if do_quotes
            if t == "'"
              # Special case: single-character ' token
              if prev_token_last_char =~ /\S/
                t = "&#8217;"
              else
                t = "&#8216;"
              end
            elsif t == '"'
              # Special case: single-character " token
              if prev_token_last_char =~ /\S/
                t = "&#8221;"
              else
                t = "&#8220;"
              end
            else
              # Normal case:
              t = educate_quotes t
            end
          end

          t = stupefy_entities t  if do_stupefy
        end

        prev_token_last_char = last_char
        result << t
      end
    }

    # Done
    result
  end

  protected

  # Return the string after processing the following backslash escape
  # sequences, each of which is replaced by a decimal-encoded HTML
  # entity. This is useful if you want to force a "dumb" quote or other
  # character to appear.
  #
  # Escaped are:
  #      \\    \"    \'    \.    \-    \`
  #
  def process_escapes(str)
    str.gsub(/\\\\/, '&#92;').
      gsub(/\\"/, '&#34;').
      gsub(/\\'/, '&#39;').
      gsub(/\\\./, '&#46;').
      gsub(/\\-/, '&#45;').
      gsub(/\\`/, '&#96;')
  end

  # The string, with each instance of "<tt>--</tt>" translated to an
  # em-dash HTML entity.
  #
  def educate_dashes(str)
    str.gsub(/--/, '&#8212;')
  end

  # The string, with each instance of "<tt>--</tt>" translated to an
  # en-dash HTML entity, and each "<tt>---</tt>" translated to an
  # em-dash HTML entity.
  #
  def educate_dashes_oldschool(str)
    # The triple dash must be replaced first, otherwise "--" would eat
    # part of every "---".
    str.gsub(/---/, '&#8212;').gsub(/--/, '&#8211;')
  end

  # Return the string, with each instance of "<tt>--</tt>" translated
  # to an em-dash HTML entity, and each "<tt>---</tt>" translated to
  # an en-dash HTML entity. Two reasons why: First, unlike the en- and
  # em-dash syntax supported by +educate_dashes_oldschool+, it's
  # compatible with existing entries written before SmartyPants 1.1,
  # back when "<tt>--</tt>" was only used for em-dashes.  Second,
  # em-dashes are more common than en-dashes, and so it sort of makes
  # sense that the shortcut should be shorter to type. (Thanks to
  # Aaron Swartz for the idea.)
  #
  def educate_dashes_inverted(str)
    str.gsub(/---/, '&#8211;').gsub(/--/, '&#8212;')
  end

  # Return the string, with each instance of "<tt>...</tt>" translated
  # to an ellipsis HTML entity. Also converts the case where there are
  # spaces between the dots.
  #
  def educate_ellipses(str)
    str.gsub('...', '&#8230;').gsub('. . .', '&#8230;')
  end

  # Return the string, with <tt>``backticks''</tt>-style double quotes
  # translated into HTML curly quote entities.
  #
  def educate_backticks(str)
    str.gsub("``", '&#8220;').gsub("''", '&#8221;')
  end

  # Return the string, with <tt>`backticks'</tt>-style single quotes
  # translated into HTML curly quote entities.
  #
  def educate_single_backticks(str)
    str.gsub("`", '&#8216;').gsub("'", '&#8217;')
  end

  # Return a copy of the string with "educated" curly quote HTML
  # entities. The substitutions are order dependent: special cases
  # first, then opening quotes, then closing quotes, and finally any
  # leftover quote is treated as an opening one.
  #
  def educate_quotes(str)
    punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'

    str = str.dup

    # Special case if the very first character is a quote followed by
    # punctuation at a non-word-break. Close the quotes by brute
    # force:
    str.gsub!(/^'(?=#{punct_class}\B)/, '&#8217;')
    str.gsub!(/^"(?=#{punct_class}\B)/, '&#8221;')

    # Special case for double sets of quotes, e.g.:
    #   <p>He said, "'Quoted' words in a larger quote."</p>
    str.gsub!(/"'(?=\w)/, '&#8220;&#8216;')
    str.gsub!(/'"(?=\w)/, '&#8216;&#8220;')

    # Special case for decade abbreviations (the '80s):
    str.gsub!(/'(?=\d\ds)/, '&#8217;')

    # Any character that is not whitespace, an opening bracket/brace/
    # parenthesis or a hyphen; a quote directly after it is a closer.
    close_class = %![^\ \t\r\n\\[\{\(\-]!
    dec_dashes = '&#8211;|&#8212;'

    # Get most opening single quotes:
    str.gsub!(/(\s|&nbsp;|--|&[mn]dash;|#{dec_dashes}|&#x201[34];)'(?=\w)/,
              '\1&#8216;')
    # Single closing quotes:
    str.gsub!(/(#{close_class})'/, '\1&#8217;')
    str.gsub!(/'(\s|s\b|$)/, '&#8217;\1')
    # Any remaining single quotes should be opening ones:
    str.gsub!(/'/, '&#8216;')

    # Get most opening double quotes:
    str.gsub!(/(\s|&nbsp;|--|&[mn]dash;|#{dec_dashes}|&#x201[34];)"(?=\w)/,
              '\1&#8220;')
    # Double closing quotes:
    str.gsub!(/(#{close_class})"/, '\1&#8221;')
    str.gsub!(/"(\s|s\b|$)/, '&#8221;\1')
    # Any remaining quotes should be opening ones:
    str.gsub!(/"/, '&#8220;')

    str
  end

  # Return the string, with each SmartyPants HTML entity translated to
  # its ASCII counterpart.
  #
  def stupefy_entities(str)
    str.
      gsub(/&#8211;/, '-').    # en-dash
      gsub(/&#8212;/, '--').   # em-dash
      gsub(/&#8216;/, "'").    # open single quote
      gsub(/&#8217;/, "'").    # close single quote
      gsub(/&#8220;/, '"').    # open double quote
      gsub(/&#8221;/, '"').    # close double quote
      gsub(/&#8230;/, '...')   # ellipsis
  end

  # Return an array of the tokens comprising the string. Each token is
  # either a tag or a run of text between tags. Each element of the
  # array is a two-element array; the first is either :tag or :text;
  # the second is the actual value.
  #
  # Based on the <tt>_tokenize()</tt> subroutine from Brad Choate's
  # MTRegex plugin.  <http://www.bradchoate.com/past/mtregex.php>
  #
  # This is actually the easier variant using tag_soup, as used by
  # Chad Miller in the Python port of SmartyPants.
  #
  def tokenize
    # An optional run of text followed by one "<...>" tag.
    tag_soup = /([^<]*)(<[^>]*>)/

    tokens = []

    prev_end = 0
    scan(tag_soup) {
      tokens << [:text, $1]  if $1 != ""
      tokens << [:tag, $2]

      prev_end = $~.end(0)
    }

    # Whatever follows the last tag (or the whole string when there is
    # no tag at all) is one final text token.
    if prev_end < size
      tokens << [:text, self[prev_end..-1]]
    end

    tokens
  end
end
\ No newline at end of file |