Revisions made by tamc up to 04:43 Tue 09 Aug 2005
Note: if you wish to edit or undo these revisions, please follow the links from here. You cannot do it from here, because someone may have edited the page more recently and because you may need a password to edit the page.
04:43 Tue 09 Aug 2005
77. [ /^\'/, '‘' ], # single opening
78. [ /^"/, '“' ], # double opening
79. [ /\'$/, '’' ], # single closing
80. [ /\"$/, '”' ], # double closing
81. [ /\.{3}$/, '\1…' ], # ellipsis
82. [ '--', '\1—' ], # em dash
83. [ '->', ' → ' ], # right arrow
84. [ '-$', ' – ' ], # en dash
- 77. [ /^\'/, '‘' ], # single opening
- 78. [ /^"/, '“' ], # double opening
- 79. [ /\'$/, '’' ], # single closing
- 80. [ /\"$/, '”' ], # double closing
- 81. [ /\.{3}$/, '\1…' ], # ellipsis
- 82. [ '--', '\1—' ], # em dash
- 83. [ '->', ' ? ' ], # right arrow
- 84. [ '-$', ' – ' ], # en dash
86. [ '(TM)', '™' ], # trademark
87. [ '(R)', '®' ], # registered
88. [ '(C)', '©' ] # copyright
- 86. [ '(TM)', '™' ], # trademark
- 87. [ '(R)', '®' ], # registered
- 88. [ '(C)', '©' ] # copyright
492. str.gsub!( '&', '&' )
493. str.gsub!( '"', '"' )
494. str.gsub!( '<', '<')
495. str.gsub!( '>', '>')
- 492. str.gsub!( '&', '&' )
- 493. str.gsub!( '"', '"' )
- 494. str.gsub!( '<', '<')
- 495. str.gsub!( '>', '>')
04:43 Tue 09 Aug 2005
0. content moved from [[A re-write of Redcloth textile to html convertor]]
- 0. h1. Textile to HTML
- 1.
- 2. Textile is a markup language that is meant to make it quick and easy to write basic web pages for wikis, blogs and other uses. Redcloth is a ruby class that converts textile to html, written by _why. Details are at http://www.whytheluckystiff.net/ruby/redcloth/ . Unfortunately, due to my own failings, I find it hard to understand _why's code, so I wrote a partial reimplementation below. This has the benefit of (for me) being clearer and easier to extend. The drawback is that it doesn't do all that _why's version does and is slower. But it is here in case anyone finds it useful.
- 3.
- 4. <pre>
- 5. <code>
- 6. require 'strscan'
- 7.
- 8. # Note: Does not implement anything beyond that on http://hobix.com/textile/
- 9. # i.e. No markdown, link titles, and converting out of range ASCII
- 10. # AND IT IS MUCH SLOWER THAN REDCLOTH !!
- 11. #
- 12. # Bodged together in a few evenings by Tom Counsell (tamc2@cam.ac.uk)
- 13. # Feel free to do whatever you like with the code.
- 14. #
- 15. # Use is the same as for Redcloth (except that none of the options work):
- 16. # html = TextileParser.new( "textile string").to_html
- 17. #
- 18. class TextileParser
- 19.
- 20. # These are applied first
- 21. BLOCK_COMMANDS = [
- 22. [ /<pre.*?>/, :pre_tag ],
- 23. [ /<code.*?>/, :code_tag ],
- 24. [ /<notextile.*?>/, :notextile_tag ],
- 25. [ /h(\d)(.*?)\.\s+/i, :heading_block ],
- 26. [ /bq(.*?)\.\s+/i, :quote_block ],
- 27. [ /(\*+|\#+)\s+/, :list_block ],
- 28. [ /table(.*?)\.\s+/i, :table_block ],
- 29. [ /\|/i, :unspecified_table_block ],
- 30. [ /\{.+?\}\.\s+\|/i, :unspecified_table_block ], # A styled table row
- 31. [ /p(.*?)\.\s+/i, :paragraph_block ],
- 32. [ /fn(\d+)(.*?)\.\s+/i, :footnote_block ],
- 33. [ /\S+/i, :unspecified_block ],
- 34. [ /\s+/i, :skip ],
- 35. ]
- 36.
- 37. # Then these
- 38. COMMANDS = [
- 39. [ /<pre.*?>/, :pre_tag ], # Can be inline as well as in a block
- 40. [ /<code.*?>/, :code_tag ], # Can be inline as well as in a block
- 41. [ /<notextile.*?>/, :notextile_tag ], # Can be inline as well as in a block
- 42. [ /@(\S.*?\S)@/, :quick_escape_code ],
- 43. [ /"(.+?)":(\S*\w\/?)/, :textile_link ],
- 44. [ /\[(.+?)\]:(\S*\w\/?)/, :textile_link_alias ],
- 45. [ /!(.+?)!(\S*)/, :image_link ],
- 46. [ /([A-Z0-9]+)\((.*?)\)/, :acronym ],
- 47. [ /(\S+?)\[(\d+)\]/, :footnote ]
- 48. ]
- 49.
- 50. # Bit of a bodge, but need a different definition of phrase and whitespace in a table
- 51. INLINE_COMMANDS = COMMANDS + [
- 52. [ /\S+/i, :phrase ],
- 53. [ /\s+/i, :space ],
- 54. ]
- 55.
- 56. TABLE_INLINE_COMMANDS = COMMANDS + [
- 57. [ /[^\s\t\r\n\f\|]+/i, :phrase ],
- 58. [ /\s+/i, :space ],
- 59. ]
- 60.
- 61. # Then these are applied to phrases from above
- 62. PHRASE_MODIFIERS = [
- 63. [ '__', 'i' ],
- 64. [ '**', 'b' ],
- 65. [ '_', 'em' ],
- 66. [ '*', 'strong' ],
- 67. [ '??', 'cite' ],
- 68. [ '-', 'del' ],
- 69. [ '+', 'ins' ],
- 70. [ '^', 'sup' ],
- 71. [ '~', 'sub' ],
- 72. [ '%', 'span' ], # How to avoid when people use % as in 3.0% growth?
- 73. ].collect! { |regexp,tag| [ /^#{Regexp.escape(regexp)}/, /#{Regexp.escape(regexp)}$/, tag ] }
- 74.
- 75. # Character substitutions done last to any words
- 76. GLYPHS = [
- 77. [ /^\'/, '‘' ], # single opening
- 78. [ /^"/, '“' ], # double opening
- 79. [ /\'$/, '’' ], # single closing
- 80. [ /\"$/, '”' ], # double closing
- 81. [ /\.{3}$/, '\1…' ], # ellipsis
- 82. [ '--', '\1—' ], # em dash
- 83. [ '->', ' → ' ], # right arrow
- 84. [ '-$', ' – ' ], # en dash
- 85.
- 86. [ '(TM)', '™' ], # trademark
- 87. [ '(R)', '®' ], # registered
- 88. [ '(C)', '©' ] # copyright
- 89. ]
- 90.
- 91. # This is just used to give the output html a more beautiful layout
- 92. # All tags in here will get a newline after they are output
- 93. # and the indent for following lines increased by the number
- 94. INDENTING_TAGS = { 'ul' => 1,
- 95. 'ol' => 1,
- 96. 'li' => 0,
- 97. 'blockquote' => 1,
- 98. 'table' => 1,
- 99. 'tr' => 1,
- 100. 'td' => 0,
- 101. 'th' => 0,
- 102. 'p' => 0,
- 103. }
- 104.
- 105. def initialize( text )
- 106. @text = text
- 107. end
- 108.
- 109. def to_html( settings = nil )
- 110. reset
- 111. convert_text
- 112. return html.chomp.chomp # the tests don't have any trailing \ns
- 113. end
- 114.
- 115. private
- 116.
- 117. ## Methods dealing with blocks of text are called first
- 118.
- 119. def convert_text
- 120. until @scanner.eos?
- 121. send( BLOCK_COMMANDS.detect { |regexp, method| @scanner.scan( regexp ) }[1] )
- 122. add_to_html "\n" # Prettier html if extra space between blocks
- 123. end
- 124. insert_any_link_aliases
- 125. html
- 126. end
- 127.
- 128. # These are all the block commands
- 129.
- 130. def paragraph_block
- 131. tag( 'p', parse_attributes( @scanner[1] ) ) do
- 132. standard_paragraph
- 133. end
- 134. end
- 135.
- 136. def quote_block
- 137. tag( "blockquote", parse_attributes( @scanner[1] ) ) do
- 138. tag 'p' do
- 139. standard_paragraph
- 140. end
- 141. end
- 142. end
- 143.
- 144. def unspecified_block
- 145. @scanner.unscan
- 146. tag 'p' do
- 147. standard_paragraph
- 148. end
- 149. end
- 150.
- 151. def heading_block
- 152. tag( "h#{@scanner[1]}", parse_attributes( @scanner[2] ) ) do
- 153. standard_line # Assume titles may only be on one line
- 154. end
- 155. end
- 156.
- 157. def list_block
- 158. ordered = list_ordered? # See what sort of list we have
- 159. depth = list_depth
- 160. @scanner.unscan # So that the lines can be scanned individually
- 161. tag( ordered ? 'ol' : 'ul' ) do
- 162. list_line( ordered, depth ) until end_of_list?( depth )
- 163. end
- 164. end
- 165.
- 166. def table_block
- 167. tag( 'table', parse_attributes( @scanner[1] ) ) do
- 168. table_line until end_of_paragraph?
- 169. end
- 170. end
- 171.
- 172. def unspecified_table_block
- 173. @scanner.unscan
- 174. tag( 'table' ) do
- 175. table_line until end_of_paragraph?
- 176. end
- 177. end
- 178.
- 179. def footnote_block
- 180. number = @scanner[1]
- 181. attributes = parse_attributes( @scanner[2] )
- 182. attributes[:id] = "fn#{number}"
- 183. tag 'p', attributes do
- 184. add_to_html "<sup>#{number}</sup> "
- 185. standard_line until end_of_paragraph?
- 186. end
- 187. end
- 188.
- 189. # Now descend into methods dealing with lines of text
- 190.
- 191. def pre_tag
- 192. escape_tag 'pre'
- 193. end
- 194.
- 195. def code_tag
- 196. escape_tag 'code'
- 197. end
- 198.
- 199. def notextile_tag
- 200. escape_tag 'notextile', false
- 201. end
- 202.
- 203. # This escapes until a matching close tag
- 204. def escape_tag( tag, include_tag_in_output = true )
- 205. add_to_html( @scanner.matched ) if include_tag_in_output
- 206. level = 1
- 207. while level > 0
- 208. break unless @scanner.scan(/(.*?)(<(\/)?#{tag}.*?>)/m) # Breaks if no closing tag
- 209. add_to_html( htmlesc( @scanner[1] || "" ) )
- 210. level = level + ( @scanner[3] ? -1 : 1 )
- 211. add_to_html( htmlesc( @scanner[2] ) ) unless level == 0
- 212. end
- 213. add_to_html "</#{tag}>" if include_tag_in_output
- 214. end
- 215.
- 216. def quick_escape_code
- 217. tag 'code' do
- 218. add_to_html( htmlesc( @scanner[1] ) )
- 219. end
- 220. end
- 221.
- 222. def list_line( ordered, depth )
- 223. tag 'li' do
- 224. @scanner.scan(/(#+|\*+)\s+/)
- 225. if ( list_ordered? == ordered ) && ( list_depth == depth )
- 226. standard_line
- 227. else # Recursive for sub lists
- 228. list_block
- 229. end
- 230. end
- 231. end
- 232.
- 233. def table_line
- 234. # Are there row attributes at the start of the line?
- 235. attributes = @scanner.scan(/(\{.+?\})\.\s+/) ? parse_attributes(@scanner[1]) : {}
- 236. @scanner.scan(/\|/) # Get rid of any leading cell opening
- 237. tag( 'tr', attributes ) do
- 238. table_cell until end_of_table_line?
- 239. end
- 240. end
- 241.
- 242. def standard_paragraph
- 243. until end_of_paragraph?
- 244. send( INLINE_COMMANDS.detect { |regexp, method| @scanner.scan( regexp ) }[1] )
- 245. end
- 246. end
- 247.
- 248. def standard_line
- 249. until end_of_line?
- 250. send( INLINE_COMMANDS.detect { |regexp, method| @scanner.scan( regexp ) }[1] )
- 251. end
- 252. end
- 253.
- 254. # Now descend into methods dealing with phrases
- 255.
- 256. def table_cell
- 257. # Style defined at start of cell ?
- 258. attributes = @scanner.scan(/(_)?(\S*?)\.\s*/) ? parse_attributes(@scanner[2]) : {}
- 259. tag( @scanner[1] ? 'th' : 'td', attributes) do
- 260. until end_of_table_cell?
- 261. send( TABLE_INLINE_COMMANDS.detect { |regexp, method| @scanner.scan( regexp ) }[1] )
- 262. end
- 263. end
- 264. end
- 265.
- 266. def footnote
- 267. add_to_html "#{@scanner[1]}<sup><a href=\"#fn#{@scanner[2]}\">#{@scanner[2]}</a></sup>"
- 268. end
- 269.
- 270. def acronym
- 271. add_to_html "<acronym title=\"#{@scanner[2]}\">#{@scanner[1]}</acronym>"
- 272. end
- 273.
- 274. def phrase
- 275. word = @scanner.matched
- 276.
- 277. return add_to_html( parse_glyphs( word ) ) unless word =~ /\w+/ # If a word is entirely symbols then we will leave it in peace.
- 278.
- 279. # Open tags
- 280. PHRASE_MODIFIERS.each do |start_r, end_r, tag|
- 281. if word =~ start_r
- 282. word = $' # The bit after the match
- 283. # Look for matching brackets that indicate there are attributes
- 284. if word =~ /(\(.+?\)|\{.+?\}|\[.+?\])/
- 285. open_tag( tag, parse_attributes( $1 ) )
- 286. word = $'
- 287. else
- 288. open_tag tag
- 289. end
- 290. break
- 291. end
- 292. end
- 293.
- 294. # Close tags
- 295. end_tag = nil
- 296. PHRASE_MODIFIERS.each do |start_r, end_r, tag|
- 297. if word =~ end_r
- 298. end_tag = tag
- 299. word = $` # The bit before the match
- 300. break
- 301. end
- 302. end
- 303. add_to_html parse_glyphs( word )
- 304. close_tag( end_tag ) if end_tag
- 305. end
- 306.
- 307. def space
- 308. add_to_html @scanner.matched
- 309. end
- 310.
- 311. def image_link
- 312. @scanner.matched =~ /^!([<>]*)(.*?)(!|\((.*?)\)!)($|(:(.+?)$))/
- 313. alignment, src, title, url = $1, $2, $4, $7
- 314. attributes = {}
- 315. attributes[:style] = 'float:right' if alignment == '>'
- 316. attributes[:style] = 'float:left' if alignment == '<'
- 317.
- 318. attributes[:src] = src
- 319. attributes[:alt] = attributes[ :title ] = title if title
- 320. if url
- 321. tag 'a', { :href => url } do
- 322. open_tag 'img', attributes, true
- 323. end
- 324. else
- 325. open_tag 'img', attributes, true
- 326. end
- 327. end
- 328.
- 329. def skip
- 330. # Do nothing !
- 331. end
- 332.
- 333. def textile_link
- 334. add_to_html "<a href=\"#{@scanner[2]}\">#{@scanner[1]}</a>"
- 335. end
- 336.
- 337. def textile_link_alias
- 338. # These are saved for later resubstitution
- 339. @aliases[ @scanner[1] ] = @scanner[2]
- 340. end
- 341.
- 342. # This feels clunky, and is done last
- 343. def insert_any_link_aliases
- 344. @aliases.each do |als, href|
- 345. html.gsub!( /href="#{als}"/, "href=\"#{href}\"" )
- 346. end
- 347. end
- 348.
- 349. # These are helper methods that make sure html is properly closed and indented
- 350.
- 351. def tag( tag, attributes = {} )
- 352. open_tag( tag, attributes )
- 353. yield
- 354. close_tag tag
- 355. end
- 356.
- 357. def close_tag( tag = :all_tags )
- 358. # Check the tag has been opened
- 359. return unless open_tags.include?( tag ) || ( tag == :all_tags )
- 360.
- 361. # Close all tags up to that tag (in case one was not closed)
- 362. until open_tags.empty?
- 363. open_tag = open_tags.pop
- 364. # This is just stuff to make the html look pretty
- 365. if (indent = INDENTING_TAGS[ open_tag ] )
- 366. if indent == 0
- 367. add_to_html "</#{open_tag}>"
- 368. add_to_html "\n"
- 369. else
- 370. add_to_html "\n" unless html =~ /\n$/
- 371. @indent -= indent
- 372. add_to_html( "\t" * @indent )
- 373. add_to_html "</#{open_tag}>"
- 374. add_to_html "\n"
- 375. end
- 376. else
- 377. add_to_html "</#{open_tag}>"
- 378. end
- 379. return if open_tag == tag
- 380. end
- 381. end
- 382.
- 383. def open_tag( tag, attributes = {}, no_close_tag = false )
- 384. add_to_html( "\t" * @indent )
- 385.
- 386. add_to_html "<#{tag}"
- 387.
- 388. attributes.each { |key, value| add_to_html( " #{key.to_s}=\"#{value.to_s}\"" ) }
- 389.
- 390. if no_close_tag
- 391. add_to_html " />"
- 392. return
- 393. end
- 394.
- 395. add_to_html ">"
- 396.
- 397. if (indent = INDENTING_TAGS[ tag ] )
- 398. add_to_html "\n" unless indent == 0
- 399. @indent += indent
- 400. end
- 401. open_tags << tag
- 402. end
- 403.
- 404. def open_tags
- 405. @open_tags ||= []
- 406. end
- 407.
- 408. def parse_attributes( attribute_text )
- 409. return {} unless attribute_text && attribute_text != ""
- 410. a = { :style => "" }
- 411.
- 412. # The hand-entered classes, ids, styles and langs
- 413. # These are replaced with "" so their content cannot be matched below
- 414. a[:lang] = $1 if attribute_text =~ /\[(.+?)\]/
- 415. a[:class] = $1 if attribute_text.sub!(/\((.+?)\)/,'')
- 416. a[:class], a[:id] = $1, $2 if a[:class] =~ /^(.*?)#(.*)$/
- 417. a[:style] << "#{$1};" if attribute_text.sub!(/\{(.+?)\}/,'')
- 418.
- 419. # Various padding and indents
- 420. a[:style] << "padding-left:#{ $1.length }em;" if attribute_text =~ /(\(+)/
- 421. a[:style] << "padding-right:#{ $1.length }em;" if attribute_text =~ /(\)+)/
- 422.
- 423. # The various alignments
- 424. a[:style] << "text-align:left;" if attribute_text =~ /<(?!>)/
- 425. a[:style] << "text-align:right;" if attribute_text =~ /(?!<)>/
- 426. a[:style] << "text-align:justify;" if attribute_text =~ /<>/
- 427. a[:style] << "text-align:center;" if attribute_text =~ /=/
- 428.
- 429. #Various column spans on tables
- 430. a[:colspan] = $1 if attribute_text =~ /\\(\d+)/
- 431. a[:rowspan] = $1 if attribute_text =~ /\/(\d+)/
- 432.
- 433. #Vertical alignments on tables
- 434. a[:style] << "vertical-align:top;" if attribute_text =~ /\^/
- 435. a[:style] << "vertical-align:bottom;" if attribute_text =~ /\~/
- 436.
- 437. # Get rid of any empty attributes before returning
- 438. a.delete_if { |k,v| !v || (v == "") }
- 439. end
- 440.
- 441. def parse_glyphs( word )
- 442. GLYPHS.each do |regexp,replacement|
- 443. word.gsub!( regexp, replacement )
- 444. end
- 445. word
- 446. end
- 447.
- 448. # Now some helper methods for spotting the ends of sections
- 449.
- 450. def end_of_paragraph?
- 451. return true if @scanner.eos?
- 452. @scanner.scan(/\n{2,}/)
- 453. end
- 454.
- 455. def end_of_list?( depth )
- 456. return true if @scanner.eos?
- 457. return true unless @scanner.check(/(#+|\*+)\s+/) # Not a list any more
- 458. return true if list_depth < depth # End of this sub list
- 459. @scanner.scan(/\n{2,}/)
- 460. end
- 461.
- 462. def end_of_line?
- 463. return true if @scanner.eos?
- 464. return true if @scanner.check(/\n{2,}/)
- 465. @scanner.scan(/\n/)
- 466. end
- 467.
- 468. def end_of_table_line?
- 469. return true if @scanner.eos?
- 470. return true if @scanner.check(/\n{2,}/)
- 471. @scanner.scan(/\|\s*\n/)
- 472. end
- 473.
- 474. def end_of_table_cell?
- 475. return true if @scanner.eos?
- 476. return true if @scanner.check(/\n/)
- 477. return true if @scanner.check(/\|\s*\n/)
- 478. @scanner.scan(/\|/)
- 479. end
- 480.
- 481. # Now some random helper methods for decoding
- 482.
- 483. def list_ordered?
- 484. @scanner.matched[0,1] == '#'
- 485. end
- 486.
- 487. def list_depth
- 488. @scanner[1].size
- 489. end
- 490.
- 491. def htmlesc( str )
- 492. str.gsub!( '&', '&amp;' )
- 493. str.gsub!( '"', '&quot;' )
- 494. str.gsub!( '<', '&lt;')
- 495. str.gsub!( '>', '&gt;')
- 496. str
- 497. end
- 498.
- 499. # Now the low level matching functions
- 500.
- 501. def add_to_html( object )
- 502. @html << object.to_s
- 503. end
- 504.
- 505. def html
- 506. @html
- 507. end
- 508.
- 509. def reset
- 510. @html, @scanner = "" , StringScanner.new( @text )
- 511. @aliases = {}
- 512. @indent = 0
- 513. end
- 514. end
- 515. </code>
- 516. </pre>
04:43 Tue 09 Aug 2005
- 0. content moved from [[A re-write of Redcloth textile to html convertor]]