2007-01-22 15:36:51 +01:00
|
|
|
#--
|
|
|
|
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
|
|
|
#
|
|
|
|
# This file is part of Maruku.
|
|
|
|
#
|
|
|
|
# Maruku is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# Maruku is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with Maruku; if not, write to the Free Software
|
|
|
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
#++
|
|
|
|
|
|
|
|
|
|
|
|
module MaRuKu; module In; module Markdown; module SpanLevelParser

  # This class helps me read and sanitize HTML blocks.
  # I tried to do this with REXML, but wasn't able to. (suggestions?)
  #
  # It is an incremental reader: input is fed chunk by chunk with #eat_this.
  # The helper keeps a stack of the currently open tags and accumulates the
  # text it has consumed, which is returned by #stuff_you_read. #is_finished?
  # tells whether every opened tag has been closed, and #rest exposes the
  # input that is still unread.
  class HTMLHelper
    include MaRuKu::Strings

    # Every pattern is anchored with \A (start of the unread buffer), not with
    # ^ (start of any line): the buffer may hold several lines, and input must
    # be consumed strictly in order.

    # A complete tag. [1] is "/" for a closing tag, [2] is the tag name and
    # [3] is the raw attribute text (possibly ending with "/").
    Tag = %r{\A<(/)?(\w+)\s*([^>]*)>}m
    # A "<" that does not (yet) form a complete tag; takes the whole buffer.
    PartialTag = %r{\A<.*}m
    # Plain text up to the next "<".
    EverythingElse = %r{\A[^<]+}m
    CommentStart = %r{\A<!--}
    # Lazy match, so that a comment stops at the FIRST "-->"; /m lets the
    # comment body span several lines.
    CommentEnd = %r{\A.*?-->}m
    # The remainder of a partially read tag, up to and including ">".
    TagEnd = %r{\A[^>]*>}

    # Tags that are always re-emitted in self-closing form (see #handle_tag).
    TO_SANITIZE = ['img', 'hr'].freeze

    # Input that has not been consumed yet.
    attr_reader :rest

    # Current reader state: :inside_element, :inside_tag or :inside_comment.
    attr_accessor :state

    def initialize
      @rest = ""
      @tag_stack = []
      @m = nil
      @already = ""
      @partial_tag = nil
      # Last characters (at most two) of the comment text consumed so far.
      @comment_tail = ""
      self.state = :inside_element
    end

    # Feeds +line+ (a String: a line, a single character, any chunk) to the
    # reader and consumes as much of it as possible.
    #
    # Reading stops early, leaving the remainder in #rest, as soon as at
    # least one tag or comment start has been read during this call and
    # #is_finished? is true.
    #
    # Raises (through #error) when a closing tag does not match the tag that
    # is currently open.
    def eat_this(line)
      # Leftovers of a previous call precede the new input in the stream.
      @rest = @rest + line
      things_read = 0
      until @rest.empty?
        case state
        when :inside_comment then read_inside_comment
        when :inside_element then things_read += read_inside_element
        when :inside_tag     then read_inside_tag
        else
          raise "Bug bug: state = #{state.inspect}"
        end

        break if is_finished? && things_read > 0
      end
    end

    # Processes the complete tag held in @m (a match of Tag): updates the
    # output, the unread buffer and the stack of open tags.
    def handle_tag
      @already += @m.pre_match
      @rest = @m.post_match

      is_closing = !!@m[1]
      tag = @m[2]
      attributes = @m[3]

      # A trailing slash marks a self-closing tag such as <br />. The /m flag
      # is required because the attribute text may span several lines.
      is_single = false
      if attributes =~ /\A(.*)\/\Z/m
        attributes = Regexp.last_match(1)
        is_single = true
      end

      if TO_SANITIZE.include?(tag)
        # Re-emit in self-closing form; these tags never go on the stack.
        attributes = attributes.strip
        if attributes.size > 0
          @already += '<%s %s />' % [tag, attributes]
        else
          @already += '<%s />' % [tag]
        end
      elsif is_closing
        @already += @m.to_s
        if @tag_stack.empty?
          error "Malformed: closing tag #{tag.inspect} " +
                "in empty list"
        end
        if @tag_stack.last != tag
          error "Malformed: tag <#{tag}> " +
                "closes <#{@tag_stack.last}>"
        end
        @tag_stack.pop
      else
        @already += @m.to_s
        @tag_stack.push(tag) unless is_single
      end
    end

    # Raises an error describing +s+ together with the reader's state, with
    # the backtrace of the calling method.
    #
    # NOTE: the class raised is Exception itself, which a bare `rescue` does
    # not catch. It is kept as is for the sake of callers that explicitly
    # `rescue Exception`.
    def error(s)
      raise Exception, "Error: #{s} \n" + inspect, caller
    end

    # Multi-line dump of the reader's state, used in error messages.
    # (add_tabs is not defined in this class; presumably it comes from
    # MaRuKu::Strings.)
    def inspect
      "HTML READER\n state=#{state} " +
        "match=#{@m.to_s.inspect}\n" +
        "Tag stack = #{@tag_stack.inspect} \n" +
        "Before:\n" +
        add_tabs(@already, 1, '|') + "\n" +
        "After:\n" +
        add_tabs(@rest, 1, '|') + "\n"
    end

    # The text consumed so far (with the TO_SANITIZE tags rewritten).
    def stuff_you_read
      @already
    end

    # True when the reader is not in the middle of a tag or of a comment and
    # no tag is left open.
    def is_finished?
      state == :inside_element && @tag_stack.empty?
    end

    private

    # Moves the text matched by @m from the unread buffer to the output.
    def consume_match
      @already += @m.pre_match + @m.to_s
      @rest = @m.post_match
    end

    # :inside_comment state: consumes up to the end of the comment, or the
    # whole buffer when the comment is not closed yet.
    def read_inside_comment
      # Look at the last characters already consumed too, so that a "-->"
      # split across two calls to #eat_this is still recognized. The tail
      # holds at most two characters, hence a match always ends in @rest.
      window = @comment_tail + @rest
      if (@m = CommentEnd.match(window))
        @already += @m.to_s[@comment_tail.size..-1]
        @rest = @m.post_match
        @comment_tail = ""
        self.state = :inside_element
      else
        @already += @rest
        @comment_tail = window[-2, 2] || window
        @rest = ""
        self.state = :inside_comment
      end
    end

    # :inside_element state: reads one comment start, tag, partial tag or run
    # of text. Returns 1 when a comment start or a tag was read, 0 otherwise.
    def read_inside_element
      if (@m = CommentStart.match(@rest))
        consume_match
        @comment_tail = ""
        self.state = :inside_comment
        1
      elsif (@m = Tag.match(@rest))
        handle_tag
        self.state = :inside_element
        1
      elsif (@m = PartialTag.match(@rest))
        # Set the incomplete tag aside until its closing ">" shows up.
        @already += @m.pre_match
        @rest = @m.post_match
        @partial_tag = @m.to_s
        self.state = :inside_tag
        0
      elsif (@m = EverythingElse.match(@rest))
        consume_match
        self.state = :inside_element
        0
      else
        error "Malformed HTML: not complete: #{@rest.inspect}"
      end
    end

    # :inside_tag state: waits for the ">" that completes the partial tag.
    def read_inside_tag
      if (@m = TagEnd.match(@rest))
        # Put the completed tag back on the buffer, so that it is read again
        # as a whole in the :inside_element state.
        @rest = @partial_tag + @m.to_s + @m.post_match
        @partial_tag = nil
        self.state = :inside_element
      else
        @partial_tag += @rest
        @rest = ""
        self.state = :inside_tag
      end
    end
  end # html helper

end end end end
|