2007-06-05 16:34:49 -05:00
|
|
|
require File.join(File.dirname(__FILE__), 'preamble')
|
|
|
|
|
2007-07-04 17:36:59 -05:00
|
|
|
require 'html5/inputstream'
|
2007-06-05 16:34:49 -05:00
|
|
|
|
|
|
|
# Tests for HTMLInputStream: reported character encoding, BOM handling,
# the bytes returned for individual characters, and newline/position
# bookkeeping.
class HTMLInputStreamTest < Test::Unit::TestCase
  include HTML5

  # Reads the next character from +stream+ and, where the returned object
  # supports String#force_encoding, retags it as binary so that it can be
  # compared byte-for-byte with the "\x.." literals used in these tests.
  # Values without +force_encoding+ are returned untouched.
  def getc(stream)
    char = stream.char
    char.respond_to?(:force_encoding) ? char.force_encoding('binary') : char
  end

  # An explicitly requested 'ascii' encoding is reported back and a plain
  # ASCII character is returned unchanged.
  def test_char_ascii
    stream = HTMLInputStream.new("'", :encoding=>'ascii')
    assert_equal('ascii', stream.char_encoding)
    assert_equal("'", stream.char)
  end

  # A NUL byte in the input comes back as the bytes EF BF BD
  # (the UTF-8 encoding of U+FFFD).
  def test_char_null
    stream = HTMLInputStream.new("\x00")
    assert_equal("\xef\xbf\xbd", getc(stream))
  end

  # A multi-byte UTF-8 sequence is returned as a single character when the
  # encoding is given explicitly as 'utf-8'.
  def test_char_utf8
    stream = HTMLInputStream.new("\xe2\x80\x98", :encoding=>'utf-8')
    assert_equal('utf-8', stream.char_encoding)
    assert_equal("\xe2\x80\x98", getc(stream))
  end

  # With no encoding given, these high bytes are reported as windows-1252
  # and each input byte is returned as the corresponding UTF-8 sequence.
  def test_char_win1252
    stream = HTMLInputStream.new("\xa2\xc5\xf1\x92\x86")
    assert_equal('windows-1252', stream.char_encoding)
    assert_equal("\xc2\xa2", getc(stream))
    assert_equal("\xc3\x85", getc(stream))
    assert_equal("\xc3\xb1", getc(stream))
    assert_equal("\xe2\x80\x99", getc(stream))
    assert_equal("\xe2\x80\xa0", getc(stream))
  end

  # A leading UTF-8 byte order mark selects 'utf-8' and is not returned as
  # the first character.
  def test_bom
    stream = HTMLInputStream.new("\xef\xbb\xbf" + "'")
    assert_equal('utf-8', stream.char_encoding)
    assert_equal("'", stream.char)
  end

  # The UTF-16 test needs iconv to build its input; when the library cannot
  # be loaded the test is simply not defined and a notice is printed.
  begin
    require 'iconv'

    # 1025 spaces converted to UTF-16 are detected as a UTF-16 encoding and
    # read back as 1025 characters.
    def test_utf_16
      input = Iconv.new('utf-16', 'utf-8').iconv(' '*1025)
      stream = HTMLInputStream.new(input)
      # This used to be `assert('utf-16-le', stream.char_encoding)`, which
      # could never fail: the string literal was the (always truthy) value
      # under test and the detected encoding was only the failure message.
      # Only the UTF-16 family is checked here because the byte order that
      # iconv emits for 'utf-16' is not fixed by this test.
      assert_match(/\Autf-?16/i, stream.char_encoding)
      assert_equal(1025, stream.chars_until(' ', true).length)
    end
  rescue LoadError
    puts "iconv not found, skipping iconv tests"
  end

  # "\r\n" and a lone "\r" are both returned as "\n", and stream.position
  # is checked before and after each read.
  def test_newlines
    stream = HTMLInputStream.new("\xef\xbb\xbf" + "a\nbb\r\nccc\rdddd")
    assert_equal([1,0], stream.position)
    assert_equal("a\nbb\n", stream.chars_until('c'))
    assert_equal([3,0], stream.position)
    assert_equal("ccc\ndddd", stream.chars_until('x'))
    assert_equal([4,4], stream.position)
    # Peeks at internal state: the lengths recorded for the completed lines.
    assert_equal([1,2,3], stream.instance_eval {@line_lengths})
  end
end
|