# Patch summary (reviewer notes; everything below the "diff --git" header is the unmodified patch text).
#
# lib/rexml/source.rb
#   * initialize: adds an empty Hash, @term_encord, right after @line = 0.
#   * read_until: replaces `term = encode(term)` with
#     `term = @term_encord[term] ||= encode(term)`, so the value returned by encode(term)
#     is stored in that Hash under the un-encoded term and reused on later calls that
#     pass the same term, instead of calling encode on every read_until.
#
# test/test_document.rb
#   * Adds ReadUntilTest with test_utf_8, test_utf_16le and test_utf_16be. Each test builds
#     a REXML::Document from a string tagged ASCII-8BIT (the UTF-16 variants prepend an
#     encoded "\ufeff" BOM), then asserts document.encoding and that the "testing"
#     attribute of the first /message match has the value ">".
#
# NOTE(review): "@term_encord" looks like a misspelling (perhaps "encoded"); it is spelled
#   the same way in both hunks, so behavior is unaffected -- confirm the intended name.
# NOTE(review): the cache is keyed only by term. Presumably encode's result depends on the
#   source's current encoding; if that encoding can change after the first read_until call,
#   cached entries could be stale -- verify against the rest of source.rb.
# NOTE(review): the heredoc bodies in this copy contain only "Hello world!", yet the
#   assertions look up /message and its "testing" attribute; the XML markup appears to
#   have been lost in this view -- compare with the original patch before relying on it.
# NOTE(review): the patch's line breaks are collapsed onto one line here; they must be
#   restored before the patch can be applied.
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index ff887fc0..e1a466e9 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -77,6 +77,7 @@ def initialize(arg, encoding=nil) detect_encoding end @line = 0 + @term_encord = {} end # The current buffer (what we're going to read next) @@ -227,7 +228,7 @@ def read(term = nil, min_bytes = 1) def read_until(term) pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/ - term = encode(term) + term = @term_encord[term] ||= encode(term) until str = @scanner.scan_until(pattern) break if @source.nil? break if @source.eof? diff --git a/test/test_document.rb b/test/test_document.rb index cda4354f..609aeba2 100644 --- a/test/test_document.rb +++ b/test/test_document.rb @@ -403,6 +403,40 @@ def test_utf_16 assert_equal(expected_xml, actual_xml) end end + + class ReadUntilTest < Test::Unit::TestCase + def test_utf_8 + xml = <<-EOX.force_encoding("ASCII-8BIT") + +Hello world! +EOX + document = REXML::Document.new(xml) + assert_equal("UTF-8", document.encoding) + assert_equal(">", REXML::XPath.match(document, "/message")[0].attribute("testing").value) + end + + def test_utf_16le + xml = <<-EOX.encode("UTF-16LE").force_encoding("ASCII-8BIT") + +Hello world! +EOX + bom = "\ufeff".encode("UTF-16LE").force_encoding("ASCII-8BIT") + document = REXML::Document.new(bom + xml) + assert_equal("UTF-16", document.encoding) + assert_equal(">", REXML::XPath.match(document, "/message")[0].attribute("testing").value) + end + + def test_utf_16be + xml = <<-EOX.encode("UTF-16BE").force_encoding("ASCII-8BIT") + +Hello world! +EOX + bom = "\ufeff".encode("UTF-16BE").force_encoding("ASCII-8BIT") + document = REXML::Document.new(bom + xml) + assert_equal("UTF-16", document.encoding) + assert_equal(">", REXML::XPath.match(document, "/message")[0].attribute("testing").value) + end + end end end end