diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index b4547ba3..ff72ce44 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -269,10 +269,10 @@ def pull_event @source.ensure_buffer if @document_status == nil start_position = @source.position - if @source.match("/um, true) if md.nil? raise REXML::ParseException.new("Unclosed comment", @source) @@ -281,10 +281,10 @@ def pull_event raise REXML::ParseException.new("Malformed comment", @source) end return [ :comment, md[1] ] - elsif @source.match("DOCTYPE", true) + elsif @source.match?("DOCTYPE", true) base_error_message = "Malformed DOCTYPE" - unless @source.match(/\s+/um, true) - if @source.match(">") + unless @source.match?(/\s+/um, true) + if @source.match?(">") message = "#{base_error_message}: name is missing" else message = "#{base_error_message}: invalid name" @@ -293,10 +293,10 @@ def pull_event raise REXML::ParseException.new(message, @source) end name = parse_name(base_error_message) - if @source.match(/\s*\[/um, true) + if @source.match?(/\s*\[/um, true) id = [nil, nil, nil] @document_status = :in_doctype - elsif @source.match(/\s*>/um, true) + elsif @source.match?(/\s*>/um, true) id = [nil, nil, nil] @document_status = :after_doctype @source.ensure_buffer @@ -308,9 +308,9 @@ def pull_event # For backward compatibility id[1], id[2] = id[2], nil end - if @source.match(/\s*\[/um, true) + if @source.match?(/\s*\[/um, true) @document_status = :in_doctype - elsif @source.match(/\s*>/um, true) + elsif @source.match?(/\s*>/um, true) @document_status = :after_doctype @source.ensure_buffer else @@ -320,7 +320,7 @@ def pull_event end args = [:start_doctype, name, *id] if @document_status == :after_doctype - @source.match(/\s*/um, true) + @source.match?(/\s*/um, true) @stack << [ :end_doctype ] end return args @@ -331,14 +331,14 @@ def pull_event end end if @document_status == :in_doctype - @source.match(/\s*/um, true) # skip spaces + @source.match?(/\s*/um, true) # skip spaces start_position = @source.position - if @source.match("/um, true) raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil? return [ :elementdecl, "") + unless @source.match?(/\s+/um, true) + if @source.match?(">") message = "#{base_error_message}: name is missing" else message = "#{base_error_message}: invalid name" @@ -405,7 +405,7 @@ def pull_event id = parse_id(base_error_message, accept_external_id: true, accept_public_id: true) - unless @source.match(/\s*>/um, true) + unless @source.match?(/\s*>/um, true) message = "#{base_error_message}: garbage before end >" raise REXML::ParseException.new(message, @source) end @@ -419,7 +419,7 @@ def pull_event end elsif match = @source.match(/(%.*?;)\s*/um, true) return [ :externalentity, match[1] ] - elsif @source.match(/\]\s*>/um, true) + elsif @source.match?(/\]\s*>/um, true) @document_status = :after_doctype return [ :end_doctype ] end @@ -428,16 +428,16 @@ def pull_event end end if @document_status == :after_doctype - @source.match(/\s*/um, true) + @source.match?(/\s*/um, true) end begin start_position = @source.position - if @source.match("<", true) + if @source.match?("<", true) # :text's read_until may remain only "<" in buffer. In the # case, buffer is empty here. So we need to fill buffer # here explicitly. @source.ensure_buffer - if @source.match("/", true) + if @source.match?("/", true) @namespaces_restore_stack.pop last_tag = @tags.pop md = @source.match(Private::CLOSE_PATTERN, true) @@ -452,7 +452,7 @@ def pull_event raise REXML::ParseException.new(message, @source) end return [ :end_element, last_tag ] - elsif @source.match("!", true) + elsif @source.match?("!", true) md = @source.match(/([^>]*>)/um) #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" raise REXML::ParseException.new("Malformed node", @source) unless md @@ -470,7 +470,7 @@ def pull_event end raise REXML::ParseException.new( "Declarations can only occur "+ "in the doctype declaration.", @source) - elsif @source.match("?", true) + elsif @source.match?("?", true) return process_instruction else # Get the next tag @@ -651,7 +651,7 @@ def need_source_encoding_update?(xml_declaration_encoding) def parse_name(base_error_message) md = @source.match(Private::NAME_PATTERN, true) unless md - if @source.match(/\S/um) + if @source.match?(/\S/um) message = "#{base_error_message}: invalid name" else message = "#{base_error_message}: name is missing" @@ -693,34 +693,34 @@ def parse_id_invalid_details(accept_external_id:, accept_public_id:) public = /\A\s*PUBLIC/um system = /\A\s*SYSTEM/um - if (accept_external_id or accept_public_id) and @source.match(/#{public}/um) - if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um) + if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um) + if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um) return "public ID literal is missing" end - unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um) + unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um) return "invalid public ID literal" end if accept_public_id - if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um) + if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um) return "system ID literal is missing" end - unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um) + unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um) return "invalid system literal" end "garbage after system literal" else "garbage after public ID literal" end - elsif accept_external_id and @source.match(/#{system}/um) - if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um) + elsif accept_external_id and @source.match?(/#{system}/um) + if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um) return "system literal is missing" end - unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um) + unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um) return "invalid system literal" end "garbage after system literal" else - unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um) + unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um) return "invalid ID type" end "ID type is missing" @@ -729,7 +729,7 @@ def parse_id_invalid_details(accept_external_id:, def process_instruction name = parse_name("Malformed XML: Invalid processing instruction node") - if @source.match(/\s+/um, true) + if @source.match?(/\s+/um, true) match_data = @source.match(/(.*?)\?>/um, true) unless match_data raise ParseException.new("Malformed XML: Unclosed processing instruction", @source) @@ -737,7 +737,7 @@ def process_instruction content = match_data[1] else content = nil - unless @source.match("?>", true) + unless @source.match?("?>", true) raise ParseException.new("Malformed XML: Unclosed processing instruction", @source) end end @@ -767,9 +767,9 @@ def parse_attributes(prefixes) expanded_names = {} closed = false while true - if @source.match(">", true) + if @source.match?(">", true) return attributes, closed - elsif @source.match("/>", true) + elsif @source.match?("/>", true) closed = true return attributes, closed elsif match = @source.match(QNAME, true) @@ -777,7 +777,7 @@ def parse_attributes(prefixes) prefix = match[2] local_part = match[3] - unless @source.match(/\s*=\s*/um, true) + unless @source.match?(/\s*=\s*/um, true) message = "Missing attribute equal: <#{name}>" raise REXML::ParseException.new(message, @source) end @@ -793,7 +793,7 @@ def parse_attributes(prefixes) message = "Missing attribute value end quote: <#{name}>: <#{quote}>" raise REXML::ParseException.new(message, @source) end - @source.match(/\s*/um, true) + @source.match?(/\s*/um, true) if prefix == "xmlns" if local_part == "xml" if value != Private::XML_PREFIXED_NAMESPACE diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index 9370118f..27a6349a 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -136,6 +136,14 @@ def match(pattern, cons=false) end end + def match?(pattern, cons=false) + if cons + !@scanner.skip(pattern).nil? + else + !@scanner.match?(pattern).nil? + end + end + def position @scanner.pos end @@ -277,6 +285,23 @@ def match( pattern, cons=false ) md.nil? ? nil : @scanner end + def match?( pattern, cons=false ) + # To avoid performance issue, we need to increase bytes to read per scan + min_bytes = 1 + while true + if cons + n_matched_bytes = @scanner.skip(pattern) + else + n_matched_bytes = @scanner.match?(pattern) + end + return true if n_matched_bytes + return false if pattern.is_a?(String) + return false if @source.nil? + return false unless read(nil, min_bytes) + min_bytes *= 2 + end + end + def empty? super and ( @source.nil? || @source.eof? ) end