Skip to content

Commit 1b59a4d

Browse files
committed
REXML 3.1.9.1
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_6@67940 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
1 parent 1a38986 commit 1b59a4d

File tree

11 files changed

+607
-80
lines changed

11 files changed

+607
-80
lines changed

lib/rexml/doctype.rb

Lines changed: 52 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,44 @@
77
require_relative 'xmltokens'
88

99
module REXML
10+
# Writes the reference part of a DOCTYPE or NOTATION declaration:
# the ID type followed by the optional public ID literal and the
# optional system literal, each wrapped in a suitable quote character.
class ReferenceWriter
  # id_type::           text written first, after a single space
  #                     (callers in this file pass the external ID keyword).
  # public_id_literal:: unquoted public ID literal, or nil to omit it.
  # system_literal::    unquoted system literal, or nil to omit it.
  # context::           optional Hash-like object; when its
  #                     :prologue_quote entry is :apostrophe the default
  #                     quote is an apostrophe, otherwise a double quote.
  def initialize(id_type,
                 public_id_literal,
                 system_literal,
                 context=nil)
    @id_type = id_type
    @public_id_literal = public_id_literal
    @system_literal = system_literal
    if context && context[:prologue_quote] == :apostrophe
      @default_quote = "'"
    else
      @default_quote = "\""
    end
  end

  # Appends " <id_type>", then each non-nil literal preceded by a space
  # and surrounded by quotes, to +output+ using <tt><<</tt>.
  def write(output)
    output << " #{@id_type}"
    write_literal(output, @public_id_literal)
    write_literal(output, @system_literal)
  end

  private

  # Appends one quoted literal to +output+; does nothing for nil.
  def write_literal(output, literal)
    return unless literal
    quote = quote_for(literal)
    output << " #{quote}#{literal}#{quote}"
  end

  # Picks a quote character that does not occur in +literal+ so the
  # written value stays balanced. A literal containing an apostrophe is
  # always double-quoted; one containing only a double quote is
  # apostrophe-quoted; anything else uses the configured default.
  def quote_for(literal)
    if literal.include?("'")
      "\""
    elsif literal.include?("\"")
      "'"
    else
      @default_quote
    end
  end
end
47+
1048
# Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
1149
# ... >. DOCTYPES can be used to declare the DTD of a document, as well as
1250
# being used to declare entities used in the document.
@@ -50,6 +88,8 @@ def initialize( first, parent=nil )
5088
super( parent )
5189
@name = first.name
5290
@external_id = first.external_id
91+
@long_name = first.instance_variable_get(:@long_name)
92+
@uri = first.instance_variable_get(:@uri)
5393
elsif first.kind_of? Array
5494
super( parent )
5595
@name = first[0]
@@ -108,19 +148,17 @@ def clone
108148
# Ignored
109149
def write( output, indent=0, transitive=false, ie_hack=false )
110150
f = REXML::Formatters::Default.new
111-
c = context
112-
if c and c[:prologue_quote] == :apostrophe
113-
quote = "'"
114-
else
115-
quote = "\""
116-
end
117151
indent( output, indent )
118152
output << START
119153
output << ' '
120154
output << @name
121-
output << " #{@external_id}" if @external_id
122-
output << " #{quote}#{@long_name}#{quote}" if @long_name
123-
output << " #{quote}#{@uri}#{quote}" if @uri
155+
if @external_id
156+
reference_writer = ReferenceWriter.new(@external_id,
157+
@long_name,
158+
@uri,
159+
context)
160+
reference_writer.write(output)
161+
end
124162
unless @children.empty?
125163
output << ' ['
126164
@children.each { |child|
@@ -259,16 +297,11 @@ def initialize name, middle, pub, sys
259297
end
260298

261299
def to_s
262-
c = nil
263-
c = parent.context if parent
264-
if c and c[:prologue_quote] == :apostrophe
265-
quote = "'"
266-
else
267-
quote = "\""
268-
end
269-
notation = "<!NOTATION #{@name} #{@middle}"
270-
notation << " #{quote}#{@public}#{quote}" if @public
271-
notation << " #{quote}#{@system}#{quote}" if @system
300+
context = nil
301+
context = parent.context if parent
302+
notation = "<!NOTATION #{@name}"
303+
reference_writer = ReferenceWriter.new(@middle, @public, @system, context)
304+
reference_writer.write(notation)
272305
notation << ">"
273306
notation
274307
end

lib/rexml/parsers/baseparser.rb

Lines changed: 139 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ class BaseParser
5050

5151
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
5252
DOCTYPE_END = /\A\s*\]\s*>/um
53-
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
5453
ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
5554
COMMENT_START = /\A<!--/u
5655
COMMENT_PATTERN = /<!--(.*?)-->/um
@@ -61,15 +60,14 @@ class BaseParser
6160
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
6261
INSTRUCTION_START = /\A<\?/u
6362
INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
64-
TAG_MATCH = /^<((?>#{QNAME_STR}))/um
65-
CLOSE_MATCH = /^\s*<\/(#{QNAME_STR})\s*>/um
63+
TAG_MATCH = /\A<((?>#{QNAME_STR}))/um
64+
CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um
6665

6766
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
6867
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
6968
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
7069

7170
ENTITY_START = /\A\s*<!ENTITY/
72-
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
7371
ELEMENTDECL_START = /\A\s*<!ELEMENT/um
7472
ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
7573
SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
@@ -83,9 +81,6 @@ class BaseParser
8381
ATTDEF_RE = /#{ATTDEF}/
8482
ATTLISTDECL_START = /\A\s*<!ATTLIST/um
8583
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
86-
NOTATIONDECL_START = /\A\s*<!NOTATION/um
87-
PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
88-
SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
8984

9085
TEXT_PATTERN = /\A([^<]*)/um
9186

@@ -103,6 +98,11 @@ class BaseParser
10398
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
10499
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
105100

101+
NOTATIONDECL_START = /\A\s*<!NOTATION/um
102+
EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
103+
EXTERNAL_ID_SYSTEM = /\A\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um
104+
PUBLIC_ID = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um
105+
106106
EREFERENCE = /&(?!#{NAME};)/
107107

108108
DEFAULT_ENTITIES = {
@@ -195,11 +195,9 @@ def pull_event
195195
return [ :end_document ] if empty?
196196
return @stack.shift if @stack.size > 0
197197
#STDERR.puts @source.encoding
198-
@source.read if @source.buffer.size<2
199198
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
200199
if @document_status == nil
201-
#@source.consume( /^\s*/um )
202-
word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
200+
word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
203201
word = word[1] unless word.nil?
204202
#STDERR.puts "WORD = #{word.inspect}"
205203
case word
@@ -224,38 +222,49 @@ def pull_event
224222
when INSTRUCTION_START
225223
return process_instruction
226224
when DOCTYPE_START
227-
md = @source.match( DOCTYPE_PATTERN, true )
225+
base_error_message = "Malformed DOCTYPE"
226+
@source.match(DOCTYPE_START, true)
228227
@nsstack.unshift(curr_ns=Set.new)
229-
identity = md[1]
230-
close = md[2]
231-
identity =~ IDENTITY
232-
name = $1
233-
raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
234-
pub_sys = $2.nil? ? nil : $2.strip
235-
long_name = $4.nil? ? nil : $4.strip
236-
uri = $6.nil? ? nil : $6.strip
237-
args = [ :start_doctype, name, pub_sys, long_name, uri ]
238-
if close == ">"
228+
name = parse_name(base_error_message)
229+
if @source.match(/\A\s*\[/um, true)
230+
id = [nil, nil, nil]
231+
@document_status = :in_doctype
232+
elsif @source.match(/\A\s*>/um, true)
233+
id = [nil, nil, nil]
239234
@document_status = :after_doctype
240-
@source.read if @source.buffer.size<2
241-
md = @source.match(/^\s*/um, true)
242-
@stack << [ :end_doctype ]
243235
else
244-
@document_status = :in_doctype
236+
id = parse_id(base_error_message,
237+
accept_external_id: true,
238+
accept_public_id: false)
239+
if id[0] == "SYSTEM"
240+
# For backward compatibility
241+
id[1], id[2] = id[2], nil
242+
end
243+
if @source.match(/\A\s*\[/um, true)
244+
@document_status = :in_doctype
245+
elsif @source.match(/\A\s*>/um, true)
246+
@document_status = :after_doctype
247+
else
248+
message = "#{base_error_message}: garbage after external ID"
249+
raise REXML::ParseException.new(message, @source)
250+
end
251+
end
252+
args = [:start_doctype, name, *id]
253+
if @document_status == :after_doctype
254+
@source.match(/\A\s*/um, true)
255+
@stack << [ :end_doctype ]
245256
end
246257
return args
247-
when /^\s+/
258+
when /\A\s+/
248259
else
249260
@document_status = :after_doctype
250-
@source.read if @source.buffer.size<2
251-
md = @source.match(/\s*/um, true)
252261
if @source.encoding == "UTF-8"
253262
@source.buffer.force_encoding(::Encoding::UTF_8)
254263
end
255264
end
256265
end
257266
if @document_status == :in_doctype
258-
md = @source.match(/\s*(.*?>)/um)
267+
md = @source.match(/\A\s*(.*?>)/um)
259268
case md[1]
260269
when SYSTEMENTITY
261270
match = @source.match( SYSTEMENTITY, true )[1]
@@ -312,24 +321,35 @@ def pull_event
312321
end
313322
return [ :attlistdecl, element, pairs, contents ]
314323
when NOTATIONDECL_START
315-
md = nil
316-
if @source.match( PUBLIC )
317-
md = @source.match( PUBLIC, true )
318-
vals = [md[1],md[2],md[4],md[6]]
319-
elsif @source.match( SYSTEM )
320-
md = @source.match( SYSTEM, true )
321-
vals = [md[1],md[2],nil,md[4]]
322-
else
323-
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
324+
base_error_message = "Malformed notation declaration"
325+
unless @source.match(/\A\s*<!NOTATION\s+/um, true)
326+
if @source.match(/\A\s*<!NOTATION\s*>/um)
327+
message = "#{base_error_message}: name is missing"
328+
else
329+
message = "#{base_error_message}: invalid declaration name"
330+
end
331+
raise REXML::ParseException.new(message, @source)
324332
end
325-
return [ :notationdecl, *vals ]
333+
name = parse_name(base_error_message)
334+
id = parse_id(base_error_message,
335+
accept_external_id: true,
336+
accept_public_id: true)
337+
unless @source.match(/\A\s*>/um, true)
338+
message = "#{base_error_message}: garbage before end >"
339+
raise REXML::ParseException.new(message, @source)
340+
end
341+
return [:notationdecl, name, *id]
326342
when DOCTYPE_END
327343
@document_status = :after_doctype
328344
@source.match( DOCTYPE_END, true )
329345
return [ :end_doctype ]
330346
end
331347
end
348+
if @document_status == :after_doctype
349+
@source.match(/\A\s*/um, true)
350+
end
332351
begin
352+
@source.read if @source.buffer.size<2
333353
if @source.buffer[0] == ?<
334354
if @source.buffer[1] == ?/
335355
@nsstack.shift
@@ -368,6 +388,7 @@ def pull_event
368388
unless md
369389
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
370390
end
391+
@document_status = :in_element
371392
prefixes = Set.new
372393
prefixes << md[2] if md[2]
373394
@nsstack.unshift(curr_ns=Set.new)
@@ -473,6 +494,85 @@ def need_source_encoding_update?(xml_declaration_encoding)
473494
true
474495
end
475496

497+
# Matches optional leading whitespace followed by NAME at the start of
# @source and returns the first capture group of that match.
#
# base_error_message:: prefix used for the ParseException message.
#
# Raises REXML::ParseException with "<prefix>: invalid name" when some
# non-whitespace text is present but is not a NAME, and with
# "<prefix>: name is missing" otherwise.
def parse_name(base_error_message)
  # NOTE(review): the `true` argument presumably consumes the matched
  # text from the source -- confirm against Source#match.
  name_match = @source.match(/\A\s*#{NAME}/um, true)
  return name_match[1] if name_match

  detail = if @source.match(/\A\s*\S/um)
             "invalid name"
           else
             "name is missing"
           end
  raise REXML::ParseException.new("#{base_error_message}: #{detail}", @source)
end
509+
510+
# Parses an ID at the start of @source and returns it as a
# three-element array: [id_type, public_id, system_id].
#
# base_error_message:: prefix used for the ParseException message.
# accept_external_id:: when true, the EXTERNAL_ID_PUBLIC and
#                      EXTERNAL_ID_SYSTEM patterns are tried.
# accept_public_id::   when true, the PUBLIC_ID pattern (public ID
#                      literal only) is tried.
#
# Patterns are tried in the order EXTERNAL_ID_PUBLIC, PUBLIC_ID,
# EXTERNAL_ID_SYSTEM; the first match wins. When nothing matches, a
# REXML::ParseException is raised whose message is the prefix followed
# by the text returned from parse_id_invalid_details.
def parse_id(base_error_message,
             accept_external_id:,
             accept_public_id:)
  # Captured literals still carry their surrounding quote characters;
  # drop the first and last character. A missing capture stays nil.
  unquote = lambda { |literal| literal && literal[1..-2] }

  if accept_external_id && (matched = @source.match(EXTERNAL_ID_PUBLIC, true))
    return ["PUBLIC", unquote.call(matched[1]), unquote.call(matched[2])]
  end

  if accept_public_id && (matched = @source.match(PUBLIC_ID, true))
    return ["PUBLIC", unquote.call(matched[1]), nil]
  end

  if accept_external_id && (matched = @source.match(EXTERNAL_ID_SYSTEM, true))
    return ["SYSTEM", nil, unquote.call(matched[1])]
  end

  details = parse_id_invalid_details(accept_external_id: accept_external_id,
                                     accept_public_id: accept_public_id)
  raise REXML::ParseException.new("#{base_error_message}: #{details}", @source)
end
537+
538+
# Returns a short description of why the text at the start of @source
# could not be parsed as an ID. Used to build the error message raised
# by parse_id.
#
# accept_external_id:: whether an external ID was acceptable.
# accept_public_id::   whether a bare public ID was acceptable.
#
# The checks probe @source with progressively longer patterns and
# return at the first one that pins down the problem, so their order
# matters.
def parse_id_invalid_details(accept_external_id:,
                             accept_public_id:)
  public_start = /\A\s*PUBLIC/um
  system_start = /\A\s*SYSTEM/um

  # Input starts with PUBLIC and some PUBLIC form was acceptable.
  if (accept_external_id || accept_public_id) && @source.match(/#{public_start}/um)
    return "public ID literal is missing" if @source.match(/#{public_start}(?:\s+[^'"]|\s*[\[>])/um)
    return "invalid public ID literal" unless @source.match(/#{public_start}\s+#{PUBIDLITERAL}/um)
    return "garbage after public ID literal" unless accept_public_id
    return "system ID literal is missing" if @source.match(/#{public_start}\s+#{PUBIDLITERAL}\s+[^'"]/um)
    return "invalid system literal" unless @source.match(/#{public_start}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
    return "garbage after system literal"
  end

  # Input starts with SYSTEM and an external ID was acceptable.
  if accept_external_id && @source.match(/#{system_start}/um)
    return "system literal is missing" if @source.match(/#{system_start}(?:\s+[^'"]|\s*[\[>])/um)
    return "invalid system literal" unless @source.match(/#{system_start}\s+#{SYSTEMLITERAL}/um)
    return "garbage after system literal"
  end

  # Neither branch applied: report on the ID type keyword itself.
  return "invalid ID type" unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
  "ID type is missing"
end
575+
476576
def process_instruction
477577
match_data = @source.match(INSTRUCTION_PATTERN, true)
478578
unless match_data

lib/rexml/rexml.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
module REXML
2525
COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
2626
DATE = "2008/019"
27-
VERSION = "3.1.9"
27+
VERSION = "3.1.9.1"
2828
REVISION = ""
2929

3030
Copyright = COPYRIGHT

0 commit comments

Comments
 (0)