xpathparser.rb   [plain text]


require 'rexml/namespace'
require 'rexml/xmltokens'

module REXML
	module Parsers
		# You don't want to use this class.  Really.  Use XPath, which is a wrapper
		# for this class.  Believe me.  You don't want to poke around in here.
		# There is strange, dark magic at work in this code.  Beware.  Go back!  Go
		# back while you still can!
		class XPathParser
			include XMLTokens
			LITERAL		= /^'([^']*)'|^"([^"]*)"/u

			def namespaces=( namespaces )
				Functions::namespace_context = namespaces
				@namespaces = namespaces
			end

			def parse path
				path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
				path.gsub!( /\s+([\]\)])/, '\1' )
				parsed = []
				path = LocationPath(path, parsed)
				parsed
			end

			def predicate path
				parsed = []
				Predicate( "[#{path}]", parsed )
				parsed
			end

			def to_string( path )
				string = ""
				while path.size > 0
					case path[0]
					when :ancestor, :ancestor_or_self, :attribute, :child, :descendant, :descendant_or_self, :following, :following_sibling, :namespace, :parent, :preceding, :preceding_sibling, :self
						op = path.shift
						string << "/" unless string.size == 0
						string << op.to_s
						string << "::"
					when :any
						path.shift
						string << "*"
					when :qname
						path.shift
						prefix = path.shift
						name = path.shift
						string << prefix+":" if prefix.size > 0
						string << name
					when :predicate
						path.shift
						string << '['
						string << predicate_to_string( path.shift )
						string << ' ]'
					else
						string << "/" unless string.size == 0
						string << "UNKNOWN("
						string << path.shift.inspect
						string << ")"
					end
				end
				return string
			end

			def predicate_to_string( path )
				string = ""
				case path[0]
				when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :neq, :union
					op = path.shift
					left = predicate_to_string( path.shift )
					right = predicate_to_string( path.shift )
					string << " "
					string << left
					string << " "
					string << op.to_s
					string << " "
					string << right
					string << " "
				when :function
					path.shift
					name = path.shift
					string << name
					string << "( "
					string << predicate_to_string( path.shift )
					string << " )"
				when :literal
					path.shift
					string << " "
					string << path.shift.inspect
					string << " "
				else
					string << " "
					string << to_string( path )
					string << " "
				end
				return string.squeeze(" ")
			end

			private
			#LocationPath
			#	| RelativeLocationPath
			#	| '/' RelativeLocationPath?
			#	| '//' RelativeLocationPath
			def LocationPath path, parsed
        #puts "LocationPath '#{path}'"
        path = path.strip
        if path[0] == ?/
          parsed << :document
					if path[1] == ?/
						parsed << :descendant_or_self
						parsed << :node
						path = path[2..-1]
					else
						path = path[1..-1]
					end
        end
        #puts parsed.inspect
				return RelativeLocationPath( path, parsed ) if path.size > 0
			end

			#RelativeLocationPath
			#	|																										Step
			#		| (AXIS_NAME '::' | '@' | '') 										AxisSpecifier
			#			NodeTest
			#				Predicate
			#		| '.' | '..'																			AbbreviatedStep
			#	|	RelativeLocationPath '/' Step
			#	| RelativeLocationPath '//' Step
			AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
			def RelativeLocationPath path, parsed
				#puts "RelativeLocationPath #{path}"
				while path.size > 0
					# (axis or @ or <child::>) nodetest predicate	>
					# OR																					>	/ Step
					# (. or ..)																		>
					if path[0] == ?.
						if path[1] == ?.
							parsed << :parent
							parsed << :node
							path = path[2..-1]
						else
							parsed << :self
							parsed << :node
							path = path[1..-1]
						end
					else
						if path[0] == ?@
							#puts "ATTRIBUTE"
							parsed << :attribute
							path = path[1..-1]
							# Goto Nodetest
						elsif path =~ AXIS
							parsed << $1.tr('-','_').intern
							path = $'
							# Goto Nodetest
						else
							parsed << :child
						end

						#puts "NODETESTING '#{path}'"
						n = []
						path = NodeTest( path, n)
						#puts "NODETEST RETURNED '#{path}'"

						if path[0] == ?[
							path = Predicate( path, n )
						end

						parsed.concat(n)
					end
					
					if path.size > 0
						if path[0] == ?/
							if path[1] == ?/
								parsed << :descendant_or_self
								parsed << :node
								path = path[2..-1]
							else
								path = path[1..-1]
							end
						else
							return path
						end
					end
				end
				return path
			end

			# Returns a 1-1 map of the nodeset
			# The contents of the resulting array are either:
			# 	true/false, if a positive match
			# 	String, if a name match
			#NodeTest
			#	| ('*' | NCNAME ':' '*' | QNAME)								NameTest
			#	| NODE_TYPE '(' ')'															NodeType
			#	| PI '(' LITERAL ')'														PI
			#		| '[' expr ']'																Predicate
			NCNAMETEST= /^(#{NCNAME_STR}):\*/u
			QNAME 		= Namespace::NAMESPLIT
			NODE_TYPE	= /^(comment|text|node)\(\s*\)/m
			PI				= /^processing-instruction\(/
			def NodeTest path, parsed
        #puts "NodeTest with #{path}"
				res = nil
				case path
				when /^\*/
					path = $'
					parsed << :any
				when NODE_TYPE
					type = $1
					path = $'
					parsed << type.tr('-', '_').intern
				when PI
					path = $'
					literal = nil
					if path !~ /^\s*\)/
						path =~ LITERAL
						literal = $1
						path = $'
						raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
						path = path[1..-1]
					end
					parsed << :processing_instruction
					parsed << literal
				when NCNAMETEST
					#puts "NCNAMETEST"
					prefix = $1
					path = $'
					parsed << :namespace
					parsed << prefix
				when QNAME
					#puts "QNAME"
					prefix = $1
					name = $2
					path = $'
					prefix = "" unless prefix
					parsed << :qname
					parsed << prefix
					parsed << name
				end
				return path
			end

			# Filters the supplied nodeset on the predicate(s)
			def Predicate path, parsed
        #puts "PREDICATE with #{path}"
				return nil unless path[0] == ?[
				predicates = []
				while path[0] == ?[
					path, expr = get_group(path)
					predicates << expr[1..-2] if expr
				end
        #puts "PREDICATES = #{predicates.inspect}"
				predicates.each{ |expr| 
					#puts "ORING #{expr}"
					preds = []
					parsed << :predicate
					parsed << preds
					OrExpr(expr, preds) 
				}
        #puts "PREDICATES = #{predicates.inspect}"
				path
			end

			# The following return arrays of true/false, a 1-1 mapping of the
			# supplied nodeset, except for axe(), which returns a filtered
			# nodeset

			#| OrExpr S 'or' S AndExpr
			#| AndExpr
			def OrExpr path, parsed
				#puts "OR >>> #{path}"
				n = []
				rest = AndExpr( path, n )
				#puts "OR <<< #{rest}"
				if rest != path
					while rest =~ /^\s*( or )/
						n = [ :or, n, [] ]
						rest = AndExpr( $', n[-1] )
					end
				end
				if parsed.size == 0 and n.size != 0
					parsed.replace(n)
				elsif n.size > 0
					parsed << n
				end
				rest
			end

			#| AndExpr S 'and' S EqualityExpr
			#| EqualityExpr
			def AndExpr path, parsed
				#puts "AND >>> #{path}"
				n = []
				rest = EqualityExpr( path, n )
				#puts "AND <<< #{rest}"
				if rest != path
					while rest =~ /^\s*( and )/
						n = [ :and, n, [] ]
						#puts "AND >>> #{rest}"
						rest = EqualityExpr( $', n[-1] )
						#puts "AND <<< #{rest}"
					end
				end
				if parsed.size == 0 and n.size != 0
					parsed.replace(n)
				elsif n.size > 0
					parsed << n
				end
				rest
			end

			#| EqualityExpr ('=' | '!=')  RelationalExpr
			#| RelationalExpr
			def EqualityExpr path, parsed
				#puts "EQUALITY >>> #{path}"
				n = []
				rest = RelationalExpr( path, n )
				#puts "EQUALITY <<< #{rest}"
				if rest != path
					while rest =~ /^\s*(!?=)\s*/
						if $1[0] == ?!
							n = [ :neq, n, [] ]
						else
							n = [ :eq, n, [] ]
						end
						rest = RelationalExpr( $', n[-1] )
					end
				end
				if parsed.size == 0 and n.size != 0
					parsed.replace(n)
				elsif n.size > 0
					parsed << n
				end
				rest
			end

			#| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
			#| AdditiveExpr
			def RelationalExpr path, parsed
				#puts "RELATION >>> #{path}"
				n = []
				rest = AdditiveExpr( path, n )
				#puts "RELATION <<< #{rest}"
				if rest != path
					while rest =~ /^\s*([<>]=?)\s*/
						if $1[0] == ?<
							sym = "lt"
						else
							sym = "gt"
						end
						sym << "eq" if $1[-1] == ?=
						n = [ sym.intern, n, [] ]
						rest = AdditiveExpr( $', n[-1] )
					end
				end
				if parsed.size == 0 and n.size != 0
					parsed.replace(n)
				elsif n.size > 0
					parsed << n
				end
				rest
			end

			#| AdditiveExpr ('+' | S '-') MultiplicativeExpr
			#| MultiplicativeExpr
			def AdditiveExpr path, parsed
				#puts "ADDITIVE >>> #{path}"
				n = []
				rest = MultiplicativeExpr( path, n )
				#puts "ADDITIVE <<< #{rest}"
				if rest != path
					while rest =~ /^\s*(\+| -)\s*/
						if $1[0] == ?+
							n = [ :plus, n, [] ]
						else
							n = [ :minus, n, [] ]
						end
						rest = MultiplicativeExpr( $', n[-1] )
					end
				end
				if parsed.size == 0 and n.size != 0
					parsed.replace(n)
				elsif n.size > 0
					parsed << n
				end
				rest
			end

			#| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
			#| UnaryExpr
			def MultiplicativeExpr path, parsed
				#puts "MULT >>> #{path}"
				n = []
				rest = UnaryExpr( path, n )
				#puts "MULT <<< #{rest}"
				if rest != path
					while rest =~ /^\s*(\*| div | mod )\s*/
						if $1[0] == ?*
							n = [ :mult, n, [] ]
						elsif $1.include?( "div" )
							n = [ :div, n, [] ]
						else
							n = [ :mod, n, [] ]
						end
						rest = UnaryExpr( $', n[-1] )
					end
				end
				if parsed.size == 0 and n.size != 0
					parsed.replace(n)
				elsif n.size > 0
					parsed << n
				end
				rest
			end

			#| '-' UnaryExpr
			#| UnionExpr
			def UnaryExpr path, parsed
				path =~ /^(\-*)/
				path = $'
				if $1 and (($1.size % 2) != 0)
					mult = -1
				else
					mult = 1
				end
				parsed << :neg if mult < 0

				#puts "UNARY >>> #{path}"
				n = []
				path = UnionExpr( path, n )
				#puts "UNARY <<< #{path}"
				parsed.concat( n )
				path
			end

			#| UnionExpr '|' PathExpr
			#| PathExpr
			def UnionExpr path, parsed
				#puts "UNION >>> #{path}"
				n = []
				rest = PathExpr( path, n )
				#puts "UNION <<< #{rest}"
				if rest != path
					while rest =~ /^\s*(\|)\s*/
						n = [ :union, n, [] ]
						rest = PathExpr( $', n[-1] )
					end
				end
				if parsed.size == 0 and n.size != 0
					parsed.replace( n )
				elsif n.size > 0
					parsed << n
				end
				rest
			end

			#| LocationPath
			#| FilterExpr ('/' | '//') RelativeLocationPath
			def PathExpr path, parsed
        path =~ /^\s*/
        path = $'
				#puts "PATH >>> #{path}"
				n = []
				rest = FilterExpr( path, n )
				#puts "PATH <<< '#{rest}'"
				if rest != path
					if rest and rest[0] == ?/
						return RelativeLocationPath(rest, n)
					end
				end
				#puts "BEFORE WITH '#{rest}'"
				rest = LocationPath(rest, n) if rest =~ /^[\/\.\@\[\w_*]/
				parsed.concat(n)
				return rest
			end

			#| FilterExpr Predicate
			#| PrimaryExpr
			def FilterExpr path, parsed
				#puts "FILTER >>> #{path}"
				n = []
				path = PrimaryExpr( path, n )
				#puts "FILTER <<< #{path}"
				path = Predicate(path, n) if path and path[0] == ?[
				#puts "FILTER <<< #{path}"
				parsed.concat(n)
				path
			end

			#| VARIABLE_REFERENCE
			#| '(' expr ')'
			#| LITERAL
			#| NUMBER
			#| FunctionCall
			VARIABLE_REFERENCE	= /^\$(#{NAME_STR})/u
			NUMBER							= /^(\d*\.?\d+)/
			NT				= /^comment|text|processing-instruction|node$/
			def PrimaryExpr path, parsed
				arry = []
				case path
				when VARIABLE_REFERENCE
					varname = $1
					path = $'
					parsed << :variable
					parsed << varname
					#arry << @variables[ varname ]
				when /^(\w[-\w]*)(?:\()/
					fname = $1
					path = $'
					return nil if fname =~ NT
					parsed << :function
					parsed << fname
					path = FunctionCall(path, parsed)
				when LITERAL, NUMBER
					#puts "LITERAL or NUMBER: #$1"
					varname = $1.nil? ? $2 : $1
					path = $'
					parsed << :literal 
					parsed << varname
				when /^\(/ 																							#/
					path, contents = get_group(path)
					contents = contents[1..-2]
					n = []
					OrExpr( contents, n )
					parsed.concat(n)
				end
				path
			end

			#| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
			def FunctionCall rest, parsed
				path, arguments = parse_args(rest)
        argset = []
        for argument in arguments
          args = []
          OrExpr( argument, args )
          argset << args
        end
				parsed << argset
				path
			end

			# get_group( '[foo]bar' ) -> ['bar', '[foo]']
			def get_group string
				ind = 0
				depth = 0
				st = string[0,1]
				en = (st == "(" ? ")" : "]")
				begin
					case string[ind,1]
					when st
						depth += 1
					when en
						depth -= 1
					end
					ind += 1
				end while depth > 0 and ind < string.length
				return nil unless depth==0
				[string[ind..-1], string[0..ind-1]]
			end
			
			def parse_args( string )
				arguments = []
				ind = 0
				depth = 1
				begin
					case string[ind]
					when ?(
						depth += 1
						if depth == 1
							string = string[1..-1]
							ind -= 1
						end
					when ?)
						depth -= 1
						if depth == 0
							s = string[0,ind].strip
							arguments << s unless s == ""
							string = string[ind+1..-1]
						end
					when ?,
						if depth == 1
							s = string[0,ind].strip
							arguments << s unless s == ""
							string = string[ind+1..-1]
							ind = 0
						end
					end
					ind += 1
				end while depth > 0 and ind < string.length
				return nil unless depth==0
				[string,arguments]
			end
		end
	end
end