require 'uri'
require 'stringio'
require 'time'
module Kernel
  private

  # Keep the built-in Kernel#open reachable under another name.
  alias open_uri_original_open open

  # Replacement for Kernel#open that dispatches on +name+:
  #
  # * if +name+ itself responds to +open+, delegate to it;
  # * if +name+ is string-like, starts with "scheme://" and parses to a URI
  #   object that responds to +open+, delegate to that URI object;
  # * otherwise fall back to the original Kernel#open.
  #
  # +rest+ and the block are forwarded unchanged in every case.
  def open(name, *rest, &block)
    return name.open(*rest, &block) if name.respond_to?(:open)

    if name.respond_to?(:to_str) && %r{\A[A-Za-z][A-Za-z0-9+\-\.]*://} =~ name
      parsed = URI.parse(name)
      return parsed.open(*rest, &block) if parsed.respond_to?(:open)
    end

    open_uri_original_open(name, *rest, &block)
  end
  module_function :open
end
module OpenURI
# Symbol option keys accepted by OpenURI.check_options; any other Symbol key
# makes it raise ArgumentError.  Only key membership is consulted there, so
# every value is simply +true+.
Options = {
# read by OpenURI.open_loop: true, nil/false, a String or a URI::Generic
:proxy => true,
# called with the buffered size after each chunk is received
:progress_proc => true,
# called once with the expected length (may be nil for HTTP)
:content_length_proc => true,
# [user, password] pair handed to Net::HTTP basic_auth in OpenURI.open_http
:http_basic_authentication => true,
}
def OpenURI.check_options(options) options.each {|k, v|
next unless Symbol === k
unless Options.include? k
raise ArgumentError, "unrecognized option: #{k}"
end
}
end
# Splits the optional leading Kernel#open-style arguments off +rest+.
#
# A leading String or Integer is taken as the mode; an Integer directly
# after it is taken as the permission.  Returns [mode, perm, remaining],
# with +nil+ for whichever of mode/perm was not supplied.
def OpenURI.scan_open_optional_arguments(*rest)
  mode = perm = nil
  case rest.first
  when String, Integer
    mode = rest.shift
    perm = rest.shift if Integer === rest.first
  end
  [mode, perm, rest]
end
# Opens +name+ (a URI::Generic or something URI.parse accepts) for reading.
#
# +rest+ may carry a mode, a permission and an options Hash, in that order;
# anything left over raises ArgumentError ("extra arguments").  Options are
# validated by OpenURI.check_options.  Only read modes are accepted: nil,
# 'r', 'rb' or File::RDONLY.
#
# With a block, yields the opened IO and closes it afterwards, returning the
# block's value; without a block, returns the IO.
def OpenURI.open_uri(name, *rest)
  uri = name.is_a?(URI::Generic) ? name : URI.parse(name)
  mode, _perm, rest = OpenURI.scan_open_optional_arguments(*rest)
  options = Hash === rest.first ? rest.shift : {}
  raise ArgumentError.new("extra arguments") unless rest.empty?
  OpenURI.check_options(options)

  readable_modes = [nil, 'r', 'rb', File::RDONLY]
  unless readable_modes.include?(mode)
    raise ArgumentError.new("invalid access mode #{mode} (#{uri.class} resource is read only.)")
  end

  io = open_loop(uri, options)
  return io unless block_given?

  begin
    yield io
  ensure
    io.close
  end
end
# Fetches +uri+ into a Buffer, following redirects, and returns the
# resulting IO with +base_uri+ set to the final URI.
#
# The :proxy option selects how the proxy is found for each request:
# true (default) asks the URI itself via +find_proxy+, nil/false disables
# proxying, a String or URI::Generic is used as a fixed proxy.  Anything
# else raises ArgumentError.
#
# A redirect is signalled by +buffer_open+ throwing :open_uri_redirect with
# the new location.  Relative locations are resolved against the current
# URI, the hop must pass OpenURI.redirectable?, and
# :http_basic_authentication is dropped from the options before following
# it.  Revisiting a redirect target raises RuntimeError.
def OpenURI.open_loop(uri, options)
  proxy_opt = options.fetch(:proxy, true)
  find_proxy =
    case proxy_opt
    when true
      lambda {|u| u.find_proxy }
    when nil, false
      lambda {|u| nil }
    when String
      fixed_proxy = URI.parse(proxy_opt)
      lambda {|u| fixed_proxy }
    when URI::Generic
      lambda {|u| proxy_opt }
    else
      raise ArgumentError.new("Invalid proxy option: #{proxy_opt}")
    end

  visited = {}
  buf = nil
  # One attempt: returns the redirect target if one was thrown, else nil.
  fetch_once = lambda do
    catch(:open_uri_redirect) do
      buf = Buffer.new
      uri.buffer_open(buf, find_proxy.call(uri), options)
      nil
    end
  end

  while (redirect = fetch_once.call)
    redirect = uri + redirect if redirect.relative?
    unless OpenURI.redirectable?(uri, redirect)
      raise "redirection forbidden: #{uri} -> #{redirect}"
    end
    if options.include? :http_basic_authentication
      # work on a copy so the caller's Hash is left untouched
      options = options.dup
      options.delete :http_basic_authentication
    end
    uri = redirect
    raise "HTTP redirection loop: #{uri}" if visited.include? uri.to_s
    visited[uri.to_s] = true
  end

  io = buf.io
  io.base_uri = uri
  io
end
# Decides whether a redirect from +uri1+ to +uri2+ may be followed.
#
# Allowed when both schemes are equal ignoring case (returns true), or when
# each scheme is exactly "http" or "ftp" (returns the truthy match index).
# Otherwise returns nil.
def OpenURI.redirectable?(uri1, uri2)
  return true if uri1.scheme.downcase == uri2.scheme.downcase

  http_or_ftp = /\A(?:http|ftp)\z/i
  http_or_ftp =~ uri1.scheme && http_or_ftp =~ uri2.scheme
end
# Performs one HTTP GET for +target+ and stores the body in +buf+.
#
# buf::     buffer receiving the body via <<; its +io+ gets +status+ and the
#           response header fields.
# target::  URI to fetch.  When it is not a URI::HTTP the request is sent to
#           +proxy+ with the full URI as the request target.
# proxy::   nil, or a URI::HTTP (exactly that class) naming the HTTP proxy.
# options:: String keys are sent as request headers; the Symbol keys
#           :http_basic_authentication, :content_length_proc and
#           :progress_proc are honoured.
#
# Raises RuntimeError for a non-HTTP proxy, ArgumentError when +target+
# carries userinfo on Ruby >= 1.9.0, and OpenURI::HTTPError for any response
# that is neither a success nor a followed redirect.  For 301, 302, 303 and
# 307 responses it throws :open_uri_redirect with the parsed Location URI.
def OpenURI.open_http(buf, target, proxy, options)
  if proxy
    raise "Non-HTTP proxy URI: #{proxy}" if proxy.class != URI::HTTP
  end

  if target.userinfo && "1.9.0" <= RUBY_VERSION
    raise ArgumentError, "userinfo not supported. [RFC3986]"
  end

  require 'net/http'
  klass = Net::HTTP
  if URI::HTTP === target
    # HTTP(S) target: connect to it directly or through the proxy class.
    if proxy
      klass = Net::HTTP::Proxy(proxy.host, proxy.port)
    end
    target_host = target.host
    target_port = target.port
    request_uri = target.request_uri
  else
    # Non-HTTP target: talk to the proxy and ask it for the absolute URI.
    target_host = proxy.host
    target_port = proxy.port
    request_uri = target.to_s
  end

  http = klass.new(target_host, target_port)
  if target.class == URI::HTTPS
    require 'net/https'
    http.use_ssl = true
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER
    store = OpenSSL::X509::Store.new
    store.set_default_paths
    http.cert_store = store
  end

  header = {}
  options.each {|k, v| header[k] = v if String === k }

  resp = nil
  http.start {
    req = Net::HTTP::Get.new(request_uri, header)
    if options.include? :http_basic_authentication
      user, pass = options[:http_basic_authentication]
      req.basic_auth user, pass
    end
    http.request(req) {|response|
      resp = response
      if options[:content_length_proc] && Net::HTTPSuccess === resp
        if resp.key?('Content-Length')
          options[:content_length_proc].call(resp['Content-Length'].to_i)
        else
          options[:content_length_proc].call(nil)
        end
      end
      resp.read_body {|str|
        buf << str
        if options[:progress_proc] && Net::HTTPSuccess === resp
          options[:progress_proc].call(buf.size)
        end
      }
    }
  }

  io = buf.io
  io.rewind
  io.status = [resp.code, resp.message]
  resp.each {|name, value| buf.io.meta_add_field name, value }

  case resp
  when Net::HTTPSuccess
    # nothing more to do: the body is already in buf
  when Net::HTTPMovedPermanently, Net::HTTPFound, Net::HTTPSeeOther, Net::HTTPTemporaryRedirect
    # The throw must be the body of this clause, on its own line; fused onto
    # the +when+ line it is not a clause body and redirects are not followed.
    throw :open_uri_redirect, URI.parse(resp['location'])
  else
    raise OpenURI::HTTPError.new(io.status.join(' '), io)
  end
end
# Error carrying the IO of the failed response alongside its message.
# OpenURI.open_http raises it with the joined status ("code message") as the
# message and the buffered response IO as +io+.
class HTTPError < StandardError
  # The IO object passed to the constructor.
  attr_reader :io

  def initialize(message, io)
    super(message)
    @io = io
  end
end
# Accumulates received data, starting in a StringIO and moving to a
# Tempfile once more than StringMax characters have been appended.
class Buffer
  # Size threshold above which the contents are moved to a Tempfile.
  StringMax = 10240

  # Running total of the lengths of all appended strings.
  attr_reader :size

  def initialize
    @io = StringIO.new
    @size = 0
  end

  # Appends +str+.  When the in-memory buffer has grown past StringMax its
  # contents (and Meta data, if already attached) are copied to a binary
  # mode Tempfile, which then replaces it.
  def <<(str)
    @io << str
    @size += str.length
    return unless StringIO === @io && StringMax < @size

    require 'tempfile'
    spill = Tempfile.new('open-uri')
    spill.binmode
    Meta.init spill, @io if @io.respond_to? :meta
    spill << @io.string
    @io = spill
  end

  # Returns the underlying IO, extending it with Meta on first access.
  def io
    Meta.init @io unless @io.respond_to? :meta
    @io
  end
end
# Mixin that attaches response metadata (status, base URI and header
# fields) to an object and offers parsed views of a few headers.
module Meta
  # Extends +obj+ with Meta and resets its base_uri and meta table.  When
  # +src+ is given, its status, base_uri and every meta field are copied.
  def Meta.init(obj, src=nil)
    obj.extend Meta
    obj.instance_eval {
      @base_uri = nil
      @meta = {}
    }
    if src
      obj.status = src.status
      obj.base_uri = src.base_uri
      src.meta.each {|name, value|
        obj.meta_add_field(name, value)
      }
    end
  end

  # Status of the response; OpenURI.open_http stores [code, message] here.
  attr_accessor :status

  # URI the content was finally read from.
  attr_accessor :base_uri

  # Hash of header fields keyed by lower-cased field name.
  attr_reader :meta

  # Records a header field under its lower-cased name (last value wins).
  def meta_add_field(name, value)
    @meta[name.downcase] = value
  end

  # Last-Modified header as a Time (via Time.httpdate), or nil when absent.
  def last_modified
    if v = @meta['last-modified']
      Time.httpdate(v)
    else
      nil
    end
  end

  RE_LWS = /[\r\n\t ]+/n
  RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
  RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n
  RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n

  # Parses the Content-Type header into
  # ["type/subtype", [attribute, value], ...] with type, subtype and
  # attribute names lower-cased, or returns nil when the header is absent
  # or malformed.
  def content_type_parse # :nodoc:
    v = @meta['content-type']
    if v && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ v
      type = $1.downcase
      subtype = $2.downcase
      parameters = []
      $3.scan(/;#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?=#{RE_LWS}?(?:(#{RE_TOKEN})|(#{RE_QUOTED_STRING}))/no) {|att, val, qval|
        if qval
          # Quoted value: drop the surrounding quotes, then turn each
          # backslash escape into the character it stands for.  The "n"
          # flag is needed for the \x80-\xff range.
          val = qval[1...-1].gsub(/[\r\n\t !#-\[\]-~\x80-\xff]+|(\\[\x00-\x7f])/n) { $1 ? $1[1,1] : $& }
        end
        parameters << [att.downcase, val]
      }
      ["#{type}/#{subtype}", *parameters]
    else
      nil
    end
  end

  # Media type such as "text/html", or "application/octet-stream" when the
  # Content-Type header is missing or cannot be parsed.
  def content_type
    type, *_parameters = content_type_parse
    type || 'application/octet-stream'
  end

  # Lower-cased charset parameter of Content-Type.  Without one: the value
  # of the given block if any, else "iso-8859-1" for text/* content whose
  # base_uri scheme is http, else nil.
  def charset
    type, *parameters = content_type_parse
    if pair = parameters.assoc('charset')
      pair.last.downcase
    elsif block_given?
      yield
    elsif type && %r{\Atext/} =~ type &&
          @base_uri && /\Ahttp\z/i =~ @base_uri.scheme
      "iso-8859-1"
    else
      nil
    end
  end

  # Content codings listed in Content-Encoding, lower-cased, as an Array;
  # empty when the header is absent or does not start with a token list.
  def content_encoding
    v = @meta['content-encoding']
    if v && %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ v
      v.scan(RE_TOKEN).map {|content_coding| content_coding.downcase}
    else
      []
    end
  end
end
# Mixin giving URI classes +open+ and +read+ on top of OpenURI.open_uri.
module OpenRead
  # Opens the receiver through OpenURI.open_uri, forwarding +rest+ and the
  # block unchanged.
  def open(*rest, &block)
    OpenURI.open_uri(self, *rest, &block)
  end

  # Reads the whole resource and returns it as a String that has been
  # extended with Meta, initialised from the opened IO.
  def read(options={})
    self.open(options) do |stream|
      body = stream.read
      Meta.init body, stream
      body
    end
  end
end
end
module URI
class Generic
def find_proxy
name = self.scheme.downcase + '_proxy'
proxy_uri = nil
if name == 'http_proxy' && ENV.include?('REQUEST_METHOD') pairs = ENV.reject {|k, v| /\Ahttp_proxy\z/i !~ k }
case pairs.length
when 0 proxy_uri = nil
when 1
k, v = pairs.shift
if k == 'http_proxy' && ENV[k.upcase] == nil
proxy_uri = ENV[name]
else
proxy_uri = nil
end
else proxy_uri = ENV.to_hash[name]
end
if !proxy_uri
proxy_uri = ENV["CGI_#{name.upcase}"]
end
elsif name == 'http_proxy'
unless proxy_uri = ENV[name]
if proxy_uri = ENV[name.upcase]
warn 'The environment variable HTTP_PROXY is discouraged. Use http_proxy.'
end
end
else
proxy_uri = ENV[name] || ENV[name.upcase]
end
if proxy_uri && self.host
require 'socket'
begin
addr = IPSocket.getaddress(self.host)
proxy_uri = nil if /\A127\.|\A::1\z/ =~ addr
rescue SocketError
end
end
if proxy_uri
proxy_uri = URI.parse(proxy_uri)
name = 'no_proxy'
if no_proxy = ENV[name] || ENV[name.upcase]
no_proxy.scan(/([^:,]*)(?::(\d+))?/) {|host, port|
if /(\A|\.)#{Regexp.quote host}\z/i =~ self.host &&
(!port || self.port == port.to_i)
proxy_uri = nil
break
end
}
end
proxy_uri
else
nil
end
end
end
class HTTP
  include OpenURI::OpenRead

  # Fetches this URI into +buf+ by delegating to OpenURI.open_http, passing
  # the receiver as the target together with +proxy+ and +options+.
  def buffer_open(buf, proxy, options)
    OpenURI.open_http(buf, self, proxy, options)
  end
end
class FTP
  # Downloads the file this URI names into +buf+.
  #
  # With a +proxy+ the request is handed to OpenURI.open_http and nil is
  # returned.  Otherwise the path is split on "/", each segment is
  # percent-decoded, the last segment is the filename and the others are
  # walked with CWD.  Honours options[:content_length_proc] (called with
  # the size reported by the server) and options[:progress_proc] (called
  # with the buffered size after each chunk).
  #
  # Raises ArgumentError when the path has no filename, when a directory or
  # the filename contains CR or LF after decoding, or when the typecode is
  # not one of a, i, d.  All of this is checked before connecting.
  def buffer_open(buf, proxy, options)
    if proxy
      OpenURI.open_http(buf, self, proxy, options)
      return
    end

    directories = self.path.split(%r{/}, -1)
    # strip the empty field in front of the leading slash
    directories.shift if directories[0] == ''
    directories.each {|d|
      d.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/) { [$1].pack("H2") }
    }
    unless filename = directories.pop
      raise ArgumentError, "no filename: #{self.inspect}"
    end
    # CR/LF in a name would inject extra commands into the control channel.
    directories.each {|d|
      if /[\r\n]/ =~ d
        raise ArgumentError, "invalid directory: #{d.inspect}"
      end
    }
    if /[\r\n]/ =~ filename
      raise ArgumentError, "invalid filename: #{filename.inspect}"
    end
    typecode = self.typecode
    if typecode && /\A[aid]\z/ !~ typecode
      raise ArgumentError, "invalid typecode: #{typecode.inspect}"
    end

    # Loaded lazily, and only once the request is known to be well formed.
    require 'net/ftp'
    ftp = Net::FTP.new
    begin
      # Connect to the port named in the URI, not always the default one.
      ftp.connect(self.host, self.port || Net::FTP::FTP_PORT)
      user = 'anonymous'
      passwd = nil
      user, passwd = self.userinfo.split(/:/) if self.userinfo
      ftp.login(user, passwd)
      directories.each {|cwd|
        ftp.voidcmd("CWD #{cwd}")
      }
      if typecode
        ftp.voidcmd("TYPE #{typecode.upcase}")
      end
      if options[:content_length_proc]
        options[:content_length_proc].call(ftp.size(filename))
      end
      ftp.retrbinary("RETR #{filename}", 4096) { |str|
        buf << str
        options[:progress_proc].call(buf.size) if options[:progress_proc]
      }
    ensure
      # release the control connection even when a command fails
      ftp.close
    end
    buf.io.rewind
  end

  include OpenURI::OpenRead
end
end