require 'rexml/encoding'
module REXML
class SourceFactory
def SourceFactory::create_from(arg)
if arg.respond_to? :read and
arg.respond_to? :readline and
arg.respond_to? :nil? and
arg.respond_to? :eof?
IOSource.new(arg)
elsif arg.respond_to? :to_str
require 'stringio'
IOSource.new(StringIO.new(arg))
elsif arg.kind_of? Source
arg
else
raise "#{arg.class} is not a valid input stream. It must walk \n"+
"like either a String, an IO, or a Source."
end
end
end
class Source
include Encoding
attr_reader :buffer
attr_reader :line
attr_reader :encoding
def initialize(arg, encoding=nil)
@orig = @buffer = arg
if encoding
self.encoding = encoding
else
detect_encoding
end
@line = 0
end
def encoding=(enc)
return unless super
encoding_updated
end
def scan(pattern, cons=false)
return nil if @buffer.nil?
rv = @buffer.scan(pattern)
@buffer = $' if cons and rv.size>0
rv
end
def read
end
def consume( pattern )
@buffer = $' if pattern.match( @buffer )
end
def match_to( char, pattern )
return pattern.match(@buffer)
end
def match_to_consume( char, pattern )
md = pattern.match(@buffer)
@buffer = $'
return md
end
def match(pattern, cons=false)
md = pattern.match(@buffer)
@buffer = $' if cons and md
return md
end
def empty?
@buffer == ""
end
def position
@orig.index( @buffer )
end
def current_line
lines = @orig.split
res = lines.grep @buffer[0..30]
res = res[-1] if res.kind_of? Array
lines.index( res ) if res
end
private
def detect_encoding
buffer_encoding = @buffer.encoding
detected_encoding = "UTF-8"
begin
@buffer.force_encoding("ASCII-8BIT")
if @buffer[0, 2] == "\xfe\xff"
@buffer[0, 2] = ""
detected_encoding = "UTF-16BE"
elsif @buffer[0, 2] == "\xff\xfe"
@buffer[0, 2] = ""
detected_encoding = "UTF-16LE"
elsif @buffer[0, 3] == "\xef\xbb\xbf"
@buffer[0, 3] = ""
detected_encoding = "UTF-8"
end
ensure
@buffer.force_encoding(buffer_encoding)
end
self.encoding = detected_encoding
end
def encoding_updated
if @encoding != 'UTF-8'
@buffer = decode(@buffer)
@to_utf = true
else
@to_utf = false
@buffer.force_encoding ::Encoding::UTF_8
end
end
end
class IOSource < Source
def initialize(arg, block_size=500, encoding=nil)
@er_source = @source = arg
@to_utf = false
@pending_buffer = nil
if encoding
super("", encoding)
else
super(@source.read(3) || "")
end
if !@to_utf and
@buffer.respond_to?(:force_encoding) and
@source.respond_to?(:external_encoding) and
@source.external_encoding != ::Encoding::UTF_8
@force_utf8 = true
else
@force_utf8 = false
end
end
def scan(pattern, cons=false)
rv = super
if rv.size == 0
until @buffer =~ pattern or @source.nil?
begin
@buffer << readline
rescue Iconv::IllegalSequence
raise
rescue
@source = nil
end
end
rv = super
end
rv.taint
rv
end
def read
begin
@buffer << readline
rescue Exception, NameError
@source = nil
end
end
def consume( pattern )
match( pattern, true )
end
def match( pattern, cons=false )
rv = pattern.match(@buffer)
@buffer = $' if cons and rv
while !rv and @source
begin
@buffer << readline
rv = pattern.match(@buffer)
@buffer = $' if cons and rv
rescue
@source = nil
end
end
rv.taint
rv
end
def empty?
super and ( @source.nil? || @source.eof? )
end
def position
@er_source.pos rescue 0
end
def current_line
begin
pos = @er_source.pos lineno = @er_source.lineno @er_source.rewind
line = 0 begin
while @er_source.pos < pos
@er_source.readline
line += 1
end
rescue
end
rescue IOError
pos = -1
line = -1
end
[pos, lineno, line]
end
private
def readline
str = @source.readline(@line_break)
if @pending_buffer
if str.nil?
str = @pending_buffer
else
str = @pending_buffer + str
end
@pending_buffer = nil
end
return nil if str.nil?
if @to_utf
decode(str)
else
str.force_encoding(::Encoding::UTF_8) if @force_utf8
str
end
end
def encoding_updated
case @encoding
when "UTF-16BE", "UTF-16LE"
@source.binmode
@source.set_encoding(@encoding)
end
@line_break = encode(">")
@pending_buffer, @buffer = @buffer, ""
@pending_buffer.force_encoding(@encoding)
super
end
end
end