prettyprint.rb   [plain text]


# $Id$

# This class implements a pretty printing algorithm. It finds line breaks and
# nice indentations for grouped structure.
# 
# By default, the class assumes that primitive elements are strings and each
# byte in the strings have single column in width. But it can be used for
# other situations by giving suitable arguments for some methods:
# * newline object and space generation block for PrettyPrint.new
# * optional width argument for PrettyPrint#text
# * PrettyPrint#breakable
#
# There are several candidate uses:
# * text formatting using proportional fonts
# * multibyte characters which has columns different to number of bytes
# * non-string formatting
#
# == Bugs
# * Box based formatting?
# * Other (better) model/algorithm?
# 
# == References
# Christian Lindig, Strictly Pretty, March 2000,
# http://www.st.cs.uni-sb.de/~lindig/papers/#pretty
# 
# Philip Wadler, A prettier printer, March 1998,
# http://homepages.inf.ed.ac.uk/wadler/topics/language-design.html#prettier
# 
# == Author
# Tanaka Akira <akr@m17n.org>
# 
class PrettyPrint

  # This is a convenience method which is same as follows:
  # 
  #   begin
  #     q = PrettyPrint.new(output, maxwidth, newline, &genspace)
  #     ...
  #     q.flush
  #     output
  #   end
  # 
  def PrettyPrint.format(output='', maxwidth=79, newline="\n", genspace=lambda {|n| ' ' * n})
    q = PrettyPrint.new(output, maxwidth, newline, &genspace)
    yield q
    q.flush
    output
  end

  # This is similar to PrettyPrint::format but the result has no breaks.
  #
  # +maxwidth+, +newline+ and +genspace+ are ignored.
  #
  # The invocation of +breakable+ in the block doesn't break a line and is
  # treated as just an invocation of +text+.
  #
  def PrettyPrint.singleline_format(output='', maxwidth=nil, newline=nil, genspace=nil)
    q = SingleLine.new(output)
    yield q
    output
  end

  # Creates a buffer for pretty printing.
  #
  # +output+ is an output target. If it is not specified, '' is assumed. It
  # should have a << method which accepts the first argument +obj+ of
  # PrettyPrint#text, the first argument +sep+ of PrettyPrint#breakable, the
  # first argument +newline+ of PrettyPrint.new, and the result of a given
  # block for PrettyPrint.new.
  #
  # +maxwidth+ specifies maximum line length. If it is not specified, 79 is
  # assumed. However actual outputs may overflow +maxwidth+ if long
  # non-breakable texts are provided.
  #
  # +newline+ is used for line breaks. "\n" is used if it is not specified.
  #
  # The block is used to generate spaces. {|width| ' ' * width} is used if it
  # is not given.
  #
  def initialize(output='', maxwidth=79, newline="\n", &genspace)
    @output = output
    @maxwidth = maxwidth
    @newline = newline
    @genspace = genspace || lambda {|n| ' ' * n}

    @output_width = 0
    @buffer_width = 0
    @buffer = []

    root_group = Group.new(0)
    @group_stack = [root_group]
    @group_queue = GroupQueue.new(root_group)
    @indent = 0
  end
  attr_reader :output, :maxwidth, :newline, :genspace
  attr_reader :indent, :group_queue

  def current_group
    @group_stack.last
  end

  # first? is a predicate to test the call is a first call to first? with
  # current group.
  #
  # It is useful to format comma separated values as:
  #
  #   q.group(1, '[', ']') {
  #     xxx.each {|yyy|
  #       unless q.first?
  #         q.text ','
  #         q.breakable
  #       end
  #       ... pretty printing yyy ...
  #     }
  #   }
  #
  # first? is obsoleted in 1.8.2.
  #
  def first?
    warn "PrettyPrint#first? is obsoleted at 1.8.2."
    current_group.first?
  end

  def break_outmost_groups
    while @maxwidth < @output_width + @buffer_width
      return unless group = @group_queue.deq
      until group.breakables.empty?
        data = @buffer.shift
        @output_width = data.output(@output, @output_width)
        @buffer_width -= data.width
      end
      while !@buffer.empty? && Text === @buffer.first
        text = @buffer.shift
        @output_width = text.output(@output, @output_width)
        @buffer_width -= text.width
      end
    end
  end

  # This adds +obj+ as a text of +width+ columns in width.
  #
  # If +width+ is not specified, obj.length is used.
  #
  def text(obj, width=obj.length)
    if @buffer.empty?
      @output << obj
      @output_width += width
    else
      text = @buffer.last
      unless Text === text
        text = Text.new
        @buffer << text
      end
      text.add(obj, width)
      @buffer_width += width
      break_outmost_groups
    end
  end

  def fill_breakable(sep=' ', width=sep.length)
    group { breakable sep, width }
  end

  # This tells "you can break a line here if necessary", and a +width+\-column
  # text +sep+ is inserted if a line is not broken at the point.
  #
  # If +sep+ is not specified, " " is used.
  #
  # If +width+ is not specified, +sep.length+ is used. You will have to
  # specify this when +sep+ is a multibyte character, for example.
  #
  def breakable(sep=' ', width=sep.length)
    group = @group_stack.last
    if group.break?
      flush
      @output << @newline
      @output << @genspace.call(@indent)
      @output_width = @indent
      @buffer_width = 0
    else
      @buffer << Breakable.new(sep, width, self)
      @buffer_width += width
      break_outmost_groups
    end
  end

  # Groups line break hints added in the block. The line break hints are all
  # to be used or not.
  #
  # If +indent+ is specified, the method call is regarded as nested by
  # nest(indent) { ... }.
  #
  # If +open_obj+ is specified, <tt>text open_obj, open_width</tt> is called
  # before grouping. If +close_obj+ is specified, <tt>text close_obj,
  # close_width</tt> is called after grouping.
  #
  def group(indent=0, open_obj='', close_obj='', open_width=open_obj.length, close_width=close_obj.length)
    text open_obj, open_width
    group_sub {
      nest(indent) {
        yield
      }
    }
    text close_obj, close_width
  end

  def group_sub
    group = Group.new(@group_stack.last.depth + 1)
    @group_stack.push group
    @group_queue.enq group
    begin
      yield
    ensure
      @group_stack.pop
      if group.breakables.empty?
        @group_queue.delete group
      end
    end
  end

  # Increases left margin after newline with +indent+ for line breaks added in
  # the block.
  #
  def nest(indent)
    @indent += indent
    begin
      yield
    ensure
      @indent -= indent
    end
  end

  # outputs buffered data.
  #
  def flush
    @buffer.each {|data|
      @output_width = data.output(@output, @output_width)
    }
    @buffer.clear
    @buffer_width = 0
  end

  class Text
    def initialize
      @objs = []
      @width = 0
    end
    attr_reader :width

    def output(out, output_width)
      @objs.each {|obj| out << obj}
      output_width + @width
    end

    def add(obj, width)
      @objs << obj
      @width += width
    end
  end

  class Breakable
    def initialize(sep, width, q)
      @obj = sep
      @width = width
      @pp = q
      @indent = q.indent
      @group = q.current_group
      @group.breakables.push self
    end
    attr_reader :obj, :width, :indent

    def output(out, output_width)
      @group.breakables.shift
      if @group.break?
        out << @pp.newline
        out << @pp.genspace.call(@indent)
        @indent
      else
        @pp.group_queue.delete @group if @group.breakables.empty?
        out << @obj
        output_width + @width
      end
    end
  end

  class Group
    def initialize(depth)
      @depth = depth
      @breakables = []
      @break = false
    end
    attr_reader :depth, :breakables

    def break
      @break = true
    end

    def break?
      @break
    end

    def first?
      if defined? @first
        false
      else
        @first = false
        true
      end
    end
  end

  class GroupQueue
    def initialize(*groups)
      @queue = []
      groups.each {|g| enq g}
    end

    def enq(group)
      depth = group.depth
      @queue << [] until depth < @queue.length
      @queue[depth] << group
    end

    def deq
      @queue.each {|gs|
        (gs.length-1).downto(0) {|i|
          unless gs[i].breakables.empty?
            group = gs.slice!(i, 1).first
            group.break
            return group
          end
        }
        gs.each {|group| group.break}
        gs.clear
      }
      return nil
    end

    def delete(group)
      @queue[group.depth].delete(group)
    end
  end

  class SingleLine
    def initialize(output, maxwidth=nil, newline=nil)
      @output = output
      @first = [true]
    end

    def text(obj, width=nil)
      @output << obj
    end

    def breakable(sep=' ', width=nil)
      @output << sep
    end

    def nest(indent)
      yield
    end

    def group(indent=nil, open_obj='', close_obj='', open_width=nil, close_width=nil)
      @first.push true
      @output << open_obj
      yield
      @output << close_obj
      @first.pop
    end

    def flush
    end

    def first?
      result = @first[-1]
      @first[-1] = false
      result
    end
  end
end

if __FILE__ == $0
  require 'test/unit'

  class WadlerExample < Test::Unit::TestCase # :nodoc:
    def setup
      @tree = Tree.new("aaaa", Tree.new("bbbbb", Tree.new("ccc"),
                                                 Tree.new("dd")),
                               Tree.new("eee"),
                               Tree.new("ffff", Tree.new("gg"),
                                                Tree.new("hhh"),
                                                Tree.new("ii")))
    end

    def hello(width)
      PrettyPrint.format('', width) {|hello|
        hello.group {
          hello.group {
            hello.group {
              hello.group {
                hello.text 'hello'
                hello.breakable; hello.text 'a'
              }
              hello.breakable; hello.text 'b'
            }
            hello.breakable; hello.text 'c'
          }
          hello.breakable; hello.text 'd'
        }
      }
    end

    def test_hello_00_06
      expected = <<'End'.chomp
hello
a
b
c
d
End
      assert_equal(expected, hello(0))
      assert_equal(expected, hello(6))
    end

    def test_hello_07_08
      expected = <<'End'.chomp
hello a
b
c
d
End
      assert_equal(expected, hello(7))
      assert_equal(expected, hello(8))
    end

    def test_hello_09_10
      expected = <<'End'.chomp
hello a b
c
d
End
      out = hello(9); assert_equal(expected, out)
      out = hello(10); assert_equal(expected, out)
    end

    def test_hello_11_12
      expected = <<'End'.chomp
hello a b c
d
End
      assert_equal(expected, hello(11))
      assert_equal(expected, hello(12))
    end

    def test_hello_13
      expected = <<'End'.chomp
hello a b c d
End
      assert_equal(expected, hello(13))
    end

    def tree(width)
      PrettyPrint.format('', width) {|q| @tree.show(q)}
    end

    def test_tree_00_19
      expected = <<'End'.chomp
aaaa[bbbbb[ccc,
           dd],
     eee,
     ffff[gg,
          hhh,
          ii]]
End
      assert_equal(expected, tree(0))
      assert_equal(expected, tree(19))
    end

    def test_tree_20_22
      expected = <<'End'.chomp
aaaa[bbbbb[ccc, dd],
     eee,
     ffff[gg,
          hhh,
          ii]]
End
      assert_equal(expected, tree(20))
      assert_equal(expected, tree(22))
    end

    def test_tree_23_43
      expected = <<'End'.chomp
aaaa[bbbbb[ccc, dd],
     eee,
     ffff[gg, hhh, ii]]
End
      assert_equal(expected, tree(23))
      assert_equal(expected, tree(43))
    end

    def test_tree_44
      assert_equal(<<'End'.chomp, tree(44))
aaaa[bbbbb[ccc, dd], eee, ffff[gg, hhh, ii]]
End
    end

    def tree_alt(width)
      PrettyPrint.format('', width) {|q| @tree.altshow(q)}
    end

    def test_tree_alt_00_18
      expected = <<'End'.chomp
aaaa[
  bbbbb[
    ccc,
    dd
  ],
  eee,
  ffff[
    gg,
    hhh,
    ii
  ]
]
End
      assert_equal(expected, tree_alt(0))
      assert_equal(expected, tree_alt(18))
    end

    def test_tree_alt_19_20
      expected = <<'End'.chomp
aaaa[
  bbbbb[ ccc, dd ],
  eee,
  ffff[
    gg,
    hhh,
    ii
  ]
]
End
      assert_equal(expected, tree_alt(19))
      assert_equal(expected, tree_alt(20))
    end

    def test_tree_alt_20_49
      expected = <<'End'.chomp
aaaa[
  bbbbb[ ccc, dd ],
  eee,
  ffff[ gg, hhh, ii ]
]
End
      assert_equal(expected, tree_alt(21))
      assert_equal(expected, tree_alt(49))
    end

    def test_tree_alt_50
      expected = <<'End'.chomp
aaaa[ bbbbb[ ccc, dd ], eee, ffff[ gg, hhh, ii ] ]
End
      assert_equal(expected, tree_alt(50))
    end

    class Tree # :nodoc:
      def initialize(string, *children)
        @string = string
        @children = children
      end

      def show(q)
        q.group {
          q.text @string
          q.nest(@string.length) {
            unless @children.empty?
              q.text '['
              q.nest(1) {
                first = true
                @children.each {|t|
                  if first
                    first = false
                  else
                    q.text ','
                    q.breakable
                  end
                  t.show(q)
                }
              }
              q.text ']'
            end
          }
        }
      end

      def altshow(q)
        q.group {
          q.text @string
          unless @children.empty?
            q.text '['
            q.nest(2) {
              q.breakable
              first = true
              @children.each {|t|
                if first
                  first = false
                else
                  q.text ','
                  q.breakable
                end
                t.altshow(q)
              }
            }
            q.breakable
            q.text ']'
          end
        }
      end

    end
  end

  class StrictPrettyExample < Test::Unit::TestCase # :nodoc:
    def prog(width)
      PrettyPrint.format('', width) {|q|
        q.group {
          q.group {q.nest(2) {
                       q.text "if"; q.breakable;
                       q.group {
                         q.nest(2) {
                           q.group {q.text "a"; q.breakable; q.text "=="}
                           q.breakable; q.text "b"}}}}
          q.breakable
          q.group {q.nest(2) {
                       q.text "then"; q.breakable;
                       q.group {
                         q.nest(2) {
                           q.group {q.text "a"; q.breakable; q.text "<<"}
                           q.breakable; q.text "2"}}}}
          q.breakable
          q.group {q.nest(2) {
                       q.text "else"; q.breakable;
                       q.group {
                         q.nest(2) {
                           q.group {q.text "a"; q.breakable; q.text "+"}
                           q.breakable; q.text "b"}}}}}
      }
    end

    def test_00_04
      expected = <<'End'.chomp
if
  a
    ==
    b
then
  a
    <<
    2
else
  a
    +
    b
End
      assert_equal(expected, prog(0))
      assert_equal(expected, prog(4))
    end

    def test_05
      expected = <<'End'.chomp
if
  a
    ==
    b
then
  a
    <<
    2
else
  a +
    b
End
      assert_equal(expected, prog(5))
    end

    def test_06
      expected = <<'End'.chomp
if
  a ==
    b
then
  a <<
    2
else
  a +
    b
End
      assert_equal(expected, prog(6))
    end

    def test_07
      expected = <<'End'.chomp
if
  a ==
    b
then
  a <<
    2
else
  a + b
End
      assert_equal(expected, prog(7))
    end

    def test_08
      expected = <<'End'.chomp
if
  a == b
then
  a << 2
else
  a + b
End
      assert_equal(expected, prog(8))
    end

    def test_09
      expected = <<'End'.chomp
if a == b
then
  a << 2
else
  a + b
End
      assert_equal(expected, prog(9))
    end

    def test_10
      expected = <<'End'.chomp
if a == b
then
  a << 2
else a + b
End
      assert_equal(expected, prog(10))
    end

    def test_11_31
      expected = <<'End'.chomp
if a == b
then a << 2
else a + b
End
      assert_equal(expected, prog(11))
      assert_equal(expected, prog(15))
      assert_equal(expected, prog(31))
    end

    def test_32
      expected = <<'End'.chomp
if a == b then a << 2 else a + b
End
      assert_equal(expected, prog(32))
    end

  end

  class TailGroup < Test::Unit::TestCase # :nodoc:
    def test_1
      out = PrettyPrint.format('', 10) {|q|
        q.group {
          q.group {
            q.text "abc"
            q.breakable
            q.text "def"
          }
          q.group {
            q.text "ghi"
            q.breakable
            q.text "jkl"
          }
        }
      }
      assert_equal("abc defghi\njkl", out)
    end
  end

  class NonString < Test::Unit::TestCase # :nodoc:
    def format(width)
      PrettyPrint.format([], width, 'newline', lambda {|n| "#{n} spaces"}) {|q|
        q.text(3, 3)
        q.breakable(1, 1)
        q.text(3, 3)
      }
    end

    def test_6
      assert_equal([3, "newline", "0 spaces", 3], format(6))
    end

    def test_7
      assert_equal([3, 1, 3], format(7))
    end

  end

  class Fill < Test::Unit::TestCase # :nodoc:
    def format(width)
      PrettyPrint.format('', width) {|q|
        q.group {
          q.text 'abc'
          q.fill_breakable
          q.text 'def'
          q.fill_breakable
          q.text 'ghi'
          q.fill_breakable
          q.text 'jkl'
          q.fill_breakable
          q.text 'mno'
          q.fill_breakable
          q.text 'pqr'
          q.fill_breakable
          q.text 'stu'
        }
      }
    end

    def test_00_06
      expected = <<'End'.chomp
abc
def
ghi
jkl
mno
pqr
stu
End
      assert_equal(expected, format(0))
      assert_equal(expected, format(6))
    end

    def test_07_10
      expected = <<'End'.chomp
abc def
ghi jkl
mno pqr
stu
End
      assert_equal(expected, format(7))
      assert_equal(expected, format(10))
    end

    def test_11_14
      expected = <<'End'.chomp
abc def ghi
jkl mno pqr
stu
End
      assert_equal(expected, format(11))
      assert_equal(expected, format(14))
    end

    def test_15_18
      expected = <<'End'.chomp
abc def ghi jkl
mno pqr stu
End
      assert_equal(expected, format(15))
      assert_equal(expected, format(18))
    end

    def test_19_22
      expected = <<'End'.chomp
abc def ghi jkl mno
pqr stu
End
      assert_equal(expected, format(19))
      assert_equal(expected, format(22))
    end

    def test_23_26
      expected = <<'End'.chomp
abc def ghi jkl mno pqr
stu
End
      assert_equal(expected, format(23))
      assert_equal(expected, format(26))
    end

    def test_27
      expected = <<'End'.chomp
abc def ghi jkl mno pqr stu
End
      assert_equal(expected, format(27))
    end

  end
end