so_reverse_complement.yml   [plain text]


prelude: |
  bm_so_fasta = <<'EOS'
  # The Computer Language Shootout
  # http://shootout.alioth.debian.org/
  # Contributed by Sokolov Yura

  $last = 42.0
  def gen_random(max, im=139968, ia=3877, ic=29573)
      (max * ($last = ($last * ia + ic) % im)) / im
  end

  alu =
     "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"+
     "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"+
     "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"+
     "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"+
     "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"+
     "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"+
     "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"

  iub = [
      ["a", 0.27],
      ["c", 0.12],
      ["g", 0.12],
      ["t", 0.27],

      ["B", 0.02],
      ["D", 0.02],
      ["H", 0.02],
      ["K", 0.02],
      ["M", 0.02],
      ["N", 0.02],
      ["R", 0.02],
      ["S", 0.02],
      ["V", 0.02],
      ["W", 0.02],
      ["Y", 0.02],
  ]
  homosapiens = [
      ["a", 0.3029549426680],
      ["c", 0.1979883004921],
      ["g", 0.1975473066391],
      ["t", 0.3015094502008],
  ]

  def make_repeat_fasta(id, desc, src, n)
      puts ">#{id} #{desc}"
      v = nil
      width = 60
      l = src.length
      s = src * ((n / l) + 1)
      s.slice!(n, l)
      puts(s.scan(/.{1,#{width}}/).join("\n"))
  end

  def make_random_fasta(id, desc, table, n)
      puts ">#{id} #{desc}"
      rand, v = nil,nil
      width = 60
      chunk = 1 * width
      prob = 0.0
      table.each{|v| v[1]= (prob += v[1])}
      for i in 1..(n/width)
          puts((1..width).collect{
              rand = gen_random(1.0)
              table.find{|v| v[1]>rand}[0]
          }.join)
      end
      if n%width != 0
          puts((1..(n%width)).collect{
              rand = gen_random(1.0)
              table.find{|v| v[1]>rand}[0]
          }.join)
      end
  end


  n = (ARGV[0] or 250_000).to_i

  make_repeat_fasta('ONE', 'Homo sapiens alu', alu, n*2)
  make_random_fasta('TWO', 'IUB ambiguity codes', iub, n*3)
  make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, n*5)
  EOS
benchmark:
  - name: so_reverse_complement
    prelude: |
      script = File.join(File.dirname($0), 'bm_so_fasta.rb')
      File.write(script, bm_so_fasta)

      def prepare_fasta_output n
        filebase = File.join(File.dirname($0), 'fasta.output')
        script = File.join(File.dirname($0), 'bm_so_fasta.rb')
        file = "#{filebase}.#{n}"

        unless FileTest.exist?(file)
          STDERR.puts "preparing #{file}"

          open(file, 'w'){|f|
            ARGV[0] = n
            $stdout = f
            load script
            $stdout = STDOUT
          }
        end
      end
      prepare_fasta_output(2_500_000)
    script: |
      # The Great Computer Language Shootout
      # http://shootout.alioth.debian.org/
      #
      # Contributed by Peter Bjarke Olsen
      # Modified by Doug King

      seq=Array.new

      def revcomp(seq)
        seq.reverse!.tr!('wsatugcyrkmbdhvnATUGCYRKMBDHVN','WSTAACGRYMKVHDBNTAACGRYMKVHDBN')
        stringlen=seq.length
        0.step(stringlen-1,60) {|x| print seq.slice(x,60) , "\n"}
      end

      input = open(File.join(File.dirname($0), 'fasta.output.2500000'), 'rb')

      while input.gets
        if $_ =~ />/
          if seq.length != 0
            revcomp(seq.join)
            seq=Array.new
          end
          puts $_
        else
          $_.sub(/\n/,'')
          seq.push $_
        end
      end
      revcomp(seq.join)
    loop_count: 1