#! /usr/bin/env ruby def readfasta( fp, name, seq ) nseq = 0 tmpseq = "" while fp.gets if $_ =~ /^>/ then name.push( $_.sub(/>/,"").strip ) seq.push( tmpseq ) if nseq > 0 nseq += 1 tmpseq = "" else tmpseq += $_.strip end end seq.push( tmpseq ) return nseq end tname = [] tseq = [] infp = File.open( ARGV[0], "r" ) tin = readfasta( infp, tname, tseq ) infp.close ref = tseq[0] puts "# posinaln posinref aainref changes" puts "# changes < 0 means the site is gap in ref" len = ref.length for i in 1..(tin-1) # EXCLUDE FIRST SEQUENCE (human) for j in 0..(len-1) if tseq[i][j..j] == '-' then tseq[i][j..j] = 'X' else break end end for j in 0..(len-1) p = len-1-j if tseq[i][p..p] == '-' then tseq[i][p..p] = 'X' else break end end end #for i in 0..(tin-1) # puts ">" # puts tseq[i] #end # #exit posinref = 0 for posinaln in 0..(len-1) column = [] for i in 1..(tin-1) # EXCLUDE FIRST SEQUENCE column.push( tseq[i][posinaln] ) unless tseq[i][posinaln..posinaln] == 'X' end column.uniq! # p column changes = column.length changes *= -1 if ref[posinaln..posinaln] == '-' # p ref[posinaln..posinaln] puts (posinaln+1).to_s + "\t" + (posinref+1).to_s + "\t" + ref[posinaln..posinaln] + "\t" + (changes).to_s + "->" + column.join() posinref += 1 if ref[posinaln..posinaln] != '-' end