#!/usr/bin/perl -w =pod =head1 NAME summ_blasttab_hits.pl =head1 SYNOPSIS summ_blasttab_hits.pl file1 file2 file3 ... summ_blasttab_hits.pl gstt1_drome_hits.bp62 gstt1_drome_hits.vt80 gstt1_drome_hits.vt20 =head1 OPTIONS -h short help --help include description --expect show E()-value -t transpose =head1 DESCRIPTION =head1 AUTHOR William R. Pearson, wrp@virginia.edu =cut # parses: blastp -outfmt=7 text: # # # Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score # # 10 hits found # sp|P10649|GSTM1_MOUSE gi|121719|sp|P08010|GSTM2_RAT 77.06 218 50 0 1 218 1 218 4e-133 376 # sp|P10649|GSTM1_MOUSE gi|121746|sp|P09211|GSTP1_HUMAN 30.81 211 133 3 2 208 3 204 3e-29 109 # sp|P10649|GSTM1_MOUSE gi|121749|sp|P04906|GSTP1_RAT 30.81 211 133 3 2 208 3 204 2e-27 104 # sp|P10649|GSTM1_MOUSE gi|62822551|sp|P00502|GSTA1_RAT 27.35 223 144 7 4 218 6 218 4e-12 61.6 # ################ # # for each query in a set of searches, report (for several different results files) # (1) the best hit (expect, percid, a_len) # (2) the number of hits better than expect # # for all the queries in the files use strict; use Pod::Usage; use Getopt::Long; use vars qw($file $fd @file_list $shelp $help); use vars qw($expect $a_len $f_id $bits $all $trans); ($expect,$a_len,$f_id, $bits, $all, $trans) = ( 1e-3, 0, 0, 0, 0, 0); pod2usage(1) unless @ARGV; GetOptions("h|?" => \$shelp, "help" => \$help, "all" => \$all, "expect:f" => \$expect, "t" => \$trans, "bits" => \$bits, "a_len" => \$a_len, "f_id" => \$f_id, ); pod2usage(1) if $shelp; pod2usage(exitstatus => 0, verbose => 2) if $help; my @b_fields = qw(q_acc l_acc percid a_len miss gap q_start q_end s_start s_end evalue bits); my @res_fields = (); if ($all) { push @res_fields, qw( evalue bits percid a_len ); } elsif ($expect || $bits || $f_id || $a_len) { if ($expect) { push @res_fields, "evalue"; } if ($bits) { push @res_fields, "bits"; } if ($f_id) { push @res_fields, "percid"; } if ($a_len) { push @res_fields, "a_len"; } } else { @res_fields = qw( evalue bits percid a_len ); } my %f_results = (); my @query_list = (); for (my $i=0; $i < @ARGV; $i++) { $file = $ARGV[$i]; # open the file unless (open($fd,$file)) { warn "Cannot open $file\n"; next; } # extract the suffix (no longer) # my ($f_name) = ($file =~ m/(\w+)\.?.*$/); my $f_name = $file; push @file_list, $f_name; my $q_acc = ""; my $sig_hits = 0; my ($max_evalue, $min_percid); my $hit_summary_r = 0; my %results = (); while (my $line = <$fd>) { # skip over the comments if ($line =~ /^#/) { if ($line =~ /^# Query: (\S+)\s/) { my $new_q_acc = $1; if ($i == 0) {push @query_list, $new_q_acc;} if ($hit_summary_r) { $hit_summary_r->{hits} = $sig_hits; $hit_summary_r->{max_evalue} = $max_evalue; $hit_summary_r->{min_percid} = $min_percid; $results{$q_acc} = $hit_summary_r; } $sig_hits = 0; $q_acc = $new_q_acc; $hit_summary_r = {q_acc => $q_acc}; } next; } chomp($line); # read in the accessions, other info my %data = (); @data{@b_fields} = split(/\t/,$line); $data{l_acc} =~ s/^gi\|\d+\|//; next unless $data{evalue} <= $expect; if ($sig_hits == 0) { @{$hit_summary_r}{qw(mx_l_acc percid evalue a_len bits)} = @data{qw(l_acc percid evalue a_len bits)}; } $sig_hits++; $max_evalue = $data{evalue}; $min_percid = $data{percid}; } if ($hit_summary_r) { $hit_summary_r->{hits} = $sig_hits; $hit_summary_r->{max_evalue} = $max_evalue; $hit_summary_r->{min_percid} = $min_percid; $results{$q_acc} = $hit_summary_r; } $f_results{$f_name} = \%results; } my $tab_fill = "\t" x 7 ; if ($trans) { print "#\t\t\t".join($tab_fill,@query_list) . "\n"; for my $f_name ( @file_list ) { for my $q_acc ( @query_list ) { print "$f_name\t"; my $f_result_p = $f_results{$f_name}{$q_acc}; if ($f_result_p->{hits}) { print join("\t",@{$f_result_p}{qw(mx_l_acc evalue percid hits max_evalue)}),"\t"; } else { print $tab_fill; } } print "\n"; } } else { print "#\t\t\t".join($tab_fill,@file_list) . "\n"; for my $q_acc ( @query_list ) { print "$q_acc\t"; for my $f_name ( @file_list ) { my $f_result_p = $f_results{$f_name}{$q_acc}; if ($f_result_p->{hits}) { print join("\t",@{$f_result_p}{qw(mx_l_acc evalue percid hits max_evalue min_percid)}),"\t"; } else { print $tab_fill; } } print "\n"; } } __END__