#!/usr/bin/perl -w =pod =head1 NAME merge_hits.pl =head1 SYNOPSIS merge_hits.pl file1 file2 file3 ... merge_hits.pl gstt1_drome_hits.bp62 gstt1_drome_hits.vt80 gstt1_drome_hits.vt20 =head1 OPTIONS -h short help --help include description --expect show E()-value --all show all scores (default) --bits show bits --f_id show fraction identical --a_len show alignment length =head1 DESCRIPTION Ctakes a set of results files produced by the C or C(bp_blastp.pl> scripts, and merges the results in these files, so that they can be compared. The C<*_hits.*> are expected to contain lines containing an accession, E()-value, bit score, fraction identical, and alignment length, ending with a description: #Hits: ACC E() bits f_id a_len descr NP_000552 2e-145 409.0 1.000 218 glutathione S-transferase Mu 1 isoform 1 [Homo sapiens] Results from each of the files are merged using the unique accession (and ordered according to the results in the first file). =head1 AUTHOR William R. Pearson, wrp@virginia.edu =cut use strict; use Pod::Usage; use Getopt::Long; use vars qw($file $fd %results @file_list $matrix @hit_list $shelp $help); use vars qw($expect $a_len $f_id $bits $all); ($expect,$a_len,$f_id, $bits, $all) = ( 0, 0, 0, 0, 0); pod2usage(1) unless @ARGV; GetOptions("h|?" => \$shelp, "help" => \$help, "all" => \$all, "expect" => \$expect, "bits" => \$bits, "a_len" => \$a_len, "f_id" => \$f_id, ); pod2usage(1) if $shelp; pod2usage(exitstatus => 0, verbose => 2) if $help; my @res_fields = (); if ($all) { @res_fields = qw( expect bits f_id a_len ); } elsif ($expect || $bits || $f_id || $a_len) { if ($expect) { push @res_fields, "expect";} if ($bits) { push @res_fields, "bits";} if ($f_id) { push @res_fields, "f_id";} if ($a_len) { push @res_fields, "a_len";} } else { @res_fields = qw( expect bits f_id a_len ); } for (my $i=0; $i < @ARGV; $i++) { $file = $ARGV[$i]; # open the file unless (open($fd,$file)) { warn "Cannot open $file\n"; next; } # extract the suffix ($matrix) = ($file =~ m/\w+\.(\w+)$/); push @file_list, $matrix; while (my $line = <$fd>) { # skip over the comments next if ($line =~ /^#/); chomp($line); # read in the accessions, other info my @data = split(/\t/,$line); my $acc = $data[0]; if ($i == 0) { push @hit_list, $acc;} my %values = (); @values{qw(expect bits f_id a_len)} = @data[1..4]; unless ($results{$acc}) { $results{$acc} = {descr => $data[-1]}; } $results{$acc}{$matrix} = \%values; } } #print Dumper(%results); my $tab_fill = "\t" x scalar(@res_fields); print "#\t\t".join($tab_fill,@file_list) . "\n"; for my $acc ( @hit_list) { print $acc; for $matrix ( @file_list ) { if ($results{$acc}{$matrix}) { my $result_p = $results{$acc}{$matrix}; print "\t",join("\t",@$result_p{@res_fields}); } else { print $tab_fill; } } print "\t".$results{$acc}{descr} . "\n"; } __END__