Ext package
This package contains modules that are C language extensions. Some of them interface with an external C library. All of them use the Perl XS system to link to C code.
Contents |
The latest code
The latest, and not completely tested, code can be viewed here:
http://code.open-bio.org/svnweb/index.cgi/bioperl/browse/bioperl-ext/trunk
You can download it by following the instructions on the main Subversion page. The repository name is bioperl-ext.
SeqIO modules linked to Staden
The package includes Bio::SeqIO::staden::read, which links to the Staden IO library (io_lib) to read sequence trace files in formats such as ABI, SCF, and ZTR. Note: use io_lib v1.8.11 or v1.8.12. Newer versions do not seem to work, likely due to API changes.
Alignment modules
The Bio::Ext::Align modules were written by Ewan Birney to interface with his Wise library. Most notably there is a Smith-Waterman implementation. You can execute it with the following code.
use strict;
use warnings;

use Bio::Ext::Align;
use Bio::Seq;
use Bio::AlignIO;
# Loaded explicitly because both classes are instantiated directly below.
use Bio::SimpleAlign;
use Bio::LocatableSeq;

# Example: align two short protein sequences with the Smith-Waterman
# implementation exposed by Bio::Ext::Align, then print the result twice --
# once in Wise "pretty" format on STDERR and once as a ClustalW alignment
# through Bio::AlignIO.

# NOTE(review): judging by its name this raises the memory ceiling for the
# alignment matrix to 20000 KB -- confirm against the Wise documentation.
Bio::Ext::Align::change_max_BaseMatrix_kbytes(20000);

# The comparison matrix is read from a BLAST-format file that must exist in
# the current working directory.
my $cm = Bio::Ext::Align::CompMat::read_Blast_file_CompMat("blosum62.bla");

my $seq1 = Bio::Ext::Align::new_Sequence_from_strings(
    "one", "WLGQRNLVSSTGGNLLNVWLKDW",
);
my $seq2 = Bio::Ext::Align::new_Sequence_from_strings(
    "two", "WMGNRNVVNLLNVWFRDW",
);

# NOTE(review): -12 and -2 are presumably the gap-open and gap-extension
# penalties -- confirm against the Wise documentation.
my $aln = Bio::Ext::Align::Align_Sequences_ProteinSmithWaterman(
    $seq1, $seq2, $cm, -12, -2,
);

# No -file/-fh is given, so the writer uses Bio::AlignIO's default output.
my $alnout = Bio::AlignIO->new(-format => 'clustalw');

# print it out in Wise alignment format
# The alignment computed above is passed here (the original referenced an
# undefined variable). The handle is passed as a glob reference so that it
# stays valid under "use strict".
Bio::Ext::Align::write_pretty_str_align(
    $aln,
    $seq1->name, $seq1->seq,
    $seq2->name, $seq2->seq,
    15, 50, \*STDERR,
);

# or make a Bioperl alignment object and write that out
my $out = Bio::SimpleAlign->new();
$out->add_seq(
    Bio::LocatableSeq->new(
        -seq   => $aln->aln1,
        -start => $aln->start1,
        -end   => $aln->end1,
        -id    => "one",
    )
);
$out->add_seq(
    Bio::LocatableSeq->new(
        -seq   => $aln->aln2,
        -start => $aln->start2,
        -end   => $aln->end2,
        -id    => "two",
    )
);
$alnout->write_aln($out);
HMM modules
The package also contains modules for implementing HMM algorithms. The author is Yee Man Chan. Here is some example code for testing out the modules.
use strict;
use warnings;

use Bio::Matrix::Scoring;
use Bio::Tools::HMM;

# Example: train a two-state HMM over a six-symbol alphabet, first with
# Baum-Welch (symbol sequences only) and then with statistical training
# (symbol sequences plus their known state paths), printing the model
# parameters after each step and finally the Viterbi path for sequence 1.

# Print every value of an array reference on one tab-separated line.
sub print_prob_array {
    my ($probs) = @_;
    printf "%g\t", $_ for @{$probs};
    print "\n";
    return;
}

# Print a matrix object row by row, one tab-separated line per row.
# The object must provide row_names, column_names and entry($row, $col).
sub print_prob_matrix {
    my ($matrix) = @_;
    for my $row ($matrix->row_names) {
        for my $col ($matrix->column_names) {
            printf "%g\t", $matrix->entry($row, $col);
        }
        print "\n";
    }
    return;
}

# NOTE(review): symbols 1-6 with states F/L looks like the classic
# fair/loaded die example -- confirm with the module author's documentation.
my $hmm = Bio::Tools::HMM->new(-symbols => "123456", -states => "FL");

# Each $seqN chunk is paired with the $obsN chunk that follows it: one state
# letter per observed symbol.
my $seq1 = "315116246446644245311321631164152133625144543631656626566666";
my $obs1 = "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFLLLLLLLLLLLLLLL";
$seq1 .= "651166453132651245636664631636663162326455236266666625151631";
$obs1 .= "LLLLLLFFFFFFFFFFFFLLLLLLLLLLLLLLLLFFFLLLLLLLLLLLLLLFFFFFFFFF";
$seq1 .= "222555441666566563564324364131513465146353411126414626253356";
$obs1 .= "FFFFFFFFLLLLLLLLLLLLLFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF";
$seq1 .= "366163666466232534413661661163252562462255265252265435353336";
$obs1 .= "LLLLLLLLFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF";
$seq1 .= "233121625364414432335163243633665562466662632666612355245242";
$obs1 .= "FFFFFFFFFFFFFFFFFFFFFFFFFFFLLLLLLLLLLLLLLLLLLLLLLFFFFFFFFFFF";

my $seq2 = "544552213525245666363632432522253566166546666666533666543261";
my $obs2 = "FFFFFFFFFFFFLLLLLLLLLLLFFFFFFFFFFFFLLLLLLLLLLLLLLLLLLLFFFFFF";
$seq2 .= "363546253252546524422555242223224344432423341365415551632161";
$obs2 .= "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF";
$seq2 .= "144212242323456563652263346116214136666156616666566421456123";
$obs2 .= "FFFFFFLLLFFFFFFFFFFFFFFFFFFFFFFFFLFLLLLLLLLLLLLLLLLFFFFFFFFF";
$seq2 .= "346313546514332164351242356166641344615135266642261112465663";
$obs2 .= "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF";

my @seqs = ($seq1, $seq2);

print "Baum-Welch Training\n";
print "===================\n";
$hmm->baum_welch_training(\@seqs);

print "Initial Probability Array:\n";
print_prob_array($hmm->init_prob);

print "Transition Probability Matrix:\n";
print_prob_matrix($hmm->transition_prob);

print "Emission Probability Matrix:\n";
print_prob_matrix($hmm->emission_prob);
print "\n";

printf "Log Probability of sequence 1: %g\n", $hmm->likelihood($seq1);
printf "Log Probability of sequence 2: %g\n", $hmm->likelihood($seq2);
print "\n";

print "Statistical Training\n";
print "====================\n";
my @obs = ($obs1, $obs2);
$hmm->statistical_training(\@seqs, \@obs);

# In this half each parameter is read and then handed straight back to the
# same accessor, so the setter path of every accessor is exercised as well.
print "Initial Probability Array:\n";
my $init = $hmm->init_prob;
$hmm->init_prob($init);
print_prob_array($init);

print "Transition Probability Matrix:\n";
my $transition = $hmm->transition_prob;
$hmm->transition_prob($transition);
print_prob_matrix($transition);

print "Emission Probability Matrix:\n";
my $emission = $hmm->emission_prob;
$hmm->emission_prob($emission);
print_prob_matrix($emission);

print "Viterbi Algorithm:\n";
my $path = $hmm->viterbi($seq1);
printf "%s\n", $path;