#!/usr/bin/perl -w # $Id: modules.pl 10084 2006-07-04 22:23:29Z mauricio $ # =head1 NAME modules.pl - information about modules in BioPerl core =head1 SYNOPSIS B [B<-V|--verbose>] [B<-c|--count>] | [B<-l|--list>] | [B<-u|--untested>] | [B<-i|--info> class] | [B<-i|--inherit> | [B<-d|--dir> path ] | [B<-v|--version> | [B<-?|-h|--help>] =head1 DESCRIPTION This script counts, lists and provides other information about bioperl modules. It is mainly meant to be run by bioperl maintainers. The default action is to count modules in the bioperl core distribution. Based on the class name it tries to classify them into categories. The following is a tentative glossary of terms used. =over 4 =item Base Synonyms: Generic class, parameterized class, generic module. A class that you don't instantiate in your scripts, but that it's a template for other classes. Examples: Bio::Tools::Run::WrapperBase - a base object for wrappers around executables. Bio::Tools::Analysis::SimpleAnalysisBase - an abstract superclass for SimpleAnalysis implementations This are counted with C; They have "Base" in the beginning or end of the name. =item Interface Synonyms: protocol, feature set. Class that defines a set of features that are common to a group of classes. Example: Bio::Tree::NodeI - interface describing a Tree Node. This are counted with C; They have "I" at the end of the name. =item Component A class that implements a small subset of their superclass. They are in a directory with an identical name of the superclass. There are plenty of them. You need only a small number of methods to be overridden. Example: Bio::SeqIO::fasta. This is counted with C; Classes are inside their base directory and all in lowercase. =item Instance The rest of them. It is sometimes helpful to divide them into two types: =over 2 =item Algorithmic classes Example: Bio::AlignIO - Handler for AlignIO formats =item Storage classes Example: Bio::SimpleAlign - Multiple alignments held as a set of sequences =back =back =cut # # The helper class to store class status; # package BioClass; sub new { my $class = shift; my $name = shift; die unless $name; my $self = {}; $self->{'name'} = $name; $self->{'tested'} = 0; $self->{'type'} = ''; $self->{'path'} = ''; bless $self, $class; } sub name { my $self = shift; return $self->{'name'}; } sub tested { my $self = shift; my $value = shift; $self->{'tested'} = 1 if defined $value && $value; return $self->{'tested'} || 0; } sub type { my $self = shift; my $value = shift; $self->{'type'} = $value if defined $value; return $self->{'type'}; } sub path { my $self = shift; my $value = shift; $self->{'path'} = $value if defined $value; return $self->{'path'}; } sub add_superclass { my $self = shift; my $superclass = shift; return unless $superclass; $self->{'superclasses'}->{$superclass} = 1 ; } sub each_superclass { my $self = shift; return keys %{$self->{'superclasses'}}; } sub add_used_class { my $self = shift; my $used_class = shift; return unless $used_class; $self->{'used_classes'}->{$used_class} = 1 ; } sub each_used_class { my $self = shift; return keys %{$self->{'used_classes'}}; } package main; use File::Find; use Getopt::Long; use Data::Dumper; use strict; # declare subroutines sub dir; sub modules; sub count; sub list_all; sub untested; sub info; sub inherit; sub synopsis; sub version; # command line options my ($dir, $count,$list, $verbose,$info,$untested, $inherit, $synopsis, $version); GetOptions( 'dir:s' => \$dir, 'count' => \$count, 'list' => \$list, 'test_BioClass' => \&_test_BioClass, 'V|verbose' => \$verbose, 'untested' => \$untested, 'info:s' => \$info, 'inherit' => \$inherit, 'synopsis' => \$synopsis, 'version' => \$version, 'h|help|?' => sub{ exec('perldoc',$0); exit(0) } ); our %MODULES; # storage structure # find modules my $pwd = $ENV{PWD}; my $seachdir = "$pwd/../Bio"; #default my %FIND_OPTIONS = ( wanted => \&modules ); $seachdir = "$pwd/$dir" if $dir; find \%FIND_OPTIONS, $seachdir; # call subroutines if ($list) { list_all } elsif ($untested) { untested } elsif ($info) { info($info) } elsif ($inherit) { inherit } elsif ($synopsis) { synopsis } elsif ($version) { version } else { count } ################# end main #################### # # subroutines; # sub _test_BioClass { $a = new BioClass('Bio::Test'); print "Class name: ", $a->name(), "\n"; $a->add_superclass('Bio::Super'); $a->add_superclass('Bio::Super2'); $a->tested(1); $a->type('instance'); print Dumper [$a->each_superclass] if $a->tested; print Dumper $a; exit; } sub modules { return unless /\.pm$/ ; #return unless -e $_; #print "file: $_\n" if $verbose; open (F, $_) or warn "can't open file $_: $!" && return; my $class; while () { if (/^package\s+([\w:]+)\s*;/) { #print $1, "\n" if $verbose; $_ = $1; $class = new BioClass($_); $MODULES{$_} = $class; if (/.*:[a-z]/) { $class->type('component'); } elsif (/:Base/ | /Base$/) { $class->type('base'); } elsif (/[^A-Z]I$/) { $class->type('interface'); } else { $class->type('instance'); } $class->path($File::Find::name); } if (/^\w*use/ && /(Bio[\w:]+)\W*;/ && not /base/) { next unless $class; #print "\t$1\n" if $verbose; $class->add_used_class($1); } if ((/\@ISA/ || /use base/) && /Bio/) { next unless $class; my $line = $_; while ( $line =~ /(Bio[\w:]+)/g) { #print "\t$1\n" if $verbose; $class->add_superclass($1); } } if (/\@ISA/ && /Bio/) { next unless $class; my $line = $_; while ( $line =~ /(Bio[\w:]+)/g) { #print "\t$1\n" if $verbose; $class->add_superclass($1); } } } close F; } =head1 OPTIONS Only one option is processed on each run of the script. The --verbose is an exception, it modifies the amount of output. =over 4 =item B<-V | --verbose> B Set this option if you want to see more verbose output. Often that will mean seeing warnings normally going into STDERR. =cut =item B<-d | --dir> path Overides the default directories to check by one directory 'path' and all its subdirectories. =item B<-c | --count> The default action if no other option is given. Gives the count of modules broken to B ("usable"), B ( (abstract)? superclass) , B (the "I" files) and B (used from instantiable parent) modules, in addition to total number of modules. Note that abstract superclass in bioperl is not an enforced concept and they are not clearly indicateded in the class name. =cut sub count { printf "Instance : %3d\n", scalar (grep $MODULES{$_}->type =~ /instance/ , keys %MODULES); printf "Base : %3d\n", scalar (grep $MODULES{$_}->type =~ /base/ , keys %MODULES); printf "Interface: %3d\n", scalar (grep $MODULES{$_}->type =~ /interface/ , keys %MODULES); printf "Component: %3d\n", scalar (grep $MODULES{$_}->type =~ /component/ , keys %MODULES); print "--------------\n"; printf "Total : %3d\n", scalar (keys %MODULES); } =item B<-l | --list> Prints all the module names in alphabetical order. The output is a tab separated list of category (see above) and module name per line. The output can be processed with standard UNIX command line tools. =cut sub list_all { foreach ( sort keys %MODULES) { print $MODULES{$_}->type. "\t$_\n"; } } =item B<-u | --untested> Prints a list of instance modules which are I explicitly used by test files in the directory. Superclasess or any classes used by others are not reported, either, since their methods are assumed to be tested by subclass tests. =cut sub _used_and_super { my $name = shift; # print "-:$name\n" if /Locati/; foreach ($MODULES{$name}->each_superclass) { next unless defined $MODULES{$_}; # print "-^$_\n" if /Locati/; # unless (defined $MODULES{$_} or $MODULES{$_}->tested) { if (not $MODULES{$_}->tested) { $MODULES{$_}->tested(1); _used_and_super($_); } } foreach ($MODULES{$name}->each_used_class) { next unless defined $MODULES{$_}; # print "--$_\n" if /Locati/; # unless (defined $MODULES{$_} or $MODULES{$_}->tested) { if (not $MODULES{$_}->tested) { $MODULES{$_}->tested(1); _used_and_super($_); } # $MODULES{$_}->tested(1) && _used_and_super($_) # unless defined $MODULES{$_} or $MODULES{$_}->tested; } return 1; } sub untested { foreach (`find ../t -name "*.t" -print | xargs grep -hs "[ur][se][eq]"`) { s/.*use +//; s/.*require +//; next unless /^Bio/; s/[\W;]+$//; s/([\w:]+).*/$1/; my $name = $_; next unless $MODULES{$_}; $MODULES{$_}->tested(1) unless defined $MODULES{$_} and $MODULES{$_}->tested; next if $MODULES{$name}->name eq "Bio::SeqIO::abi"; # exception: requires bioperl ext package next if $MODULES{$name}->name eq "Bio::SeqIO::ctf"; # exception: requires bioperl ext package next if $MODULES{$name}->name eq "Bio::SeqIO::exp"; # exception: requires bioperl ext package next if $MODULES{$name}->name eq "Bio::SeqIO::pln"; # exception: requires bioperl ext package next if $MODULES{$name}->name eq "Bio::SeqIO::ztr"; # exception: requires bioperl ext package # print $MODULES{$name}->name, "\n"; # print Dumper $MODULES{$name}; _used_and_super($name); } foreach ( sort keys %MODULES) { # skip some name spaces next if /^Bio::Search/; # Bio::Search and Bio::SearchIO are extensively tested # but classes are used by attribute naming print "$_\n" if $MODULES{$_}->type eq 'instance' and ($MODULES{$_}->tested == 0) ; } } =item B<-i | --info> class Dumps information about a class given as an argument. =cut sub info { my $class = shift; die "" unless $class; #print Dumper $MODULES{$class}; my $c = $MODULES{$class}; print $c->name, "\n"; printf " Type:\n\t%s\n", $c->type; print " Superclasses:\n"; foreach (sort $c->each_superclass) { print "\t$_\n"; } print " Used classes:\n"; foreach (sort $c->each_used_class) { print "\t$_\n"; } } =item B<-i | --inherit> Finds interface modules which inherit from an instantiable class. Could be extended to check other bad inheritance patterns. =cut sub inherit { foreach ( sort keys %MODULES) { my $c=$MODULES{$_}; next unless $c->type =~ /interface/; foreach my $super ($c->each_superclass) { next if $super =~ /I$/; print "Check this inheritance: ", $c->name, " <-- $super\n"; } } } =item B<-s | --synopsis> Test SYNOPSIS section of bioperl modules for runnability =cut sub synopsis { foreach ( sort keys %MODULES) { my $c=$MODULES{$_}; next unless $c->type eq "instance"; next if $c->name eq 'Bio::Root::Version'; next if $c->name eq 'Bio::Tools::HMM'; my $synopsis = ''; open (F, $c->path) or warn "can't open file ".$c->name.": $!" && return; my $flag = 0; while () { last if $flag && /^=/; $synopsis .= $_ if $flag; $flag = 1 if /^=head1 +SYNOPSIS/; } # remove comments $synopsis =~ s/[^\$]#[^\n]*//g; # allow linking to an other Bio module, e.g.: See L. $synopsis =~ s/[^\n]*L&1 `; next if $res =~ /syntax OK/; print $c->path, "\n"; print $synopsis; print $res; print "-" x 70, "\n"; # print "SYNOPSIS not runnable\n"; close F; } } =item B<-v | --version> Test the VERSION of the module against the global one set in Bio::Root::Variation. Print out the different ones. =cut sub version { use Bio::Root::Version; my $version = $Bio::Root::Version::VERSION; my %skip = ( # these are defined together with an other module # and can not be use independently 'Bio::AnalysisI::JobI' => 1, 'Bio::PrimarySeq::Fasta' => 1, 'Bio::DB::Fasta::Stream' => 1, 'Bio::DB::GFF::ID_Iterator' => 1, 'Bio::DB::GFF::Adaptor::dbi::faux_dbh' =>1, 'Bio::LiveSeq::IO::SRS' =>1 # tries to call an external module ); foreach ( sort keys %MODULES) { my $n=$MODULES{$_}->name; next if $skip{$n}; my $vv= "\$${n}::VERSION"; my $v = `perl -we 'use $n; print $vv;'`; printf "%50s %-3s\n", $n, $v unless $version eq $v; } } __END__ =item B<-? | -h | --help> This help text. =back =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing list. Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track of the bugs and their resolution. Bug reports can be submitted via the web: http://bugzilla.open-bio.org/ =head1 AUTHOR Heikki Lehvaslaiho, heikki-at-bioperl-dot-org =head1 Contributors Albert Vilella, avilella-AT-gmail-DOT-com =cut